This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Revert "Update docs to concur with $`,$&,$' changes"
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
7212898e 85#if PERL_VERSION >= 9
f337b084
TH
86# define PERL_PACK_CAN_BYTEORDER
87# define PERL_PACK_CAN_SHRIEKSIGN
88#endif
89
90#ifndef CHAR_BIT
91# define CHAR_BIT 8
7212898e 92#endif
3473cf63
RGS
93/* Maximum number of bytes to which a byte can grow due to upgrade */
94#define UTF8_EXPAND 2
7212898e 95
a6ec74c1 96/*
a6ec74c1
JH
97 * Offset for integer pack/unpack.
98 *
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
101 */
102
103/*
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
109 */
110/*
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
113 */
114#define SIZE16 2
115#define SIZE32 4
116
117/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
118 --jhi Feb 1999 */
119
1109a392
MHM
120#if U16SIZE > SIZE16 || U32SIZE > SIZE32
121# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
122# define OFF16(p) ((char*)(p))
123# define OFF32(p) ((char*)(p))
a6ec74c1 124# else
1109a392 125# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
126# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
128# else
08ca2aa3 129 ++++ bad cray byte order
a6ec74c1
JH
130# endif
131# endif
a6ec74c1 132#else
08ca2aa3
TH
133# define OFF16(p) ((char *) (p))
134# define OFF32(p) ((char *) (p))
a6ec74c1
JH
135#endif
136
f337b084
TH
137/* Only to be used inside a loop (see the break) */
138#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
139 if (utf8) { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
141 } else { \
142 Copy(s, OFF16(p), SIZE16, char); \
143 (s) += SIZE16; \
144 } \
145} STMT_END
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
149 if (utf8) { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
151 } else { \
152 Copy(s, OFF32(p), SIZE32, char); \
153 (s) += SIZE32; \
154 } \
155} STMT_END
156
157#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
159
160/* Only to be used inside a loop (see the break) */
275663fa 161#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
162STMT_START { \
163 if (utf8) { \
f337b084 164 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 165 (char *) (buf), len, datumtype)) break; \
08ca2aa3 166 } else { \
275663fa
TC
167 Copy(s, (char *) (buf), len, char); \
168 s += len; \
08ca2aa3 169 } \
08ca2aa3
TH
170} STMT_END
171
275663fa
TC
172#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174
f337b084 175#define PUSH_VAR(utf8, aptr, var) \
230e1fce 176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 177
49704364
LW
178/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179#define MAX_SUB_TEMPLATE_LEVEL 100
180
66c611c5 181/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
182#define FLAG_WAS_UTF8 0x40
183#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 184#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 185#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
186#define FLAG_SLASH 0x04
187#define FLAG_COMMA 0x02
188#define FLAG_PACK 0x01
189
a6ec74c1
JH
190STATIC SV *
191S_mul128(pTHX_ SV *sv, U8 m)
192{
193 STRLEN len;
194 char *s = SvPV(sv, len);
195 char *t;
a6ec74c1 196
7918f24d
NC
197 PERL_ARGS_ASSERT_MUL128;
198
a6ec74c1 199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 200 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
201
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
204 sv = tmpNew;
205 s = SvPV(sv, len);
206 }
207 t = s + len - 1;
208 while (!*t) /* trailing '\0'? */
209 t--;
210 while (t > s) {
f7fe979e 211 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
212 *(t--) = '0' + (char)(i % 10);
213 m = (char)(i / 10);
a6ec74c1
JH
214 }
215 return (sv);
216}
217
218/* Explosives and implosives. */
219
220#if 'I' == 73 && 'J' == 74
221/* On an ASCII/ISO kind of system */
222#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
223#else
224/*
225 Some other sort of character set - use memchr() so we don't match
226 the null byte.
227 */
228#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229#endif
230
66c611c5 231/* type modifiers */
62f95557 232#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
233#define TYPE_IS_BIG_ENDIAN 0x200
234#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 235#define TYPE_IS_PACK 0x800
1109a392 236#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 237#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
238#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239
7212898e 240#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 241# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 242#else
28be1210 243# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
244#endif
245
246#ifndef PERL_PACK_CAN_BYTEORDER
247/* Put "can't" first because it is shorter */
248# define TYPE_ENDIANNESS(t) 0
249# define TYPE_NO_ENDIANNESS(t) (t)
250
251# define ENDIANNESS_ALLOWED_TYPES ""
252
253# define DO_BO_UNPACK(var, type)
254# define DO_BO_PACK(var, type)
07409e01
NC
255# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
257# define DO_BO_UNPACK_N(var, type)
258# define DO_BO_PACK_N(var, type)
259# define DO_BO_UNPACK_P(var)
260# define DO_BO_PACK_P(var)
45f723e1
NC
261# define DO_BO_UNPACK_PC(var)
262# define DO_BO_PACK_PC(var)
66c611c5 263
f337b084 264#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
265
266# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
268
269# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
270
271# define DO_BO_UNPACK(var, type) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
276 default: break; \
277 } \
278 } STMT_END
279
7212898e 280# define DO_BO_PACK(var, type) \
1109a392 281 STMT_START { \
66c611c5 282 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
285 default: break; \
286 } \
287 } STMT_END
288
07409e01 289# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 292 case TYPE_IS_BIG_ENDIAN: \
07409e01 293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
294 break; \
295 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
297 break; \
298 default: \
299 break; \
300 } \
301 } STMT_END
302
07409e01 303# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 304 STMT_START { \
66c611c5 305 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 306 case TYPE_IS_BIG_ENDIAN: \
07409e01 307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
311 break; \
312 default: \
313 break; \
314 } \
315 } STMT_END
316
7212898e 317# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
318 STMT_START { \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
323 break; \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
327 break; \
328 default: \
329 break; \
330 } \
331 } STMT_END
332
7212898e 333# if PTRSIZE == INTSIZE
07409e01
NC
334# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 338# elif PTRSIZE == LONGSIZE
fa58a56f
S
339# if LONGSIZE < IVSIZE && IVSIZE == 8
340# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
344# else
345# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
349# endif
2b00a750
JD
350# elif PTRSIZE == IVSIZE
351# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
355# else
356# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
358# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 360# endif
1109a392 361
7212898e 362# if defined(my_htolen) && defined(my_letohn) && \
1109a392 363 defined(my_htoben) && defined(my_betohn)
7212898e 364# define DO_BO_UNPACK_N(var, type) \
1109a392 365 STMT_START { \
66c611c5 366 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
369 default: break; \
370 } \
371 } STMT_END
372
7212898e 373# define DO_BO_PACK_N(var, type) \
1109a392 374 STMT_START { \
66c611c5 375 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
378 default: break; \
379 } \
380 } STMT_END
7212898e
NC
381# else
382# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
384# endif
385
f337b084 386#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 387
78d46eaa 388#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 389#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
390#define PACK_SIZE_MASK 0x3F
391
78d46eaa
NC
392/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
394
395#if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
397#endif
398
399typedef U8 packprops_t;
78d46eaa
NC
400#if 'J'-'I' == 1
401/* ASCII */
f43a9a31 402STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
403 /* normal */
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 0, 0, 0,
1651fc44 409 /* C */ sizeof(unsigned char),
80a13697 410#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 411 /* D */ LONG_DOUBLESIZE,
80a13697 412#else
a7a3cfaa 413 0,
80a13697 414#endif
a7a3cfaa
TH
415 0,
416 /* F */ NVSIZE,
417 0, 0,
418 /* I */ sizeof(unsigned int),
419 /* J */ UVSIZE,
420 0,
421 /* L */ SIZE32,
422 0,
423 /* N */ SIZE32,
424 0, 0,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* Q */ sizeof(Uquad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* S */ SIZE16,
432 0,
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
434 /* V */ SIZE32,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
439 0,
440 /* f */ sizeof(float),
441 0, 0,
442 /* i */ sizeof(int),
443 /* j */ IVSIZE,
444 0,
445 /* l */ SIZE32,
446 0,
447 /* n */ SIZE16,
448 0,
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 450#if defined(HAS_QUAD)
a7a3cfaa 451 /* q */ sizeof(Quad_t),
80a13697 452#else
a7a3cfaa 453 0,
80a13697 454#endif
a7a3cfaa
TH
455 0,
456 /* s */ SIZE16,
457 0, 0,
458 /* v */ SIZE16,
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
469 /* shrieking */
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
476 0, 0,
477 /* L */ sizeof(unsigned long),
478 0,
7212898e 479#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 480 /* N */ SIZE32,
7212898e 481#else
a7a3cfaa 482 0,
7212898e 483#endif
a7a3cfaa
TH
484 0, 0, 0, 0,
485 /* S */ sizeof(unsigned short),
486 0, 0,
7212898e 487#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 488 /* V */ SIZE32,
7212898e 489#else
a7a3cfaa 490 0,
7212898e 491#endif
a7a3cfaa
TH
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0,
494 /* i */ sizeof(int),
495 0, 0,
496 /* l */ sizeof(long),
497 0,
7212898e 498#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 499 /* n */ SIZE16,
7212898e 500#else
a7a3cfaa 501 0,
7212898e 502#endif
a7a3cfaa
TH
503 0, 0, 0, 0,
504 /* s */ sizeof(short),
505 0, 0,
7212898e 506#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 507 /* v */ SIZE16,
7212898e 508#else
a7a3cfaa 509 0,
7212898e 510#endif
a7a3cfaa
TH
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
520};
521#else
522/* EBCDIC (or bust) */
f43a9a31 523STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
524 /* normal */
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
536 0,
537 /* f */ sizeof(float),
538 0, 0,
539 /* i */ sizeof(int),
540 0, 0, 0, 0, 0, 0, 0,
541 /* j */ IVSIZE,
542 0,
543 /* l */ SIZE32,
544 0,
545 /* n */ SIZE16,
546 0,
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 548#if defined(HAS_QUAD)
a7a3cfaa 549 /* q */ sizeof(Quad_t),
80a13697 550#else
a7a3cfaa 551 0,
80a13697 552#endif
a7a3cfaa
TH
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 /* s */ SIZE16,
555 0, 0,
556 /* v */ SIZE16,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 560 /* C */ sizeof(unsigned char),
80a13697 561#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 562 /* D */ LONG_DOUBLESIZE,
80a13697 563#else
a7a3cfaa 564 0,
80a13697 565#endif
a7a3cfaa
TH
566 0,
567 /* F */ NVSIZE,
568 0, 0,
569 /* I */ sizeof(unsigned int),
570 0, 0, 0, 0, 0, 0, 0,
571 /* J */ UVSIZE,
572 0,
573 /* L */ SIZE32,
574 0,
575 /* N */ SIZE32,
576 0, 0,
80a13697 577#if defined(HAS_QUAD)
a7a3cfaa 578 /* Q */ sizeof(Uquad_t),
80a13697 579#else
a7a3cfaa 580 0,
80a13697 581#endif
a7a3cfaa
TH
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 /* S */ SIZE16,
584 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
586 /* V */ SIZE32,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* shrieking */
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* i */ sizeof(int),
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
603 0,
7212898e 604#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 605 /* n */ SIZE16,
7212898e 606#else
a7a3cfaa 607 0,
7212898e 608#endif
a7a3cfaa
TH
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
611 0, 0,
7212898e 612#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 613 /* v */ SIZE16,
7212898e 614#else
a7a3cfaa 615 0,
7212898e 616#endif
a7a3cfaa
TH
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
619 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
623 0,
7212898e 624#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 625 /* N */ SIZE32,
7212898e 626#else
a7a3cfaa 627 0,
7212898e 628#endif
a7a3cfaa
TH
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
631 0, 0,
7212898e 632#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 633 /* V */ SIZE32,
7212898e 634#else
a7a3cfaa 635 0,
7212898e 636#endif
a7a3cfaa
TH
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
639};
640#endif
641
08ca2aa3 642STATIC U8
f7fe979e 643uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 644{
08ca2aa3 645 STRLEN retlen;
0bcc34c2 646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 648 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 654 if (val >= 0x100) {
a2a5de95
NC
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
658 val &= 0xff;
659 }
660 *s += retlen;
fe2774ed 661 return (U8)val;
08ca2aa3
TH
662}
663
f337b084
TH
664#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
666 *(U8 *)(s)++)
667
668STATIC bool
f7fe979e 669uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
670{
671 UV val;
672 STRLEN retlen;
f7fe979e 673 const char *from = *s;
08ca2aa3 674 int bad = 0;
f7fe979e 675 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
f337b084 679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
682 bad |= 1;
683 } else from += retlen;
684 if (val >= 0x100) {
685 bad |= 2;
686 val &= 0xff;
687 }
fe2774ed 688 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
689 }
690 /* We have enough characters for the buffer. Did we have problems ? */
691 if (bad) {
692 if (bad & 1) {
693 /* Rewalk the string fragment while warning */
f7fe979e 694 const char *ptr;
9e27e96a 695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
f337b084 698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
699 }
700 if (from > end) from = end;
701 }
a2a5de95
NC
702 if ((bad & 2))
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 704 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
708 }
709 *s = from;
710 return TRUE;
711}
712
713STATIC bool
f7fe979e 714next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 715{
97aff369 716 dVAR;
08ca2aa3 717 STRLEN retlen;
0bcc34c2 718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
721 *out = 0;
722 return FALSE;
723 }
724 *out = PL_uudmap[val] & 077;
f337b084 725 *s += retlen;
08ca2aa3
TH
726 return TRUE;
727}
78d46eaa 728
64844641 729STATIC char *
14333449 730S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 731 const U8 * const end = start + len;
64844641 732
7918f24d
NC
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
734
f337b084 735 while (start < end) {
48fa4626
ST
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
739 else {
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
742 }
743 start++;
744 }
64844641 745 return dest;
f337b084
TH
746}
747
230e1fce
NC
748#define PUSH_BYTES(utf8, cur, buf, len) \
749STMT_START { \
64844641
AL
750 if (utf8) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
752 else { \
753 Copy(buf, cur, len, char); \
754 (cur) += (len); \
755 } \
f337b084
TH
756} STMT_END
757
758#define GROWING(utf8, cat, start, cur, in_len) \
759STMT_START { \
760 STRLEN glen = (in_len); \
3473cf63 761 if (utf8) glen *= UTF8_EXPAND; \
f337b084 762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 763 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
764 (cur) = (start) + SvCUR(cat); \
765 } \
766} STMT_END
767
768#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
769STMT_START { \
f7fe979e 770 const STRLEN glen = (in_len); \
f337b084 771 STRLEN gl = glen; \
3473cf63 772 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
774 *cur = '\0'; \
b162af07 775 SvCUR_set((cat), (cur) - (start)); \
0bd48802 776 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
777 (cur) = (start) + SvCUR(cat); \
778 } \
779 PUSH_BYTES(utf8, cur, buf, glen); \
780} STMT_END
781
782#define PUSH_BYTE(utf8, s, byte) \
783STMT_START { \
784 if (utf8) { \
f7fe979e 785 const U8 au8 = (byte); \
64844641 786 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
787 } else *(U8 *)(s)++ = (byte); \
788} STMT_END
789
790/* Only to be used inside a loop (see the break) */
791#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
792STMT_START { \
793 STRLEN retlen; \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
797 *cur = '\0'; \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
799 } \
800 str += retlen; \
801} STMT_END
802
f7fe979e
AL
803static const char *_action( const tempsym_t* symptr )
804{
10edeb5d 805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
806}
807
206947d2 808/* Returns the sizeof() struct described by pat */
028d1f6d 809STATIC I32
f337b084 810S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 811{
f337b084 812 I32 total = 0;
206947d2 813
7918f24d
NC
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
815
49704364 816 while (next_symbol(symptr)) {
f337b084 817 I32 len;
f7fe979e 818 int size;
f337b084
TH
819
820 switch (symptr->howlen) {
fc241834 821 case e_star:
49704364 822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 823 _action( symptr ) );
49704364 824 break;
f337b084
TH
825 default:
826 /* e_no_len and e_number */
827 len = symptr->length;
828 break;
49704364
LW
829 }
830
a7a3cfaa 831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 832 if (!size) {
f7fe979e 833 int star;
80a13697
NC
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
836 default:
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 839 _action( symptr ) );
28be1210
TH
840#ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
843#endif
80a13697 844 case '@':
28be1210 845 case '.':
80a13697
NC
846 case '/':
847 case 'U': /* XXXX Is it correct? */
848 case 'w':
849 case 'u':
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 851 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 852 _action( symptr ) );
80a13697
NC
853 case '%':
854 size = 0;
855 break;
856 case '(':
fc241834
RGS
857 {
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
866 *symptr = savsym;
867 break;
868 }
80a13697
NC
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
871 */
872 if (!len) /* Avoid division by 0 */
873 len = 1;
874 len = total % len; /* Assumed: the start is aligned. */
875 /* FALL THROUGH */
876 case 'X':
877 size = -1;
878 if (total < len)
f7fe979e 879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
880 break;
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
883 len = 1;
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
886 len = len - star;
887 else
888 len = 0;
889 /* FALL THROUGH */
890 case 'x':
891 case 'A':
892 case 'Z':
893 case 'a':
80a13697
NC
894 size = 1;
895 break;
896 case 'B':
897 case 'b':
898 len = (len + 7)/8;
899 size = 1;
900 break;
901 case 'H':
902 case 'h':
903 len = (len + 1)/2;
904 size = 1;
905 break;
78d46eaa 906
80a13697
NC
907 case 'P':
908 len = 1;
909 size = sizeof(char*);
78d46eaa
NC
910 break;
911 }
206947d2
IZ
912 }
913 total += len * size;
914 }
915 return total;
916}
917
49704364
LW
918
919/* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
921 */
f7fe979e 922STATIC const char *
5aaab254 923S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 924{
7918f24d
NC
925 PERL_ARGS_ASSERT_GROUP_END;
926
49704364 927 while (patptr < patend) {
f7fe979e 928 const char c = *patptr++;
49704364
LW
929
930 if (isSPACE(c))
931 continue;
932 else if (c == ender)
933 return patptr-1;
934 else if (c == '#') {
935 while (patptr < patend && *patptr != '\n')
936 patptr++;
937 continue;
938 } else if (c == '(')
939 patptr = group_end(patptr, patend, ')') + 1;
940 else if (c == '[')
941 patptr = group_end(patptr, patend, ']') + 1;
18529408 942 }
49704364
LW
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
944 ender);
945 return 0;
18529408
IZ
946}
947
49704364
LW
948
949/* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
fc241834 952 */
f7fe979e 953STATIC const char *
5aaab254 954S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
955{
956 I32 len = *patptr++ - '0';
7918f24d
NC
957
958 PERL_ARGS_ASSERT_GET_NUM;
959
49704364
LW
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
964 }
965 *lenptr = len;
966 return patptr;
967}
968
969/* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
971 */
972STATIC bool
f337b084 973S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 974{
f7fe979e 975 const char* patptr = symptr->patptr;
0bcc34c2 976 const char* const patend = symptr->patend;
49704364 977
7918f24d
NC
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
979
49704364
LW
980 symptr->flags &= ~FLAG_SLASH;
981
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
984 patptr++;
985 else if (*patptr == '#') {
986 patptr++;
987 while (patptr < patend && *patptr != '\n')
988 patptr++;
989 if (patptr < patend)
990 patptr++;
991 } else {
fc241834 992 /* We should have found a template code */
49704364 993 I32 code = *patptr++ & 0xFF;
66c611c5 994 U32 inherited_modifiers = 0;
49704364
LW
995
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 1000 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
1001 }
1002 continue;
1003 }
fc241834 1004
49704364 1005 /* for '(', skip to ')' */
fc241834 1006 if (code == '(') {
49704364
LW
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 1009 _action( symptr ) );
49704364
LW
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 1014 _action( symptr ) );
49704364
LW
1015 }
1016
66c611c5
MHM
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1021 }
1022
1109a392
MHM
1023 /* look for modifiers */
1024 while (patptr < patend) {
b7787f18
AL
1025 const char *allowed;
1026 I32 modifier;
1109a392
MHM
1027 switch (*patptr) {
1028 case '!':
1029 modifier = TYPE_IS_SHRIEKING;
7212898e 1030 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1031 break;
7212898e 1032#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1033 case '>':
1034 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1036 break;
1037 case '<':
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1040 break;
f337b084 1041#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1042 default:
b7787f18
AL
1043 allowed = "";
1044 modifier = 0;
1109a392
MHM
1045 break;
1046 }
66c611c5 1047
1109a392
MHM
1048 if (modifier == 0)
1049 break;
66c611c5 1050
1109a392
MHM
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1053 allowed, _action( symptr ) );
66c611c5
MHM
1054
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1061 *patptr, _action( symptr ) );
66c611c5 1062
a2a5de95
NC
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1109a392 1068 }
66c611c5 1069
1109a392
MHM
1070 code |= modifier;
1071 patptr++;
49704364
LW
1072 }
1073
66c611c5
MHM
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1076
fc241834 1077 /* look for count and/or / */
49704364
LW
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1082
1083 } else if (*patptr == '*') {
1084 patptr++;
1085 symptr->howlen = e_star;
1086
1087 } else if (*patptr == '[') {
f7fe979e 1088 const char* lenptr = ++patptr;
49704364
LW
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1096 _action( symptr ) );
49704364
LW
1097 } else {
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1102 *symptr = savsym;
1103 }
1104 } else {
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 /* try to find / */
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1112 patptr++;
1113 else if (*patptr == '#') {
1114 patptr++;
1115 while (patptr < patend && *patptr != '\n')
1116 patptr++;
1117 if (patptr < patend)
1118 patptr++;
1119 } else {
66c611c5 1120 if (*patptr == '/') {
49704364
LW
1121 symptr->flags |= FLAG_SLASH;
1122 patptr++;
66c611c5
MHM
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1126 _action( symptr ) );
49704364
LW
1127 }
1128 break;
1129 }
18529408 1130 }
49704364
LW
1131 } else {
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1134 symptr->length = 1;
1135 }
1136
1137 symptr->code = code;
fc241834 1138 symptr->patptr = patptr;
49704364 1139 return TRUE;
18529408 1140 }
49704364 1141 }
fc241834 1142 symptr->patptr = patptr;
49704364 1143 return FALSE;
18529408
IZ
1144}
1145
18529408 1146/*
fc241834 1147 There is no way to cleanly handle the case where we should process the
08ca2aa3 1148 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1153 themselves if they need to do a lot of unpacks like this on it
1154*/
fc241834 1155STATIC bool
08ca2aa3
TH
1156need_utf8(const char *pat, const char *patend)
1157{
1158 bool first = TRUE;
7918f24d
NC
1159
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1161
08ca2aa3
TH
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1164 pat++;
f7fe979e 1165 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1170 pat++;
1171 }
1172 return FALSE;
1173}
1174
1175STATIC char
1176first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1178
08ca2aa3
TH
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1181 pat++;
f7fe979e 1182 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1183 if (!pat) return 0;
1184 pat++;
1185 }
1186 return 0;
1187}
1188
1189/*
7accc089
JH
1190=for apidoc unpackstring
1191
608d3aed 1192The engine implementing unpack() Perl function. C<unpackstring> puts the
349c8cee 1193extracted list items on the @_ stack and returns the number of elements.
608d3aed 1194Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
349c8cee
DD
1195Unlike L</call_pv>, do not do a C<PUSHMARK>. This function takes no parameters
1196on the @_ stack.
7accc089
JH
1197
1198=cut */
1199
1200I32
f7fe979e 1201Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1202{
f7fe979e 1203 tempsym_t sym;
08ca2aa3 1204
7918f24d
NC
1205 PERL_ARGS_ASSERT_UNPACKSTRING;
1206
f337b084 1207 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1208 else if (need_utf8(pat, patend)) {
1209 /* We probably should try to avoid this in case a scalar context call
1210 wouldn't get to the "U0" */
1211 STRLEN len = strend - s;
230e1fce 1212 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1213 SAVEFREEPV(s);
1214 strend = s + len;
f337b084 1215 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1216 }
1217
f337b084
TH
1218 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1219 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1220
f7fe979e 1221 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1222
1223 return unpack_rec(&sym, s, s, strend, NULL );
1224}
1225
4136a0f7 1226STATIC I32
f7fe979e 1227S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1228{
27da23d5 1229 dVAR; dSP;
3297d27d 1230 SV *sv = NULL;
f7fe979e 1231 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1232 howlen_t howlen;
a6ec74c1 1233 I32 checksum = 0;
92d41999 1234 UV cuv = 0;
a6ec74c1 1235 NV cdouble = 0.0;
f337b084 1236 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1237 bool beyond = FALSE;
21c16052 1238 bool explicit_length;
9e27e96a 1239 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1240 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1241
1242 PERL_ARGS_ASSERT_UNPACK_REC;
1243
28be1210 1244 symptr->strbeg = s - strbeg;
49704364 1245
49704364 1246 while (next_symbol(symptr)) {
a7a3cfaa 1247 packprops_t props;
9e27e96a 1248 I32 len;
f337b084 1249 I32 datumtype = symptr->code;
206947d2 1250 /* do first one only unless in list context
08ca2aa3 1251 / is implemented by unpacking the count, then popping it from the
206947d2 1252 stack, so must check that we're not in the middle of a / */
49704364 1253 if ( unpack_only_one
206947d2 1254 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1255 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1256 break;
49704364 1257
f337b084 1258 switch (howlen = symptr->howlen) {
fc241834
RGS
1259 case e_star:
1260 len = strend - strbeg; /* long enough */
49704364 1261 break;
f337b084
TH
1262 default:
1263 /* e_no_len and e_number */
1264 len = symptr->length;
1265 break;
49704364 1266 }
18529408 1267
21c16052 1268 explicit_length = TRUE;
a6ec74c1 1269 redo_switch:
49704364 1270 beyond = s >= strend;
a7a3cfaa
TH
1271
1272 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1273 if (props) {
1274 /* props nonzero means we can process this letter. */
9e27e96a
AL
1275 const long size = props & PACK_SIZE_MASK;
1276 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1277 if (len > howmany)
1278 len = howmany;
1279
1280 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1281 if (len && unpack_only_one) len = 1;
1282 EXTEND(SP, len);
1283 EXTEND_MORTAL(len);
78d46eaa
NC
1284 }
1285 }
a7a3cfaa 1286
1109a392 1287 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1288 default:
1109a392 1289 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1290
a6ec74c1 1291 case '%':
49704364 1292 if (howlen == e_no_len)
18529408 1293 len = 16; /* len is not specified */
a6ec74c1 1294 checksum = len;
92d41999 1295 cuv = 0;
a6ec74c1 1296 cdouble = 0;
18529408 1297 continue;
a6ec74c1 1298 break;
18529408
IZ
1299 case '(':
1300 {
49704364 1301 tempsym_t savsym = *symptr;
9e27e96a 1302 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1303 symptr->flags |= group_modifiers;
49704364 1304 symptr->patend = savsym.grpend;
28be1210 1305 symptr->previous = &savsym;
49704364 1306 symptr->level++;
18529408 1307 PUTBACK;
c6f750d1 1308 if (len && unpack_only_one) len = 1;
18529408 1309 while (len--) {
49704364 1310 symptr->patptr = savsym.grpbeg;
f337b084
TH
1311 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1312 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1313 unpack_rec(symptr, s, strbeg, strend, &s);
1314 if (s == strend && savsym.howlen == e_star)
49704364 1315 break; /* No way to continue */
18529408
IZ
1316 }
1317 SPAGAIN;
28be1210 1318 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1319 *symptr = savsym;
18529408
IZ
1320 break;
1321 }
28be1210
TH
1322#ifdef PERL_PACK_CAN_SHRIEKSIGN
1323 case '.' | TYPE_IS_SHRIEKING:
1324#endif
1325 case '.': {
9e27e96a 1326 const char *from;
28be1210
TH
1327 SV *sv;
1328#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1329 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1330#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1331 const bool u8 = utf8;
28be1210
TH
1332#endif
1333 if (howlen == e_star) from = strbeg;
1334 else if (len <= 0) from = s;
1335 else {
1336 tempsym_t *group = symptr;
1337
1338 while (--len && group) group = group->previous;
1339 from = group ? strbeg + group->strbeg : strbeg;
1340 }
1341 sv = from <= s ?
00646304
CB
1342 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1343 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1344 mXPUSHs(sv);
28be1210
TH
1345 break;
1346 }
1347#ifdef PERL_PACK_CAN_SHRIEKSIGN
1348 case '@' | TYPE_IS_SHRIEKING:
1349#endif
a6ec74c1 1350 case '@':
28be1210
TH
1351 s = strbeg + symptr->strbeg;
1352#ifdef PERL_PACK_CAN_SHRIEKSIGN
1353 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1354#else /* PERL_PACK_CAN_SHRIEKSIGN */
1355 if (utf8)
1356#endif
1357 {
08ca2aa3
TH
1358 while (len > 0) {
1359 if (s >= strend)
1360 Perl_croak(aTHX_ "'@' outside of string in unpack");
1361 s += UTF8SKIP(s);
1362 len--;
1363 }
1364 if (s > strend)
1365 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1366 } else {
28be1210 1367 if (strend-s < len)
fc241834 1368 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1369 s += len;
08ca2aa3 1370 }
a6ec74c1 1371 break;
62f95557
IZ
1372 case 'X' | TYPE_IS_SHRIEKING:
1373 if (!len) /* Avoid division by 0 */
1374 len = 1;
08ca2aa3 1375 if (utf8) {
f7fe979e 1376 const char *hop, *last;
f337b084
TH
1377 I32 l = len;
1378 hop = last = strbeg;
1379 while (hop < s) {
1380 hop += UTF8SKIP(hop);
1381 if (--l == 0) {
08ca2aa3 1382 last = hop;
f337b084
TH
1383 l = len;
1384 }
fc241834 1385 }
f337b084
TH
1386 if (last > s)
1387 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1388 s = last;
1389 break;
f337b084
TH
1390 }
1391 len = (s - strbeg) % len;
62f95557 1392 /* FALL THROUGH */
a6ec74c1 1393 case 'X':
08ca2aa3
TH
1394 if (utf8) {
1395 while (len > 0) {
1396 if (s <= strbeg)
1397 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1398 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1399 if (s <= strbeg)
1400 Perl_croak(aTHX_ "'X' outside of string in unpack");
1401 }
1402 len--;
1403 }
1404 } else {
fc241834
RGS
1405 if (len > s - strbeg)
1406 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1407 s -= len;
08ca2aa3 1408 }
a6ec74c1 1409 break;
9e27e96a
AL
1410 case 'x' | TYPE_IS_SHRIEKING: {
1411 I32 ai32;
62f95557
IZ
1412 if (!len) /* Avoid division by 0 */
1413 len = 1;
230e1fce
NC
1414 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1415 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1416 if (ai32 == 0) break;
1417 len -= ai32;
9e27e96a 1418 }
62f95557 1419 /* FALL THROUGH */
a6ec74c1 1420 case 'x':
08ca2aa3
TH
1421 if (utf8) {
1422 while (len>0) {
1423 if (s >= strend)
1424 Perl_croak(aTHX_ "'x' outside of string in unpack");
1425 s += UTF8SKIP(s);
1426 len--;
1427 }
1428 } else {
fc241834
RGS
1429 if (len > strend - s)
1430 Perl_croak(aTHX_ "'x' outside of string in unpack");
1431 s += len;
f337b084 1432 }
a6ec74c1
JH
1433 break;
1434 case '/':
49704364
LW
1435 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1436 break;
a6ec74c1
JH
1437 case 'A':
1438 case 'Z':
1439 case 'a':
08ca2aa3
TH
1440 if (checksum) {
1441 /* Preliminary length estimate is assumed done in 'W' */
1442 if (len > strend - s) len = strend - s;
1443 goto W_checksum;
1444 }
1445 if (utf8) {
1446 I32 l;
f7fe979e 1447 const char *hop;
08ca2aa3
TH
1448 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1449 if (hop >= strend) {
1450 if (hop > strend)
1451 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1452 break;
fc241834 1453 }
a6ec74c1 1454 }
08ca2aa3
TH
1455 if (hop > strend)
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1457 len = hop - s;
1458 } else if (len > strend - s)
1459 len = strend - s;
1460
1461 if (datumtype == 'Z') {
1462 /* 'Z' strips stuff after first null */
f7fe979e 1463 const char *ptr, *end;
f337b084
TH
1464 end = s + len;
1465 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1466 sv = newSVpvn(s, ptr-s);
1467 if (howlen == e_star) /* exact for 'Z*' */
1468 len = ptr-s + (ptr != strend ? 1 : 0);
1469 } else if (datumtype == 'A') {
1470 /* 'A' strips both nulls and spaces */
f7fe979e 1471 const char *ptr;
18bdf90a
TH
1472 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1473 for (ptr = s+len-1; ptr >= s; ptr--)
1474 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1475 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1476 if (ptr >= s) ptr += UTF8SKIP(ptr);
1477 else ptr++;
28be1210 1478 if (ptr > s+len)
18bdf90a
TH
1479 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1480 } else {
1481 for (ptr = s+len-1; ptr >= s; ptr--)
1482 if (*ptr != 0 && !isSPACE(*ptr)) break;
1483 ptr++;
1484 }
08ca2aa3
TH
1485 sv = newSVpvn(s, ptr-s);
1486 } else sv = newSVpvn(s, len);
1487
1488 if (utf8) {
1489 SvUTF8_on(sv);
1490 /* Undo any upgrade done due to need_utf8() */
f337b084 1491 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1492 sv_utf8_downgrade(sv, 0);
a6ec74c1 1493 }
6e449a3a 1494 mXPUSHs(sv);
08ca2aa3 1495 s += len;
a6ec74c1
JH
1496 break;
1497 case 'B':
08ca2aa3
TH
1498 case 'b': {
1499 char *str;
49704364 1500 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1501 len = (strend - s) * 8;
1502 if (checksum) {
f337b084 1503 if (utf8)
08ca2aa3 1504 while (len >= 8 && s < strend) {
f337b084 1505 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1506 len -= 8;
1507 }
f337b084 1508 else
fc241834 1509 while (len >= 8) {
08ca2aa3 1510 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1511 len -= 8;
1512 }
08ca2aa3
TH
1513 if (len && s < strend) {
1514 U8 bits;
f337b084
TH
1515 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1516 if (datumtype == 'b')
a6ec74c1 1517 while (len-- > 0) {
92d41999 1518 if (bits & 1) cuv++;
a6ec74c1
JH
1519 bits >>= 1;
1520 }
f337b084 1521 else
a6ec74c1 1522 while (len-- > 0) {
08ca2aa3 1523 if (bits & 0x80) cuv++;
a6ec74c1
JH
1524 bits <<= 1;
1525 }
fc241834 1526 }
a6ec74c1
JH
1527 break;
1528 }
08ca2aa3 1529
561b68a9 1530 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1531 SvPOK_on(sv);
1532 str = SvPVX(sv);
1533 if (datumtype == 'b') {
f337b084 1534 U8 bits = 0;
f7fe979e 1535 const I32 ai32 = len;
08ca2aa3
TH
1536 for (len = 0; len < ai32; len++) {
1537 if (len & 7) bits >>= 1;
1538 else if (utf8) {
1539 if (s >= strend) break;
f337b084 1540 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1541 } else bits = *(U8 *) s++;
1542 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1543 }
08ca2aa3 1544 } else {
f337b084 1545 U8 bits = 0;
f7fe979e 1546 const I32 ai32 = len;
08ca2aa3
TH
1547 for (len = 0; len < ai32; len++) {
1548 if (len & 7) bits <<= 1;
1549 else if (utf8) {
1550 if (s >= strend) break;
f337b084 1551 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1552 } else bits = *(U8 *) s++;
1553 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1554 }
1555 }
1556 *str = '\0';
aa07b2f6 1557 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1558 XPUSHs(sv);
a6ec74c1 1559 break;
08ca2aa3 1560 }
a6ec74c1 1561 case 'H':
08ca2aa3 1562 case 'h': {
3297d27d 1563 char *str = NULL;
fc241834 1564 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1565 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1566 len = (strend - s) * 2;
858fe5e1
TC
1567 if (!checksum) {
1568 sv = sv_2mortal(newSV(len ? len : 1));
1569 SvPOK_on(sv);
1570 str = SvPVX(sv);
1571 }
a6ec74c1 1572 if (datumtype == 'h') {
f337b084 1573 U8 bits = 0;
9e27e96a 1574 I32 ai32 = len;
fc241834
RGS
1575 for (len = 0; len < ai32; len++) {
1576 if (len & 1) bits >>= 4;
1577 else if (utf8) {
1578 if (s >= strend) break;
f337b084 1579 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1580 } else bits = * (U8 *) s++;
858fe5e1
TC
1581 if (!checksum)
1582 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1583 }
08ca2aa3 1584 } else {
f337b084 1585 U8 bits = 0;
f7fe979e 1586 const I32 ai32 = len;
08ca2aa3
TH
1587 for (len = 0; len < ai32; len++) {
1588 if (len & 1) bits <<= 4;
1589 else if (utf8) {
1590 if (s >= strend) break;
f337b084 1591 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1592 } else bits = *(U8 *) s++;
858fe5e1
TC
1593 if (!checksum)
1594 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1595 }
1596 }
858fe5e1
TC
1597 if (!checksum) {
1598 *str = '\0';
1599 SvCUR_set(sv, str - SvPVX_const(sv));
1600 XPUSHs(sv);
1601 }
a6ec74c1 1602 break;
08ca2aa3 1603 }
1651fc44
ML
1604 case 'C':
1605 if (len == 0) {
1606 if (explicit_length)
1607 /* Switch to "character" mode */
1608 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1609 break;
1610 }
1611 /* FALL THROUGH */
a6ec74c1 1612 case 'c':
1651fc44
ML
1613 while (len-- > 0 && s < strend) {
1614 int aint;
1615 if (utf8)
1616 {
1617 STRLEN retlen;
1618 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1619 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1620 if (retlen == (STRLEN) -1 || retlen == 0)
1621 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1622 s += retlen;
1623 }
1624 else
1625 aint = *(U8 *)(s)++;
1626 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1627 aint -= 256;
08ca2aa3 1628 if (!checksum)
6e449a3a 1629 mPUSHi(aint);
73cb7263
NC
1630 else if (checksum > bits_in_uv)
1631 cdouble += (NV)aint;
1632 else
1633 cuv += aint;
a6ec74c1
JH
1634 }
1635 break;
08ca2aa3
TH
1636 case 'W':
1637 W_checksum:
1651fc44 1638 if (utf8) {
08ca2aa3 1639 while (len-- > 0 && s < strend) {
08ca2aa3 1640 STRLEN retlen;
f7fe979e 1641 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1642 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1643 if (retlen == (STRLEN) -1 || retlen == 0)
1644 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1645 s += retlen;
1646 if (!checksum)
6e449a3a 1647 mPUSHu(val);
08ca2aa3
TH
1648 else if (checksum > bits_in_uv)
1649 cdouble += (NV) val;
d6d3e8bd 1650 else
08ca2aa3 1651 cuv += val;
fc241834 1652 }
08ca2aa3 1653 } else if (!checksum)
a6ec74c1 1654 while (len-- > 0) {
f7fe979e 1655 const U8 ch = *(U8 *) s++;
6e449a3a 1656 mPUSHu(ch);
a6ec74c1 1657 }
08ca2aa3
TH
1658 else if (checksum > bits_in_uv)
1659 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1660 else
1661 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1662 break;
1663 case 'U':
35bcd338 1664 if (len == 0) {
c5333953 1665 if (explicit_length && howlen != e_star) {
08ca2aa3 1666 /* Switch to "bytes in UTF-8" mode */
f337b084 1667 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1668 else
1669 /* Should be impossible due to the need_utf8() test */
1670 Perl_croak(aTHX_ "U0 mode on a byte string");
1671 }
35bcd338
JH
1672 break;
1673 }
08ca2aa3 1674 if (len > strend - s) len = strend - s;
fc241834 1675 if (!checksum) {
08ca2aa3
TH
1676 if (len && unpack_only_one) len = 1;
1677 EXTEND(SP, len);
1678 EXTEND_MORTAL(len);
fc241834 1679 }
08ca2aa3
TH
1680 while (len-- > 0 && s < strend) {
1681 STRLEN retlen;
1682 UV auv;
1683 if (utf8) {
1684 U8 result[UTF8_MAXLEN];
f7fe979e 1685 const char *ptr = s;
08ca2aa3 1686 STRLEN len;
08ca2aa3
TH
1687 /* Bug: warns about bad utf8 even if we are short on bytes
1688 and will break out of the loop */
230e1fce
NC
1689 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1690 'U'))
08ca2aa3
TH
1691 break;
1692 len = UTF8SKIP(result);
fc241834 1693 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1694 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1695 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1696 s = ptr;
1697 } else {
cde84f2a 1698 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1699 if (retlen == (STRLEN) -1 || retlen == 0)
1700 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1701 s += retlen;
1702 }
1703 if (!checksum)
6e449a3a 1704 mPUSHu(auv);
73cb7263 1705 else if (checksum > bits_in_uv)
08ca2aa3 1706 cdouble += (NV) auv;
73cb7263 1707 else
08ca2aa3 1708 cuv += auv;
a6ec74c1
JH
1709 }
1710 break;
49704364
LW
1711 case 's' | TYPE_IS_SHRIEKING:
1712#if SHORTSIZE != SIZE16
73cb7263 1713 while (len-- > 0) {
08ca2aa3 1714 short ashort;
f337b084
TH
1715 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1716 DO_BO_UNPACK(ashort, s);
08ca2aa3 1717 if (!checksum)
6e449a3a 1718 mPUSHi(ashort);
73cb7263
NC
1719 else if (checksum > bits_in_uv)
1720 cdouble += (NV)ashort;
1721 else
1722 cuv += ashort;
49704364
LW
1723 }
1724 break;
1725#else
1726 /* Fallthrough! */
a6ec74c1 1727#endif
49704364 1728 case 's':
73cb7263 1729 while (len-- > 0) {
08ca2aa3
TH
1730 I16 ai16;
1731
1732#if U16SIZE > SIZE16
1733 ai16 = 0;
1734#endif
f337b084 1735 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1736 DO_BO_UNPACK(ai16, 16);
1109a392 1737#if U16SIZE > SIZE16
73cb7263
NC
1738 if (ai16 > 32767)
1739 ai16 -= 65536;
a6ec74c1 1740#endif
08ca2aa3 1741 if (!checksum)
6e449a3a 1742 mPUSHi(ai16);
73cb7263
NC
1743 else if (checksum > bits_in_uv)
1744 cdouble += (NV)ai16;
1745 else
1746 cuv += ai16;
a6ec74c1
JH
1747 }
1748 break;
49704364
LW
1749 case 'S' | TYPE_IS_SHRIEKING:
1750#if SHORTSIZE != SIZE16
73cb7263 1751 while (len-- > 0) {
08ca2aa3 1752 unsigned short aushort;
f337b084
TH
1753 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1754 DO_BO_UNPACK(aushort, s);
08ca2aa3 1755 if (!checksum)
6e449a3a 1756 mPUSHu(aushort);
73cb7263
NC
1757 else if (checksum > bits_in_uv)
1758 cdouble += (NV)aushort;
1759 else
1760 cuv += aushort;
49704364
LW
1761 }
1762 break;
1763#else
486ec47a 1764 /* Fallthrough! */
49704364 1765#endif
a6ec74c1
JH
1766 case 'v':
1767 case 'n':
1768 case 'S':
73cb7263 1769 while (len-- > 0) {
08ca2aa3
TH
1770 U16 au16;
1771#if U16SIZE > SIZE16
1772 au16 = 0;
1773#endif
f337b084 1774 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1775 DO_BO_UNPACK(au16, 16);
a6ec74c1 1776#ifdef HAS_NTOHS
73cb7263
NC
1777 if (datumtype == 'n')
1778 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1779#endif
1780#ifdef HAS_VTOHS
73cb7263
NC
1781 if (datumtype == 'v')
1782 au16 = vtohs(au16);
a6ec74c1 1783#endif
08ca2aa3 1784 if (!checksum)
6e449a3a 1785 mPUSHu(au16);
73cb7263 1786 else if (checksum > bits_in_uv)
f337b084 1787 cdouble += (NV) au16;
73cb7263
NC
1788 else
1789 cuv += au16;
a6ec74c1
JH
1790 }
1791 break;
7212898e 1792#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1793 case 'v' | TYPE_IS_SHRIEKING:
1794 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1795 while (len-- > 0) {
08ca2aa3
TH
1796 I16 ai16;
1797# if U16SIZE > SIZE16
1798 ai16 = 0;
1799# endif
f337b084 1800 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1801# ifdef HAS_NTOHS
73cb7263 1802 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1803 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1804# endif /* HAS_NTOHS */
1805# ifdef HAS_VTOHS
73cb7263 1806 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1807 ai16 = (I16) vtohs((U16) ai16);
1808# endif /* HAS_VTOHS */
1809 if (!checksum)
6e449a3a 1810 mPUSHi(ai16);
73cb7263 1811 else if (checksum > bits_in_uv)
08ca2aa3 1812 cdouble += (NV) ai16;
73cb7263
NC
1813 else
1814 cuv += ai16;
068bd2e7
MHM
1815 }
1816 break;
08ca2aa3 1817#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1818 case 'i':
49704364 1819 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1820 while (len-- > 0) {
08ca2aa3 1821 int aint;
f337b084
TH
1822 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1823 DO_BO_UNPACK(aint, i);
08ca2aa3 1824 if (!checksum)
6e449a3a 1825 mPUSHi(aint);
73cb7263
NC
1826 else if (checksum > bits_in_uv)
1827 cdouble += (NV)aint;
1828 else
1829 cuv += aint;
a6ec74c1
JH
1830 }
1831 break;
1832 case 'I':
49704364 1833 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1834 while (len-- > 0) {
08ca2aa3 1835 unsigned int auint;
f337b084
TH
1836 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1837 DO_BO_UNPACK(auint, i);
08ca2aa3 1838 if (!checksum)
6e449a3a 1839 mPUSHu(auint);
73cb7263
NC
1840 else if (checksum > bits_in_uv)
1841 cdouble += (NV)auint;
1842 else
1843 cuv += auint;
a6ec74c1
JH
1844 }
1845 break;
92d41999 1846 case 'j':
73cb7263 1847 while (len-- > 0) {
08ca2aa3 1848 IV aiv;
f337b084 1849 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1850#if IVSIZE == INTSIZE
f337b084 1851 DO_BO_UNPACK(aiv, i);
1109a392 1852#elif IVSIZE == LONGSIZE
f337b084 1853 DO_BO_UNPACK(aiv, l);
1109a392 1854#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1855 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1856#else
1857 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1858#endif
08ca2aa3 1859 if (!checksum)
6e449a3a 1860 mPUSHi(aiv);
73cb7263
NC
1861 else if (checksum > bits_in_uv)
1862 cdouble += (NV)aiv;
1863 else
1864 cuv += aiv;
92d41999
JH
1865 }
1866 break;
1867 case 'J':
73cb7263 1868 while (len-- > 0) {
08ca2aa3 1869 UV auv;
f337b084 1870 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1871#if IVSIZE == INTSIZE
f337b084 1872 DO_BO_UNPACK(auv, i);
08ca2aa3 1873#elif IVSIZE == LONGSIZE
f337b084 1874 DO_BO_UNPACK(auv, l);
08ca2aa3 1875#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1876 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1877#else
1878 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1879#endif
08ca2aa3 1880 if (!checksum)
6e449a3a 1881 mPUSHu(auv);
73cb7263
NC
1882 else if (checksum > bits_in_uv)
1883 cdouble += (NV)auv;
1884 else
1885 cuv += auv;
92d41999
JH
1886 }
1887 break;
49704364
LW
1888 case 'l' | TYPE_IS_SHRIEKING:
1889#if LONGSIZE != SIZE32
73cb7263 1890 while (len-- > 0) {
08ca2aa3 1891 long along;
f337b084
TH
1892 SHIFT_VAR(utf8, s, strend, along, datumtype);
1893 DO_BO_UNPACK(along, l);
08ca2aa3 1894 if (!checksum)
6e449a3a 1895 mPUSHi(along);
73cb7263
NC
1896 else if (checksum > bits_in_uv)
1897 cdouble += (NV)along;
1898 else
1899 cuv += along;
49704364
LW
1900 }
1901 break;
1902#else
1903 /* Fallthrough! */
a6ec74c1 1904#endif
49704364 1905 case 'l':
73cb7263 1906 while (len-- > 0) {
08ca2aa3
TH
1907 I32 ai32;
1908#if U32SIZE > SIZE32
1909 ai32 = 0;
1910#endif
f337b084 1911 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1912 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1913#if U32SIZE > SIZE32
08ca2aa3 1914 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1915#endif
08ca2aa3 1916 if (!checksum)
6e449a3a 1917 mPUSHi(ai32);
73cb7263
NC
1918 else if (checksum > bits_in_uv)
1919 cdouble += (NV)ai32;
1920 else
1921 cuv += ai32;
a6ec74c1
JH
1922 }
1923 break;
49704364
LW
1924 case 'L' | TYPE_IS_SHRIEKING:
1925#if LONGSIZE != SIZE32
73cb7263 1926 while (len-- > 0) {
08ca2aa3 1927 unsigned long aulong;
f337b084
TH
1928 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1929 DO_BO_UNPACK(aulong, l);
08ca2aa3 1930 if (!checksum)
6e449a3a 1931 mPUSHu(aulong);
73cb7263
NC
1932 else if (checksum > bits_in_uv)
1933 cdouble += (NV)aulong;
1934 else
1935 cuv += aulong;
49704364
LW
1936 }
1937 break;
1938#else
1939 /* Fall through! */
1940#endif
a6ec74c1
JH
1941 case 'V':
1942 case 'N':
1943 case 'L':
73cb7263 1944 while (len-- > 0) {
08ca2aa3
TH
1945 U32 au32;
1946#if U32SIZE > SIZE32
1947 au32 = 0;
1948#endif
f337b084 1949 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1950 DO_BO_UNPACK(au32, 32);
a6ec74c1 1951#ifdef HAS_NTOHL
73cb7263
NC
1952 if (datumtype == 'N')
1953 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1954#endif
1955#ifdef HAS_VTOHL
73cb7263
NC
1956 if (datumtype == 'V')
1957 au32 = vtohl(au32);
a6ec74c1 1958#endif
08ca2aa3 1959 if (!checksum)
6e449a3a 1960 mPUSHu(au32);
fc241834
RGS
1961 else if (checksum > bits_in_uv)
1962 cdouble += (NV)au32;
1963 else
1964 cuv += au32;
a6ec74c1
JH
1965 }
1966 break;
7212898e 1967#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1968 case 'V' | TYPE_IS_SHRIEKING:
1969 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1970 while (len-- > 0) {
08ca2aa3
TH
1971 I32 ai32;
1972# if U32SIZE > SIZE32
1973 ai32 = 0;
1974# endif
f337b084 1975 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1976# ifdef HAS_NTOHL
73cb7263
NC
1977 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1978 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1979# endif
1980# ifdef HAS_VTOHL
73cb7263
NC
1981 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1982 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1983# endif
1984 if (!checksum)
6e449a3a 1985 mPUSHi(ai32);
73cb7263
NC
1986 else if (checksum > bits_in_uv)
1987 cdouble += (NV)ai32;
1988 else
1989 cuv += ai32;
068bd2e7
MHM
1990 }
1991 break;
08ca2aa3 1992#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1993 case 'p':
a6ec74c1 1994 while (len-- > 0) {
f7fe979e 1995 const char *aptr;
f337b084 1996 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1997 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1998 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1999 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
2000 }
2001 break;
2002 case 'w':
a6ec74c1
JH
2003 {
2004 UV auv = 0;
2005 U32 bytes = 0;
fc241834 2006
08ca2aa3
TH
2007 while (len > 0 && s < strend) {
2008 U8 ch;
f337b084 2009 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 2010 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2011 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2012 if (ch < 0x80) {
a6ec74c1 2013 bytes = 0;
6e449a3a 2014 mPUSHu(auv);
a6ec74c1
JH
2015 len--;
2016 auv = 0;
08ca2aa3 2017 continue;
a6ec74c1 2018 }
08ca2aa3 2019 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2020 const char *t;
a6ec74c1 2021
f5992bc4 2022 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2023 while (s < strend) {
f337b084 2024 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2025 sv = mul128(sv, (U8)(ch & 0x7f));
2026 if (!(ch & 0x80)) {
a6ec74c1
JH
2027 bytes = 0;
2028 break;
2029 }
2030 }
10516c54 2031 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2032 while (*t == '0')
2033 t++;
2034 sv_chop(sv, t);
6e449a3a 2035 mPUSHs(sv);
a6ec74c1
JH
2036 len--;
2037 auv = 0;
2038 }
2039 }
2040 if ((s >= strend) && bytes)
49704364 2041 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2042 }
2043 break;
2044 case 'P':
49704364
LW
2045 if (symptr->howlen == e_star)
2046 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2047 EXTEND(SP, 1);
2d3e0934 2048 if (s + sizeof(char*) <= strend) {
08ca2aa3 2049 char *aptr;
f337b084 2050 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2051 DO_BO_UNPACK_PC(aptr);
fc241834 2052 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2053 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2054 }
a6ec74c1
JH
2055 break;
2056#ifdef HAS_QUAD
2057 case 'q':
73cb7263 2058 while (len-- > 0) {
08ca2aa3 2059 Quad_t aquad;
f337b084
TH
2060 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2061 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2062 if (!checksum)
6e449a3a
MHM
2063 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2064 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2065 else if (checksum > bits_in_uv)
2066 cdouble += (NV)aquad;
2067 else
2068 cuv += aquad;
2069 }
a6ec74c1
JH
2070 break;
2071 case 'Q':
73cb7263 2072 while (len-- > 0) {
08ca2aa3 2073 Uquad_t auquad;
f337b084
TH
2074 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2075 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2076 if (!checksum)
6e449a3a
MHM
2077 mPUSHs(auquad <= UV_MAX ?
2078 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2079 else if (checksum > bits_in_uv)
2080 cdouble += (NV)auquad;
2081 else
2082 cuv += auquad;
a6ec74c1
JH
2083 }
2084 break;
08ca2aa3 2085#endif /* HAS_QUAD */
a6ec74c1
JH
2086 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2087 case 'f':
73cb7263 2088 while (len-- > 0) {
08ca2aa3 2089 float afloat;
f337b084 2090 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2091 DO_BO_UNPACK_N(afloat, float);
2092 if (!checksum)
6e449a3a 2093 mPUSHn(afloat);
08ca2aa3 2094 else
73cb7263 2095 cdouble += afloat;
fc241834 2096 }
a6ec74c1
JH
2097 break;
2098 case 'd':
73cb7263 2099 while (len-- > 0) {
08ca2aa3 2100 double adouble;
f337b084 2101 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2102 DO_BO_UNPACK_N(adouble, double);
2103 if (!checksum)
6e449a3a 2104 mPUSHn(adouble);
08ca2aa3 2105 else
73cb7263 2106 cdouble += adouble;
fc241834 2107 }
a6ec74c1 2108 break;
92d41999 2109 case 'F':
73cb7263 2110 while (len-- > 0) {
275663fa
TC
2111 NV_bytes anv;
2112 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2113 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 2114 if (!checksum)
275663fa 2115 mPUSHn(anv.nv);
08ca2aa3 2116 else
275663fa 2117 cdouble += anv.nv;
fc241834 2118 }
92d41999
JH
2119 break;
2120#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2121 case 'D':
73cb7263 2122 while (len-- > 0) {
275663fa
TC
2123 ld_bytes aldouble;
2124 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2125 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 2126 if (!checksum)
275663fa 2127 mPUSHn(aldouble.ld);
08ca2aa3 2128 else
275663fa 2129 cdouble += aldouble.ld;
92d41999
JH
2130 }
2131 break;
2132#endif
a6ec74c1 2133 case 'u':
858fe5e1 2134 if (!checksum) {
f7fe979e 2135 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2136 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2137 if (l) SvPOK_on(sv);
2138 }
2139 if (utf8) {
2140 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2141 I32 a, b, c, d;
db187877 2142 char hunk[3];
08ca2aa3 2143
08ca2aa3
TH
2144 while (len > 0) {
2145 next_uni_uu(aTHX_ &s, strend, &a);
2146 next_uni_uu(aTHX_ &s, strend, &b);
2147 next_uni_uu(aTHX_ &s, strend, &c);
2148 next_uni_uu(aTHX_ &s, strend, &d);
2149 hunk[0] = (char)((a << 2) | (b >> 4));
2150 hunk[1] = (char)((b << 4) | (c >> 2));
2151 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2152 if (!checksum)
2153 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
2154 len -= 3;
2155 }
2156 if (s < strend) {
f7fe979e
AL
2157 if (*s == '\n') {
2158 s++;
2159 }
08ca2aa3
TH
2160 else {
2161 /* possible checksum byte */
f7fe979e
AL
2162 const char *skip = s+UTF8SKIP(s);
2163 if (skip < strend && *skip == '\n')
2164 s = skip+1;
08ca2aa3
TH
2165 }
2166 }
2167 }
2168 } else {
fc241834
RGS
2169 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2170 I32 a, b, c, d;
db187877 2171 char hunk[3];
a6ec74c1 2172
fc241834
RGS
2173 len = PL_uudmap[*(U8*)s++] & 077;
2174 while (len > 0) {
2175 if (s < strend && ISUUCHAR(*s))
2176 a = PL_uudmap[*(U8*)s++] & 077;
2177 else
2178 a = 0;
2179 if (s < strend && ISUUCHAR(*s))
2180 b = PL_uudmap[*(U8*)s++] & 077;
2181 else
2182 b = 0;
2183 if (s < strend && ISUUCHAR(*s))
2184 c = PL_uudmap[*(U8*)s++] & 077;
2185 else
2186 c = 0;
2187 if (s < strend && ISUUCHAR(*s))
2188 d = PL_uudmap[*(U8*)s++] & 077;
2189 else
2190 d = 0;
2191 hunk[0] = (char)((a << 2) | (b >> 4));
2192 hunk[1] = (char)((b << 4) | (c >> 2));
2193 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2194 if (!checksum)
2195 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
2196 len -= 3;
2197 }
2198 if (*s == '\n')
2199 s++;
2200 else /* possible checksum byte */
2201 if (s + 1 < strend && s[1] == '\n')
2202 s += 2;
a6ec74c1 2203 }
08ca2aa3 2204 }
858fe5e1
TC
2205 if (!checksum)
2206 XPUSHs(sv);
a6ec74c1
JH
2207 break;
2208 }
49704364 2209
a6ec74c1 2210 if (checksum) {
1109a392 2211 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2212 (checksum > bits_in_uv &&
08ca2aa3
TH
2213 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2214 NV trouble, anv;
a6ec74c1 2215
08ca2aa3 2216 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2217 while (checksum >= 16) {
2218 checksum -= 16;
08ca2aa3 2219 anv *= 65536.0;
a6ec74c1 2220 }
a6ec74c1 2221 while (cdouble < 0.0)
08ca2aa3
TH
2222 cdouble += anv;
2223 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2224 sv = newSVnv(cdouble);
a6ec74c1
JH
2225 }
2226 else {
fa8ec7c1
NC
2227 if (checksum < bits_in_uv) {
2228 UV mask = ((UV)1 << checksum) - 1;
92d41999 2229 cuv &= mask;
a6ec74c1 2230 }
c4c5f44a 2231 sv = newSVuv(cuv);
a6ec74c1 2232 }
6e449a3a 2233 mXPUSHs(sv);
a6ec74c1
JH
2234 checksum = 0;
2235 }
fc241834 2236
49704364
LW
2237 if (symptr->flags & FLAG_SLASH){
2238 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 2239 break;
49704364
LW
2240 if( next_symbol(symptr) ){
2241 if( symptr->howlen == e_number )
2242 Perl_croak(aTHX_ "Count after length/code in unpack" );
2243 if( beyond ){
2244 /* ...end of char buffer then no decent length available */
2245 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2246 } else {
2247 /* take top of stack (hope it's numeric) */
2248 len = POPi;
2249 if( len < 0 )
2250 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2251 }
2252 } else {
2253 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2254 }
2255 datumtype = symptr->code;
21c16052 2256 explicit_length = FALSE;
49704364
LW
2257 goto redo_switch;
2258 }
a6ec74c1 2259 }
49704364 2260
18529408
IZ
2261 if (new_s)
2262 *new_s = s;
2263 PUTBACK;
2264 return SP - PL_stack_base - start_sp_offset;
2265}
2266
2267PP(pp_unpack)
2268{
97aff369 2269 dVAR;
18529408 2270 dSP;
bab9c0ac 2271 dPOPPOPssrl;
18529408
IZ
2272 I32 gimme = GIMME_V;
2273 STRLEN llen;
2274 STRLEN rlen;
5c144d81
NC
2275 const char *pat = SvPV_const(left, llen);
2276 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2277 const char *strend = s + rlen;
2278 const char *patend = pat + llen;
08ca2aa3 2279 I32 cnt;
18529408
IZ
2280
2281 PUTBACK;
7accc089 2282 cnt = unpackstring(pat, patend, s, strend,
49704364 2283 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2284 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2285
18529408
IZ
2286 SPAGAIN;
2287 if ( !cnt && gimme == G_SCALAR )
2288 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2289 RETURN;
2290}
2291
f337b084 2292STATIC U8 *
f7fe979e 2293doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2294{
f337b084 2295 *h++ = PL_uuemap[len];
a6ec74c1 2296 while (len > 2) {
f337b084
TH
2297 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2298 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2299 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2300 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2301 s += 3;
2302 len -= 3;
2303 }
2304 if (len > 0) {
f7fe979e 2305 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2306 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2307 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2308 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2309 *h++ = PL_uuemap[0];
a6ec74c1 2310 }
f337b084
TH
2311 *h++ = '\n';
2312 return h;
a6ec74c1
JH
2313}
2314
2315STATIC SV *
f7fe979e 2316S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2317{
8b6e33c7
AL
2318 SV *result = newSVpvn(s, l);
2319 char *const result_c = SvPV_nolen(result); /* convenience */
2320 char *out = result_c;
2321 bool skip = 1;
2322 bool ignore = 0;
a6ec74c1 2323
7918f24d
NC
2324 PERL_ARGS_ASSERT_IS_AN_INT;
2325
a6ec74c1
JH
2326 while (*s) {
2327 switch (*s) {
2328 case ' ':
2329 break;
2330 case '+':
2331 if (!skip) {
2332 SvREFCNT_dec(result);
2333 return (NULL);
2334 }
2335 break;
2336 case '0':
2337 case '1':
2338 case '2':
2339 case '3':
2340 case '4':
2341 case '5':
2342 case '6':
2343 case '7':
2344 case '8':
2345 case '9':
2346 skip = 0;
2347 if (!ignore) {
2348 *(out++) = *s;
2349 }
2350 break;
2351 case '.':
2352 ignore = 1;
2353 break;
2354 default:
2355 SvREFCNT_dec(result);
2356 return (NULL);
2357 }
2358 s++;
2359 }
2360 *(out++) = '\0';
2361 SvCUR_set(result, out - result_c);
2362 return (result);
2363}
2364
2365/* pnum must be '\0' terminated */
2366STATIC int
2367S_div128(pTHX_ SV *pnum, bool *done)
2368{
8b6e33c7
AL
2369 STRLEN len;
2370 char * const s = SvPV(pnum, len);
2371 char *t = s;
2372 int m = 0;
2373
7918f24d
NC
2374 PERL_ARGS_ASSERT_DIV128;
2375
8b6e33c7
AL
2376 *done = 1;
2377 while (*t) {
2378 const int i = m * 10 + (*t - '0');
2379 const int r = (i >> 7); /* r < 10 */
2380 m = i & 0x7F;
2381 if (r) {
2382 *done = 0;
2383 }
2384 *(t++) = '0' + r;
a6ec74c1 2385 }
8b6e33c7
AL
2386 *(t++) = '\0';
2387 SvCUR_set(pnum, (STRLEN) (t - s));
2388 return (m);
a6ec74c1
JH
2389}
2390
18529408 2391/*
7accc089
JH
2392=for apidoc packlist
2393
2394The engine implementing pack() Perl function.
2395
bfce84ec
AL
2396=cut
2397*/
7accc089
JH
2398
2399void
5aaab254 2400Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2401{
97aff369 2402 dVAR;
aadb217d
JH
2403 tempsym_t sym;
2404
7918f24d
NC
2405 PERL_ARGS_ASSERT_PACKLIST;
2406
f7fe979e 2407 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2408
f337b084
TH
2409 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2410 Also make sure any UTF8 flag is loaded */
56eb0262 2411 SvPV_force_nolen(cat);
bfce84ec
AL
2412 if (DO_UTF8(cat))
2413 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2414
49704364
LW
2415 (void)pack_rec( cat, &sym, beglist, endlist );
2416}
2417
f337b084
TH
2418/* like sv_utf8_upgrade, but also repoint the group start markers */
2419STATIC void
2420marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2421 STRLEN len;
2422 tempsym_t *group;
f7fe979e
AL
2423 const char *from_ptr, *from_start, *from_end, **marks, **m;
2424 char *to_start, *to_ptr;
f337b084
TH
2425
2426 if (SvUTF8(sv)) return;
2427
aa07b2f6 2428 from_start = SvPVX_const(sv);
f337b084
TH
2429 from_end = from_start + SvCUR(sv);
2430 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2431 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2432 if (from_ptr == from_end) {
2433 /* Simple case: no character needs to be changed */
2434 SvUTF8_on(sv);
2435 return;
2436 }
2437
3473cf63 2438 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2439 Newx(to_start, len, char);
f337b084
TH
2440 Copy(from_start, to_start, from_ptr-from_start, char);
2441 to_ptr = to_start + (from_ptr-from_start);
2442
a02a5408 2443 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2444 for (group=sym_ptr; group; group = group->previous)
2445 marks[group->level] = from_start + group->strbeg;
2446 marks[sym_ptr->level+1] = from_end+1;
2447 for (m = marks; *m < from_ptr; m++)
2448 *m = to_start + (*m-from_start);
2449
2450 for (;from_ptr < from_end; from_ptr++) {
2451 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2452 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2453 }
2454 *to_ptr = 0;
2455
2456 while (*m == from_ptr) *m++ = to_ptr;
2457 if (m != marks + sym_ptr->level+1) {
2458 Safefree(marks);
2459 Safefree(to_start);
5637ef5b
NC
2460 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2461 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2462 }
2463 for (group=sym_ptr; group; group = group->previous)
2464 group->strbeg = marks[group->level] - to_start;
2465 Safefree(marks);
2466
2467 if (SvOOK(sv)) {
2468 if (SvIVX(sv)) {
b162af07 2469 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2470 from_start -= SvIVX(sv);
2471 SvIV_set(sv, 0);
2472 }
2473 SvFLAGS(sv) &= ~SVf_OOK;
2474 }
2475 if (SvLEN(sv) != 0)
2476 Safefree(from_start);
f880fe2f 2477 SvPV_set(sv, to_start);
b162af07
SP
2478 SvCUR_set(sv, to_ptr - to_start);
2479 SvLEN_set(sv, len);
f337b084
TH
2480 SvUTF8_on(sv);
2481}
2482
2483/* Exponential string grower. Makes string extension effectively O(n)
2484 needed says how many extra bytes we need (not counting the final '\0')
2485 Only grows the string if there is an actual lack of space
2486*/
2487STATIC char *
0bd48802 2488S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2489 const STRLEN cur = SvCUR(sv);
2490 const STRLEN len = SvLEN(sv);
f337b084 2491 STRLEN extend;
7918f24d
NC
2492
2493 PERL_ARGS_ASSERT_SV_EXP_GROW;
2494
f337b084
TH
2495 if (len - cur > needed) return SvPVX(sv);
2496 extend = needed > len ? needed : len;
2497 return SvGROW(sv, len+extend+1);
2498}
49704364
LW
2499
2500STATIC
2501SV **
f337b084 2502S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2503{
97aff369 2504 dVAR;
49704364 2505 tempsym_t lookahead;
f337b084
TH
2506 I32 items = endlist - beglist;
2507 bool found = next_symbol(symptr);
2508 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2509 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2510
7918f24d
NC
2511 PERL_ARGS_ASSERT_PACK_REC;
2512
f337b084
TH
2513 if (symptr->level == 0 && found && symptr->code == 'U') {
2514 marked_upgrade(aTHX_ cat, symptr);
2515 symptr->flags |= FLAG_DO_UTF8;
2516 utf8 = 0;
49704364 2517 }
f337b084 2518 symptr->strbeg = SvCUR(cat);
49704364
LW
2519
2520 while (found) {
f337b084
TH
2521 SV *fromstr;
2522 STRLEN fromlen;
2523 I32 len;
a0714e2c 2524 SV *lengthcode = NULL;
49704364 2525 I32 datumtype = symptr->code;
f337b084
TH
2526 howlen_t howlen = symptr->howlen;
2527 char *start = SvPVX(cat);
2528 char *cur = start + SvCUR(cat);
49704364 2529
f337b084
TH
2530#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2531
2532 switch (howlen) {
fc241834 2533 case e_star:
f337b084
TH
2534 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2535 0 : items;
2536 break;
2537 default:
2538 /* e_no_len and e_number */
2539 len = symptr->length;
49704364
LW
2540 break;
2541 }
2542
f337b084 2543 if (len) {
a7a3cfaa 2544 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2545
a7a3cfaa
TH
2546 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2547 /* We can process this letter. */
2548 STRLEN size = props & PACK_SIZE_MASK;
2549 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2550 }
f337b084
TH
2551 }
2552
49704364
LW
2553 /* Look ahead for next symbol. Do we have code/code? */
2554 lookahead = *symptr;
2555 found = next_symbol(&lookahead);
246f24af
TH
2556 if (symptr->flags & FLAG_SLASH) {
2557 IV count;
f337b084 2558 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2559 if (strchr("aAZ", lookahead.code)) {
2560 if (lookahead.howlen == e_number) count = lookahead.length;
2561 else {
ce399ba6 2562 if (items > 0) {
48a5da33 2563 count = sv_len_utf8(*beglist);
ce399ba6 2564 }
246f24af
TH
2565 else count = 0;
2566 if (lookahead.code == 'Z') count++;
2567 }
2568 } else {
2569 if (lookahead.howlen == e_number && lookahead.length < items)
2570 count = lookahead.length;
2571 else count = items;
2572 }
2573 lookahead.howlen = e_number;
2574 lookahead.length = count;
2575 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2576 }
49704364 2577
fc241834
RGS
2578 /* Code inside the switch must take care to properly update
2579 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2580 doesn't simply leave using break */
1109a392 2581 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2582 default:
f337b084
TH
2583 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2584 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2585 case '%':
49704364 2586 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2587 {
2588 char *from;
2589#ifdef PERL_PACK_CAN_SHRIEKSIGN
2590 case '.' | TYPE_IS_SHRIEKING:
2591#endif
2592 case '.':
2593 if (howlen == e_star) from = start;
2594 else if (len == 0) from = cur;
2595 else {
2596 tempsym_t *group = symptr;
2597
2598 while (--len && group) group = group->previous;
2599 from = group ? start + group->strbeg : start;
2600 }
2601 fromstr = NEXTFROM;
2602 len = SvIV(fromstr);
2603 goto resize;
2604#ifdef PERL_PACK_CAN_SHRIEKSIGN
2605 case '@' | TYPE_IS_SHRIEKING:
2606#endif
a6ec74c1 2607 case '@':
28be1210
TH
2608 from = start + symptr->strbeg;
2609 resize:
2610#ifdef PERL_PACK_CAN_SHRIEKSIGN
2611 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2612#else /* PERL_PACK_CAN_SHRIEKSIGN */
2613 if (utf8)
2614#endif
2615 if (len >= 0) {
2616 while (len && from < cur) {
2617 from += UTF8SKIP(from);
2618 len--;
2619 }
2620 if (from > cur)
2621 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2622 if (len) {
2623 /* Here we know from == cur */
2624 grow:
2625 GROWING(0, cat, start, cur, len);
2626 Zero(cur, len, char);
2627 cur += len;
2628 } else if (from < cur) {
2629 len = cur - from;
2630 goto shrink;
2631 } else goto no_change;
2632 } else {
2633 cur = from;
2634 len = -len;
2635 goto utf8_shrink;
f337b084 2636 }
28be1210
TH
2637 else {
2638 len -= cur - from;
f337b084 2639 if (len > 0) goto grow;
28be1210 2640 if (len == 0) goto no_change;
fc241834 2641 len = -len;
28be1210 2642 goto shrink;
f337b084 2643 }
a6ec74c1 2644 break;
28be1210 2645 }
fc241834 2646 case '(': {
49704364 2647 tempsym_t savsym = *symptr;
66c611c5
MHM
2648 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2649 symptr->flags |= group_modifiers;
49704364
LW
2650 symptr->patend = savsym.grpend;
2651 symptr->level++;
f337b084 2652 symptr->previous = &lookahead;
18529408 2653 while (len--) {
f337b084
TH
2654 U32 was_utf8;
2655 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2656 else symptr->flags &= ~FLAG_PARSE_UTF8;
2657 was_utf8 = SvUTF8(cat);
49704364 2658 symptr->patptr = savsym.grpbeg;
f337b084
TH
2659 beglist = pack_rec(cat, symptr, beglist, endlist);
2660 if (SvUTF8(cat) != was_utf8)
2661 /* This had better be an upgrade while in utf8==0 mode */
2662 utf8 = 1;
2663
49704364 2664 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2665 break; /* No way to continue */
2666 }
ee790063 2667 items = endlist - beglist;
f337b084
TH
2668 lookahead.flags = symptr->flags & ~group_modifiers;
2669 goto no_change;
18529408 2670 }
62f95557
IZ
2671 case 'X' | TYPE_IS_SHRIEKING:
2672 if (!len) /* Avoid division by 0 */
2673 len = 1;
f337b084
TH
2674 if (utf8) {
2675 char *hop, *last;
2676 I32 l = len;
2677 hop = last = start;
2678 while (hop < cur) {
2679 hop += UTF8SKIP(hop);
2680 if (--l == 0) {
2681 last = hop;
2682 l = len;
2683 }
2684 }
2685 if (last > cur)
2686 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2687 cur = last;
2688 break;
2689 }
2690 len = (cur-start) % len;
62f95557 2691 /* FALL THROUGH */
a6ec74c1 2692 case 'X':
f337b084
TH
2693 if (utf8) {
2694 if (len < 1) goto no_change;
28be1210 2695 utf8_shrink:
f337b084
TH
2696 while (len > 0) {
2697 if (cur <= start)
28be1210
TH
2698 Perl_croak(aTHX_ "'%c' outside of string in pack",
2699 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2700 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2701 if (cur <= start)
28be1210
TH
2702 Perl_croak(aTHX_ "'%c' outside of string in pack",
2703 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2704 }
2705 len--;
2706 }
2707 } else {
fc241834 2708 shrink:
f337b084 2709 if (cur - start < len)
28be1210
TH
2710 Perl_croak(aTHX_ "'%c' outside of string in pack",
2711 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2712 cur -= len;
2713 }
2714 if (cur < start+symptr->strbeg) {
2715 /* Make sure group starts don't point into the void */
2716 tempsym_t *group;
9e27e96a 2717 const STRLEN length = cur-start;
f337b084
TH
2718 for (group = symptr;
2719 group && length < group->strbeg;
2720 group = group->previous) group->strbeg = length;
2721 lookahead.strbeg = length;
2722 }
a6ec74c1 2723 break;
fc241834
RGS
2724 case 'x' | TYPE_IS_SHRIEKING: {
2725 I32 ai32;
62f95557
IZ
2726 if (!len) /* Avoid division by 0 */
2727 len = 1;
230e1fce 2728 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2729 else ai32 = (cur - start) % len;
2730 if (ai32 == 0) goto no_change;
2731 len -= ai32;
2732 }
2733 /* FALL THROUGH */
a6ec74c1 2734 case 'x':
f337b084 2735 goto grow;
a6ec74c1
JH
2736 case 'A':
2737 case 'Z':
f337b084 2738 case 'a': {
f7fe979e 2739 const char *aptr;
f337b084 2740
a6ec74c1 2741 fromstr = NEXTFROM;
e62f0680 2742 aptr = SvPV_const(fromstr, fromlen);
f337b084 2743 if (DO_UTF8(fromstr)) {
f7fe979e 2744 const char *end, *s;
f337b084
TH
2745
2746 if (!utf8 && !SvUTF8(cat)) {
2747 marked_upgrade(aTHX_ cat, symptr);
2748 lookahead.flags |= FLAG_DO_UTF8;
2749 lookahead.strbeg = symptr->strbeg;
2750 utf8 = 1;
2751 start = SvPVX(cat);
2752 cur = start + SvCUR(cat);
2753 }
fc241834 2754 if (howlen == e_star) {
f337b084
TH
2755 if (utf8) goto string_copy;
2756 len = fromlen+1;
2757 }
2758 s = aptr;
2759 end = aptr + fromlen;
2760 fromlen = datumtype == 'Z' ? len-1 : len;
2761 while ((I32) fromlen > 0 && s < end) {
2762 s += UTF8SKIP(s);
2763 fromlen--;
2764 }
2765 if (s > end)
2766 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2767 if (utf8) {
fc241834 2768 len = fromlen;
f337b084
TH
2769 if (datumtype == 'Z') len++;
2770 fromlen = s-aptr;
2771 len += fromlen;
fc241834 2772
f337b084 2773 goto string_copy;
fc241834 2774 }
f337b084
TH
2775 fromlen = len - fromlen;
2776 if (datumtype == 'Z') fromlen--;
2777 if (howlen == e_star) {
2778 len = fromlen;
2779 if (datumtype == 'Z') len++;
fc241834 2780 }
f337b084 2781 GROWING(0, cat, start, cur, len);
fc241834 2782 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2783 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2784 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2785 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2786 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2787 cur += fromlen;
a6ec74c1 2788 len -= fromlen;
f337b084
TH
2789 } else if (utf8) {
2790 if (howlen == e_star) {
2791 len = fromlen;
2792 if (datumtype == 'Z') len++;
a6ec74c1 2793 }
f337b084
TH
2794 if (len <= (I32) fromlen) {
2795 fromlen = len;
2796 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2797 }
fc241834 2798 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2799 upgrade, so:
2800 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2801 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2802 len -= fromlen;
2803 while (fromlen > 0) {
230e1fce 2804 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2805 aptr++;
2806 fromlen--;
fc241834 2807 }
f337b084
TH
2808 } else {
2809 string_copy:
2810 if (howlen == e_star) {
2811 len = fromlen;
2812 if (datumtype == 'Z') len++;
2813 }
2814 if (len <= (I32) fromlen) {
2815 fromlen = len;
2816 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2817 }
f337b084
TH
2818 GROWING(0, cat, start, cur, len);
2819 Copy(aptr, cur, fromlen, char);
2820 cur += fromlen;
2821 len -= fromlen;
a6ec74c1 2822 }
f337b084
TH
2823 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2824 cur += len;
3c4fb04a 2825 SvTAINT(cat);
a6ec74c1 2826 break;
f337b084 2827 }
a6ec74c1 2828 case 'B':
f337b084 2829 case 'b': {
b83604b4 2830 const char *str, *end;
f337b084
TH
2831 I32 l, field_len;
2832 U8 bits;
2833 bool utf8_source;
2834 U32 utf8_flags;
a6ec74c1 2835
fc241834 2836 fromstr = NEXTFROM;
b83604b4 2837 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2838 end = str + fromlen;
2839 if (DO_UTF8(fromstr)) {
2840 utf8_source = TRUE;
041457d9 2841 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2842 } else {
2843 utf8_source = FALSE;
2844 utf8_flags = 0; /* Unused, but keep compilers happy */
2845 }
2846 if (howlen == e_star) len = fromlen;
2847 field_len = (len+7)/8;
2848 GROWING(utf8, cat, start, cur, field_len);
2849 if (len > (I32)fromlen) len = fromlen;
2850 bits = 0;
2851 l = 0;
2852 if (datumtype == 'B')
2853 while (l++ < len) {
2854 if (utf8_source) {
95b63a38 2855 UV val = 0;
f337b084
TH
2856 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2857 bits |= val & 1;
2858 } else bits |= *str++ & 1;
2859 if (l & 7) bits <<= 1;
fc241834 2860 else {
f337b084
TH
2861 PUSH_BYTE(utf8, cur, bits);
2862 bits = 0;
a6ec74c1
JH
2863 }
2864 }
f337b084
TH
2865 else
2866 /* datumtype == 'b' */
2867 while (l++ < len) {
2868 if (utf8_source) {
95b63a38 2869 UV val = 0;
f337b084
TH
2870 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2871 if (val & 1) bits |= 0x80;
2872 } else if (*str++ & 1)
2873 bits |= 0x80;
2874 if (l & 7) bits >>= 1;
fc241834 2875 else {
f337b084
TH
2876 PUSH_BYTE(utf8, cur, bits);
2877 bits = 0;
a6ec74c1
JH
2878 }
2879 }
f337b084
TH
2880 l--;
2881 if (l & 7) {
fc241834 2882 if (datumtype == 'B')
f337b084 2883 bits <<= 7 - (l & 7);
fc241834 2884 else
f337b084
TH
2885 bits >>= 7 - (l & 7);
2886 PUSH_BYTE(utf8, cur, bits);
2887 l += 7;
a6ec74c1 2888 }
f337b084
TH
2889 /* Determine how many chars are left in the requested field */
2890 l /= 8;
2891 if (howlen == e_star) field_len = 0;
2892 else field_len -= l;
2893 Zero(cur, field_len, char);
2894 cur += field_len;
a6ec74c1 2895 break;
f337b084 2896 }
a6ec74c1 2897 case 'H':
f337b084 2898 case 'h': {
10516c54 2899 const char *str, *end;
f337b084
TH
2900 I32 l, field_len;
2901 U8 bits;
2902 bool utf8_source;
2903 U32 utf8_flags;
a6ec74c1 2904
fc241834 2905 fromstr = NEXTFROM;
10516c54 2906 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2907 end = str + fromlen;
2908 if (DO_UTF8(fromstr)) {
2909 utf8_source = TRUE;
041457d9 2910 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2911 } else {
2912 utf8_source = FALSE;
2913 utf8_flags = 0; /* Unused, but keep compilers happy */
2914 }
2915 if (howlen == e_star) len = fromlen;
2916 field_len = (len+1)/2;
2917 GROWING(utf8, cat, start, cur, field_len);
2918 if (!utf8 && len > (I32)fromlen) len = fromlen;
2919 bits = 0;
2920 l = 0;
2921 if (datumtype == 'H')
2922 while (l++ < len) {
2923 if (utf8_source) {
95b63a38 2924 UV val = 0;
f337b084
TH
2925 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2926 if (val < 256 && isALPHA(val))
2927 bits |= (val + 9) & 0xf;
a6ec74c1 2928 else
f337b084
TH
2929 bits |= val & 0xf;
2930 } else if (isALPHA(*str))
2931 bits |= (*str++ + 9) & 0xf;
2932 else
2933 bits |= *str++ & 0xf;
2934 if (l & 1) bits <<= 4;
fc241834 2935 else {
f337b084
TH
2936 PUSH_BYTE(utf8, cur, bits);
2937 bits = 0;
a6ec74c1
JH
2938 }
2939 }
f337b084
TH
2940 else
2941 while (l++ < len) {
2942 if (utf8_source) {
95b63a38 2943 UV val = 0;
f337b084
TH
2944 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2945 if (val < 256 && isALPHA(val))
2946 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2947 else
f337b084
TH
2948 bits |= (val & 0xf) << 4;
2949 } else if (isALPHA(*str))
2950 bits |= ((*str++ + 9) & 0xf) << 4;
2951 else
2952 bits |= (*str++ & 0xf) << 4;
2953 if (l & 1) bits >>= 4;
fc241834 2954 else {
f337b084
TH
2955 PUSH_BYTE(utf8, cur, bits);
2956 bits = 0;
a6ec74c1 2957 }
fc241834 2958 }
f337b084
TH
2959 l--;
2960 if (l & 1) {
2961 PUSH_BYTE(utf8, cur, bits);
2962 l++;
2963 }
2964 /* Determine how many chars are left in the requested field */
2965 l /= 2;
2966 if (howlen == e_star) field_len = 0;
2967 else field_len -= l;
2968 Zero(cur, field_len, char);
2969 cur += field_len;
2970 break;
fc241834
RGS
2971 }
2972 case 'c':
f337b084
TH
2973 while (len-- > 0) {
2974 IV aiv;
2975 fromstr = NEXTFROM;
2976 aiv = SvIV(fromstr);
a2a5de95
NC
2977 if ((-128 > aiv || aiv > 127))
2978 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2979 "Character in 'c' format wrapped in pack");
585ec06d 2980 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2981 }
2982 break;
2983 case 'C':
f337b084
TH
2984 if (len == 0) {
2985 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2986 break;
2987 }
a6ec74c1 2988 while (len-- > 0) {
f337b084 2989 IV aiv;
a6ec74c1 2990 fromstr = NEXTFROM;
f337b084 2991 aiv = SvIV(fromstr);
a2a5de95
NC
2992 if ((0 > aiv || aiv > 0xff))
2993 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2994 "Character in 'C' format wrapped in pack");
1651fc44 2995 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2996 }
fc241834
RGS
2997 break;
2998 case 'W': {
2999 char *end;
670f1322 3000 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
3001
3002 end = start+SvLEN(cat)-1;
3003 if (utf8) end -= UTF8_MAXLEN-1;
3004 while (len-- > 0) {
3005 UV auv;
3006 fromstr = NEXTFROM;
3007 auv = SvUV(fromstr);
3008 if (in_bytes) auv = auv % 0x100;
3009 if (utf8) {
3010 W_utf8:
3011 if (cur > end) {
3012 *cur = '\0';
b162af07 3013 SvCUR_set(cat, cur - start);
fc241834
RGS
3014
3015 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3016 end = start+SvLEN(cat)-UTF8_MAXLEN;
3017 }
230e1fce
NC
3018 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3019 NATIVE_TO_UNI(auv),
041457d9 3020 warn_utf8 ?
230e1fce 3021 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3022 } else {
3023 if (auv >= 0x100) {
3024 if (!SvUTF8(cat)) {
3025 *cur = '\0';
b162af07 3026 SvCUR_set(cat, cur - start);
fc241834
RGS
3027 marked_upgrade(aTHX_ cat, symptr);
3028 lookahead.flags |= FLAG_DO_UTF8;
3029 lookahead.strbeg = symptr->strbeg;
3030 utf8 = 1;
3031 start = SvPVX(cat);
3032 cur = start + SvCUR(cat);
3033 end = start+SvLEN(cat)-UTF8_MAXLEN;
3034 goto W_utf8;
3035 }
a2a5de95
NC
3036 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3037 "Character in 'W' format wrapped in pack");
fc241834
RGS
3038 auv &= 0xff;
3039 }
3040 if (cur >= end) {
3041 *cur = '\0';
b162af07 3042 SvCUR_set(cat, cur - start);
fc241834
RGS
3043 GROWING(0, cat, start, cur, len+1);
3044 end = start+SvLEN(cat)-1;
3045 }
fe2774ed 3046 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3047 }
3048 }
3049 break;
fc241834
RGS
3050 }
3051 case 'U': {
3052 char *end;
3053
3054 if (len == 0) {
3055 if (!(symptr->flags & FLAG_DO_UTF8)) {
3056 marked_upgrade(aTHX_ cat, symptr);
3057 lookahead.flags |= FLAG_DO_UTF8;
3058 lookahead.strbeg = symptr->strbeg;
3059 }
3060 utf8 = 0;
3061 goto no_change;
3062 }
3063
3064 end = start+SvLEN(cat);
3065 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3066 while (len-- > 0) {
fc241834 3067 UV auv;
a6ec74c1 3068 fromstr = NEXTFROM;
fc241834
RGS
3069 auv = SvUV(fromstr);
3070 if (utf8) {
230e1fce 3071 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3072 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3073 warn_utf8 ?
fc241834
RGS
3074 0 : UNICODE_ALLOW_ANY);
3075 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3076 *cur = '\0';
b162af07 3077 SvCUR_set(cat, cur - start);
fc241834
RGS
3078 GROWING(0, cat, start, cur,
3079 len+(endb-buffer)*UTF8_EXPAND);
3080 end = start+SvLEN(cat);
3081 }
64844641 3082 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3083 } else {
3084 if (cur >= end) {
3085 *cur = '\0';
b162af07 3086 SvCUR_set(cat, cur - start);
fc241834
RGS
3087 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3088 end = start+SvLEN(cat)-UTF8_MAXLEN;
3089 }
230e1fce 3090 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3091 warn_utf8 ?
230e1fce 3092 0 : UNICODE_ALLOW_ANY);
fc241834 3093 }
a6ec74c1 3094 }
a6ec74c1 3095 break;
fc241834 3096 }
a6ec74c1
JH
3097 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3098 case 'f':
a6ec74c1 3099 while (len-- > 0) {
f337b084
TH
3100 float afloat;
3101 NV anv;
a6ec74c1 3102 fromstr = NEXTFROM;
f337b084 3103 anv = SvNV(fromstr);
85bba25f 3104# if defined(VMS) && !defined(_IEEE_FP)
f337b084 3105 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3106 * on Alpha; fake it if we don't have them.
3107 */
f337b084 3108 if (anv > FLT_MAX)
fc241834 3109 afloat = FLT_MAX;
f337b084 3110 else if (anv < -FLT_MAX)
fc241834 3111 afloat = -FLT_MAX;
f337b084 3112 else afloat = (float)anv;
baf3cf9c 3113# else
f337b084 3114 afloat = (float)anv;
baf3cf9c 3115# endif
1109a392 3116 DO_BO_PACK_N(afloat, float);
f337b084 3117 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3118 }
3119 break;
3120 case 'd':
a6ec74c1 3121 while (len-- > 0) {
f337b084
TH
3122 double adouble;
3123 NV anv;
a6ec74c1 3124 fromstr = NEXTFROM;
f337b084 3125 anv = SvNV(fromstr);
85bba25f 3126# if defined(VMS) && !defined(_IEEE_FP)
f337b084 3127 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3128 * on Alpha; fake it if we don't have them.
3129 */
f337b084 3130 if (anv > DBL_MAX)
fc241834 3131 adouble = DBL_MAX;
f337b084 3132 else if (anv < -DBL_MAX)
fc241834 3133 adouble = -DBL_MAX;
f337b084 3134 else adouble = (double)anv;
baf3cf9c 3135# else
f337b084 3136 adouble = (double)anv;
baf3cf9c 3137# endif
1109a392 3138 DO_BO_PACK_N(adouble, double);
f337b084 3139 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3140 }
3141 break;
fc241834 3142 case 'F': {
275663fa 3143 NV_bytes anv;
1109a392 3144 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3145 while (len-- > 0) {
3146 fromstr = NEXTFROM;
cd07c537
DM
3147#ifdef __GNUC__
3148 /* to work round a gcc/x86 bug; don't use SvNV */
3149 anv.nv = sv_2nv(fromstr);
3150#else
275663fa 3151 anv.nv = SvNV(fromstr);
cd07c537 3152#endif
1109a392 3153 DO_BO_PACK_N(anv, NV);
275663fa 3154 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
3155 }
3156 break;
fc241834 3157 }
92d41999 3158#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 3159 case 'D': {
275663fa 3160 ld_bytes aldouble;
1109a392
MHM
3161 /* long doubles can have unused bits, which may be nonzero */
3162 Zero(&aldouble, 1, long double);
92d41999
JH
3163 while (len-- > 0) {
3164 fromstr = NEXTFROM;
cd07c537
DM
3165# ifdef __GNUC__
3166 /* to work round a gcc/x86 bug; don't use SvNV */
3167 aldouble.ld = (long double)sv_2nv(fromstr);
3168# else
275663fa 3169 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 3170# endif
1109a392 3171 DO_BO_PACK_N(aldouble, long double);
275663fa 3172 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
3173 }
3174 break;
fc241834 3175 }
92d41999 3176#endif
7212898e 3177#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3178 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3179#endif
a6ec74c1
JH
3180 case 'n':
3181 while (len-- > 0) {
f337b084 3182 I16 ai16;
a6ec74c1 3183 fromstr = NEXTFROM;
ef108786 3184 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3185#ifdef HAS_HTONS
ef108786 3186 ai16 = PerlSock_htons(ai16);
a6ec74c1 3187#endif
f337b084 3188 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3189 }
3190 break;
7212898e 3191#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3192 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3193#endif
a6ec74c1
JH
3194 case 'v':
3195 while (len-- > 0) {
f337b084 3196 I16 ai16;
a6ec74c1 3197 fromstr = NEXTFROM;
ef108786 3198 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3199#ifdef HAS_HTOVS
ef108786 3200 ai16 = htovs(ai16);
a6ec74c1 3201#endif
f337b084 3202 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3203 }
3204 break;
49704364 3205 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3206#if SHORTSIZE != SIZE16
fc241834 3207 while (len-- > 0) {
f337b084 3208 unsigned short aushort;
fc241834
RGS
3209 fromstr = NEXTFROM;
3210 aushort = SvUV(fromstr);
3211 DO_BO_PACK(aushort, s);
f337b084 3212 PUSH_VAR(utf8, cur, aushort);
fc241834 3213 }
49704364
LW
3214 break;
3215#else
3216 /* Fall through! */
a6ec74c1 3217#endif
49704364 3218 case 'S':
fc241834 3219 while (len-- > 0) {
f337b084 3220 U16 au16;
fc241834
RGS
3221 fromstr = NEXTFROM;
3222 au16 = (U16)SvUV(fromstr);
3223 DO_BO_PACK(au16, 16);
f337b084 3224 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3225 }
3226 break;
49704364 3227 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3228#if SHORTSIZE != SIZE16
fc241834 3229 while (len-- > 0) {
f337b084 3230 short ashort;
fc241834
RGS
3231 fromstr = NEXTFROM;
3232 ashort = SvIV(fromstr);
3233 DO_BO_PACK(ashort, s);
f337b084 3234 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3235 }
49704364
LW
3236 break;
3237#else
3238 /* Fall through! */
a6ec74c1 3239#endif
49704364
LW
3240 case 's':
3241 while (len-- > 0) {
f337b084 3242 I16 ai16;
49704364 3243 fromstr = NEXTFROM;
ef108786
MHM
3244 ai16 = (I16)SvIV(fromstr);
3245 DO_BO_PACK(ai16, 16);
f337b084 3246 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3247 }
3248 break;
3249 case 'I':
49704364 3250 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3251 while (len-- > 0) {
f337b084 3252 unsigned int auint;
a6ec74c1
JH
3253 fromstr = NEXTFROM;
3254 auint = SvUV(fromstr);
1109a392 3255 DO_BO_PACK(auint, i);
f337b084 3256 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3257 }
3258 break;
92d41999
JH
3259 case 'j':
3260 while (len-- > 0) {
f337b084 3261 IV aiv;
92d41999
JH
3262 fromstr = NEXTFROM;
3263 aiv = SvIV(fromstr);
1109a392
MHM
3264#if IVSIZE == INTSIZE
3265 DO_BO_PACK(aiv, i);
3266#elif IVSIZE == LONGSIZE
3267 DO_BO_PACK(aiv, l);
3268#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3269 DO_BO_PACK(aiv, 64);
f337b084
TH
3270#else
3271 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3272#endif
f337b084 3273 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3274 }
3275 break;
3276 case 'J':
3277 while (len-- > 0) {
f337b084 3278 UV auv;
92d41999
JH
3279 fromstr = NEXTFROM;
3280 auv = SvUV(fromstr);
1109a392
MHM
3281#if UVSIZE == INTSIZE
3282 DO_BO_PACK(auv, i);
3283#elif UVSIZE == LONGSIZE
3284 DO_BO_PACK(auv, l);
3285#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3286 DO_BO_PACK(auv, 64);
f337b084
TH
3287#else
3288 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3289#endif
f337b084 3290 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3291 }
3292 break;
a6ec74c1
JH
3293 case 'w':
3294 while (len-- > 0) {
f337b084 3295 NV anv;
a6ec74c1 3296 fromstr = NEXTFROM;
15e9f109 3297 anv = SvNV(fromstr);
a6ec74c1 3298
f337b084
TH
3299 if (anv < 0) {
3300 *cur = '\0';
b162af07 3301 SvCUR_set(cat, cur - start);
49704364 3302 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3303 }
a6ec74c1 3304
196b62db
NC
3305 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3306 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3307 any negative IVs will have already been got by the croak()
3308 above. IOK is untrue for fractions, so we test them
3309 against UV_MAX_P1. */
f337b084
TH
3310 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3311 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3312 char *in = buf + sizeof(buf);
196b62db 3313 UV auv = SvUV(fromstr);
a6ec74c1
JH
3314
3315 do {
eb160463 3316 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3317 auv >>= 7;
3318 } while (auv);
3319 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3320 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3321 in, (buf + sizeof(buf)) - in);
3322 } else if (SvPOKp(fromstr))
3323 goto w_string;
a6ec74c1 3324 else if (SvNOKp(fromstr)) {
0258719b 3325 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 3326 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
3327 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3328 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3329 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3330 Some C compilers are strict about integral constant
3331 expressions so we conservatively divide by a slightly
3332 smaller integer instead of multiplying by the exact
3333 floating-point value.
0258719b
NC
3334 */
3335#ifdef NV_MAX_10_EXP
f337b084 3336 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3337 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3338#else
f337b084 3339 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3340 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3341#endif
a6ec74c1
JH
3342 char *in = buf + sizeof(buf);
3343
8b6e33c7 3344 anv = Perl_floor(anv);
a6ec74c1 3345 do {
8b6e33c7 3346 const NV next = Perl_floor(anv / 128);
a6ec74c1 3347 if (in <= buf) /* this cannot happen ;-) */
49704364 3348 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3349 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3350 anv = next;
3351 } while (anv > 0);
a6ec74c1 3352 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3353 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3354 in, (buf + sizeof(buf)) - in);
3355 } else {
8b6e33c7
AL
3356 const char *from;
3357 char *result, *in;
735b914b
JH
3358 SV *norm;
3359 STRLEN len;
3360 bool done;
3361
f337b084 3362 w_string:
735b914b 3363 /* Copy string and check for compliance */
349d4f2f 3364 from = SvPV_const(fromstr, len);
735b914b 3365 if ((norm = is_an_int(from, len)) == NULL)
49704364 3366 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3367
a02a5408 3368 Newx(result, len, char);
735b914b
JH
3369 in = result + len;
3370 done = FALSE;
f337b084 3371 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3372 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3373 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3374 in, (result + len) - in);
735b914b
JH
3375 Safefree(result);
3376 SvREFCNT_dec(norm); /* free norm */
fc241834 3377 }
a6ec74c1
JH
3378 }
3379 break;
3380 case 'i':
49704364 3381 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3382 while (len-- > 0) {
f337b084 3383 int aint;
a6ec74c1
JH
3384 fromstr = NEXTFROM;
3385 aint = SvIV(fromstr);
1109a392 3386 DO_BO_PACK(aint, i);
f337b084 3387 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3388 }
3389 break;
7212898e 3390#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3391 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3392#endif
a6ec74c1
JH
3393 case 'N':
3394 while (len-- > 0) {
f337b084 3395 U32 au32;
a6ec74c1 3396 fromstr = NEXTFROM;
ef108786 3397 au32 = SvUV(fromstr);
a6ec74c1 3398#ifdef HAS_HTONL
ef108786 3399 au32 = PerlSock_htonl(au32);
a6ec74c1 3400#endif
f337b084 3401 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3402 }
3403 break;
7212898e 3404#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3405 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3406#endif
a6ec74c1
JH
3407 case 'V':
3408 while (len-- > 0) {
f337b084 3409 U32 au32;
a6ec74c1 3410 fromstr = NEXTFROM;
ef108786 3411 au32 = SvUV(fromstr);
a6ec74c1 3412#ifdef HAS_HTOVL
ef108786 3413 au32 = htovl(au32);
a6ec74c1 3414#endif
f337b084 3415 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3416 }
3417 break;
49704364 3418 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3419#if LONGSIZE != SIZE32
fc241834 3420 while (len-- > 0) {
f337b084 3421 unsigned long aulong;
fc241834
RGS
3422 fromstr = NEXTFROM;
3423 aulong = SvUV(fromstr);
3424 DO_BO_PACK(aulong, l);
f337b084 3425 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3426 }
49704364
LW
3427 break;
3428#else
3429 /* Fall though! */
a6ec74c1 3430#endif
49704364 3431 case 'L':
fc241834 3432 while (len-- > 0) {
f337b084 3433 U32 au32;
fc241834
RGS
3434 fromstr = NEXTFROM;
3435 au32 = SvUV(fromstr);
3436 DO_BO_PACK(au32, 32);
f337b084 3437 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3438 }
3439 break;
49704364 3440 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3441#if LONGSIZE != SIZE32
fc241834 3442 while (len-- > 0) {
f337b084 3443 long along;
fc241834
RGS
3444 fromstr = NEXTFROM;
3445 along = SvIV(fromstr);
3446 DO_BO_PACK(along, l);
f337b084 3447 PUSH_VAR(utf8, cur, along);
a6ec74c1 3448 }
49704364
LW
3449 break;
3450#else
3451 /* Fall though! */
a6ec74c1 3452#endif
49704364
LW
3453 case 'l':
3454 while (len-- > 0) {
f337b084 3455 I32 ai32;
49704364 3456 fromstr = NEXTFROM;
ef108786
MHM
3457 ai32 = SvIV(fromstr);
3458 DO_BO_PACK(ai32, 32);
f337b084 3459 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3460 }
3461 break;
3462#ifdef HAS_QUAD
3463 case 'Q':
3464 while (len-- > 0) {
f337b084 3465 Uquad_t auquad;
a6ec74c1 3466 fromstr = NEXTFROM;
f337b084 3467 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3468 DO_BO_PACK(auquad, 64);
f337b084 3469 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3470 }
3471 break;
3472 case 'q':
3473 while (len-- > 0) {
f337b084 3474 Quad_t aquad;
a6ec74c1
JH
3475 fromstr = NEXTFROM;
3476 aquad = (Quad_t)SvIV(fromstr);
1109a392 3477 DO_BO_PACK(aquad, 64);
f337b084 3478 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3479 }
3480 break;
f337b084 3481#endif /* HAS_QUAD */
a6ec74c1
JH
3482 case 'P':
3483 len = 1; /* assume SV is correct length */
f337b084 3484 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3485 /* Fall through! */
a6ec74c1
JH
3486 case 'p':
3487 while (len-- > 0) {
83003860 3488 const char *aptr;
f337b084 3489
a6ec74c1 3490 fromstr = NEXTFROM;
28a4f200 3491 SvGETMAGIC(fromstr);
3492 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3493 else {
a6ec74c1
JH
3494 /* XXX better yet, could spirit away the string to
3495 * a safe spot and hang on to it until the result
3496 * of pack() (and all copies of the result) are
3497 * gone.
3498 */