This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
More pruning of the perl 5.11.3 delta file
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
7212898e 73#if PERL_VERSION >= 9
f337b084
TH
74# define PERL_PACK_CAN_BYTEORDER
75# define PERL_PACK_CAN_SHRIEKSIGN
76#endif
77
78#ifndef CHAR_BIT
79# define CHAR_BIT 8
7212898e 80#endif
3473cf63
RGS
81/* Maximum number of bytes to which a byte can grow due to upgrade */
82#define UTF8_EXPAND 2
7212898e 83
a6ec74c1 84/*
a6ec74c1
JH
85 * Offset for integer pack/unpack.
86 *
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
89 */
90
91/*
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
97 */
98/*
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
101 */
102#define SIZE16 2
103#define SIZE32 4
104
105/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
106 --jhi Feb 1999 */
107
1109a392
MHM
108#if U16SIZE > SIZE16 || U32SIZE > SIZE32
109# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
110# define OFF16(p) ((char*)(p))
111# define OFF32(p) ((char*)(p))
a6ec74c1 112# else
1109a392 113# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
114# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116# else
08ca2aa3 117 ++++ bad cray byte order
a6ec74c1
JH
118# endif
119# endif
a6ec74c1 120#else
08ca2aa3
TH
121# define OFF16(p) ((char *) (p))
122# define OFF32(p) ((char *) (p))
a6ec74c1
JH
123#endif
124
f337b084
TH
125/* Only to be used inside a loop (see the break) */
126#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (utf8) { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 } else { \
130 Copy(s, OFF16(p), SIZE16, char); \
131 (s) += SIZE16; \
132 } \
133} STMT_END
134
135/* Only to be used inside a loop (see the break) */
136#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (utf8) { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 } else { \
140 Copy(s, OFF32(p), SIZE32, char); \
141 (s) += SIZE32; \
142 } \
143} STMT_END
144
145#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
147
148/* Only to be used inside a loop (see the break) */
f337b084 149#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
150STMT_START { \
151 if (utf8) { \
f337b084
TH
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
154 } else { \
155 Copy(s, (char *) &var, sizeof(var), char); \
156 s += sizeof(var); \
157 } \
08ca2aa3
TH
158} STMT_END
159
f337b084 160#define PUSH_VAR(utf8, aptr, var) \
230e1fce 161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 162
49704364
WL
163/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164#define MAX_SUB_TEMPLATE_LEVEL 100
165
66c611c5 166/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
167#define FLAG_WAS_UTF8 0x40
168#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 169#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 170#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
171#define FLAG_SLASH 0x04
172#define FLAG_COMMA 0x02
173#define FLAG_PACK 0x01
174
a6ec74c1
JH
175STATIC SV *
176S_mul128(pTHX_ SV *sv, U8 m)
177{
178 STRLEN len;
179 char *s = SvPV(sv, len);
180 char *t;
a6ec74c1 181
7918f24d
NC
182 PERL_ARGS_ASSERT_MUL128;
183
a6ec74c1 184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 185 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
186
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
189 sv = tmpNew;
190 s = SvPV(sv, len);
191 }
192 t = s + len - 1;
193 while (!*t) /* trailing '\0'? */
194 t--;
195 while (t > s) {
f7fe979e 196 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
197 *(t--) = '0' + (char)(i % 10);
198 m = (char)(i / 10);
a6ec74c1
JH
199 }
200 return (sv);
201}
202
203/* Explosives and implosives. */
204
205#if 'I' == 73 && 'J' == 74
206/* On an ASCII/ISO kind of system */
207#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
208#else
209/*
210 Some other sort of character set - use memchr() so we don't match
211 the null byte.
212 */
213#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214#endif
215
66c611c5 216/* type modifiers */
62f95557 217#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
218#define TYPE_IS_BIG_ENDIAN 0x200
219#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 220#define TYPE_IS_PACK 0x800
1109a392 221#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 222#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
223#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
224
7212898e 225#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 226# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 227#else
28be1210 228# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
229#endif
230
231#ifndef PERL_PACK_CAN_BYTEORDER
232/* Put "can't" first because it is shorter */
233# define TYPE_ENDIANNESS(t) 0
234# define TYPE_NO_ENDIANNESS(t) (t)
235
236# define ENDIANNESS_ALLOWED_TYPES ""
237
238# define DO_BO_UNPACK(var, type)
239# define DO_BO_PACK(var, type)
07409e01
NC
240# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
242# define DO_BO_UNPACK_N(var, type)
243# define DO_BO_PACK_N(var, type)
244# define DO_BO_UNPACK_P(var)
245# define DO_BO_PACK_P(var)
45f723e1
NC
246# define DO_BO_UNPACK_PC(var)
247# define DO_BO_PACK_PC(var)
66c611c5 248
f337b084 249#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
250
251# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
253
254# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
255
256# define DO_BO_UNPACK(var, type) \
1109a392 257 STMT_START { \
66c611c5 258 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
261 default: break; \
262 } \
263 } STMT_END
264
7212898e 265# define DO_BO_PACK(var, type) \
1109a392 266 STMT_START { \
66c611c5 267 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
270 default: break; \
271 } \
272 } STMT_END
273
07409e01 274# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 275 STMT_START { \
66c611c5 276 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 277 case TYPE_IS_BIG_ENDIAN: \
07409e01 278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
282 break; \
283 default: \
284 break; \
285 } \
286 } STMT_END
287
07409e01 288# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 289 STMT_START { \
66c611c5 290 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 291 case TYPE_IS_BIG_ENDIAN: \
07409e01 292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
296 break; \
297 default: \
298 break; \
299 } \
300 } STMT_END
301
7212898e 302# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
303 STMT_START { \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
312 break; \
313 default: \
314 break; \
315 } \
316 } STMT_END
317
7212898e 318# if PTRSIZE == INTSIZE
07409e01
NC
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 323# elif PTRSIZE == LONGSIZE
fa58a56f
S
324# if LONGSIZE < IVSIZE && IVSIZE == 8
325# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
326# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
327# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
328# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
329# else
330# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
331# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
332# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
333# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
334# endif
2b00a750
JD
335# elif PTRSIZE == IVSIZE
336# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
337# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
338# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
339# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
340# else
341# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
342# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
343# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
344# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 345# endif
1109a392 346
7212898e 347# if defined(my_htolen) && defined(my_letohn) && \
1109a392 348 defined(my_htoben) && defined(my_betohn)
7212898e 349# define DO_BO_UNPACK_N(var, type) \
1109a392 350 STMT_START { \
66c611c5 351 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
352 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
353 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
354 default: break; \
355 } \
356 } STMT_END
357
7212898e 358# define DO_BO_PACK_N(var, type) \
1109a392 359 STMT_START { \
66c611c5 360 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
361 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
362 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
363 default: break; \
364 } \
365 } STMT_END
7212898e
NC
366# else
367# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
368# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
369# endif
370
f337b084 371#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 372
78d46eaa 373#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 374#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
375#define PACK_SIZE_MASK 0x3F
376
78d46eaa
NC
377/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
378 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
379
380#if TYPE_IS_SHRIEKING != 0x100
381 ++++shriek offset should be 256
382#endif
383
384typedef U8 packprops_t;
78d46eaa
NC
385#if 'J'-'I' == 1
386/* ASCII */
f43a9a31 387STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
388 /* normal */
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0,
1651fc44 394 /* C */ sizeof(unsigned char),
80a13697 395#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 396 /* D */ LONG_DOUBLESIZE,
80a13697 397#else
a7a3cfaa 398 0,
80a13697 399#endif
a7a3cfaa
TH
400 0,
401 /* F */ NVSIZE,
402 0, 0,
403 /* I */ sizeof(unsigned int),
404 /* J */ UVSIZE,
405 0,
406 /* L */ SIZE32,
407 0,
408 /* N */ SIZE32,
409 0, 0,
80a13697 410#if defined(HAS_QUAD)
a7a3cfaa 411 /* Q */ sizeof(Uquad_t),
80a13697 412#else
a7a3cfaa 413 0,
80a13697 414#endif
a7a3cfaa
TH
415 0,
416 /* S */ SIZE16,
417 0,
418 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
419 /* V */ SIZE32,
420 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
421 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
422 /* c */ sizeof(char),
423 /* d */ sizeof(double),
424 0,
425 /* f */ sizeof(float),
426 0, 0,
427 /* i */ sizeof(int),
428 /* j */ IVSIZE,
429 0,
430 /* l */ SIZE32,
431 0,
432 /* n */ SIZE16,
433 0,
434 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 435#if defined(HAS_QUAD)
a7a3cfaa 436 /* q */ sizeof(Quad_t),
80a13697 437#else
a7a3cfaa 438 0,
80a13697 439#endif
a7a3cfaa
TH
440 0,
441 /* s */ SIZE16,
442 0, 0,
443 /* v */ SIZE16,
444 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0,
454 /* shrieking */
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 /* I */ sizeof(unsigned int),
461 0, 0,
462 /* L */ sizeof(unsigned long),
463 0,
7212898e 464#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 465 /* N */ SIZE32,
7212898e 466#else
a7a3cfaa 467 0,
7212898e 468#endif
a7a3cfaa
TH
469 0, 0, 0, 0,
470 /* S */ sizeof(unsigned short),
471 0, 0,
7212898e 472#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 473 /* V */ SIZE32,
7212898e 474#else
a7a3cfaa 475 0,
7212898e 476#endif
a7a3cfaa
TH
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 0, 0,
479 /* i */ sizeof(int),
480 0, 0,
481 /* l */ sizeof(long),
482 0,
7212898e 483#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 484 /* n */ SIZE16,
7212898e 485#else
a7a3cfaa 486 0,
7212898e 487#endif
a7a3cfaa
TH
488 0, 0, 0, 0,
489 /* s */ sizeof(short),
490 0, 0,
7212898e 491#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 492 /* v */ SIZE16,
7212898e 493#else
a7a3cfaa 494 0,
7212898e 495#endif
a7a3cfaa
TH
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
505};
506#else
507/* EBCDIC (or bust) */
f43a9a31 508STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
509 /* normal */
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0,
519 /* c */ sizeof(char),
520 /* d */ sizeof(double),
521 0,
522 /* f */ sizeof(float),
523 0, 0,
524 /* i */ sizeof(int),
525 0, 0, 0, 0, 0, 0, 0,
526 /* j */ IVSIZE,
527 0,
528 /* l */ SIZE32,
529 0,
530 /* n */ SIZE16,
531 0,
532 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 533#if defined(HAS_QUAD)
a7a3cfaa 534 /* q */ sizeof(Quad_t),
80a13697 535#else
a7a3cfaa 536 0,
80a13697 537#endif
a7a3cfaa
TH
538 0, 0, 0, 0, 0, 0, 0, 0, 0,
539 /* s */ SIZE16,
540 0, 0,
541 /* v */ SIZE16,
542 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 545 /* C */ sizeof(unsigned char),
80a13697 546#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 547 /* D */ LONG_DOUBLESIZE,
80a13697 548#else
a7a3cfaa 549 0,
80a13697 550#endif
a7a3cfaa
TH
551 0,
552 /* F */ NVSIZE,
553 0, 0,
554 /* I */ sizeof(unsigned int),
555 0, 0, 0, 0, 0, 0, 0,
556 /* J */ UVSIZE,
557 0,
558 /* L */ SIZE32,
559 0,
560 /* N */ SIZE32,
561 0, 0,
80a13697 562#if defined(HAS_QUAD)
a7a3cfaa 563 /* Q */ sizeof(Uquad_t),
80a13697 564#else
a7a3cfaa 565 0,
80a13697 566#endif
a7a3cfaa
TH
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* S */ SIZE16,
569 0,
570 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
571 /* V */ SIZE32,
572 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* shrieking */
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* i */ sizeof(int),
586 0, 0, 0, 0, 0, 0, 0, 0, 0,
587 /* l */ sizeof(long),
588 0,
7212898e 589#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 590 /* n */ SIZE16,
7212898e 591#else
a7a3cfaa 592 0,
7212898e 593#endif
a7a3cfaa
TH
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 /* s */ sizeof(short),
596 0, 0,
7212898e 597#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 598 /* v */ SIZE16,
7212898e 599#else
a7a3cfaa 600 0,
7212898e 601#endif
a7a3cfaa
TH
602 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 0, 0, 0,
605 /* I */ sizeof(unsigned int),
606 0, 0, 0, 0, 0, 0, 0, 0, 0,
607 /* L */ sizeof(unsigned long),
608 0,
7212898e 609#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 610 /* N */ SIZE32,
7212898e 611#else
a7a3cfaa 612 0,
7212898e 613#endif
a7a3cfaa
TH
614 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
615 /* S */ sizeof(unsigned short),
616 0, 0,
7212898e 617#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 618 /* V */ SIZE32,
7212898e 619#else
a7a3cfaa 620 0,
7212898e 621#endif
a7a3cfaa
TH
622 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
623 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
624};
625#endif
626
08ca2aa3 627STATIC U8
f7fe979e 628uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 629{
08ca2aa3 630 STRLEN retlen;
0bcc34c2 631 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 632 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
633 /* We try to process malformed UTF-8 as much as possible (preferrably with
634 warnings), but these two mean we make no progress in the string and
635 might enter an infinite loop */
636 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
637 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
638 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 639 if (val >= 0x100) {
a2a5de95
NC
640 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
641 "Character in '%c' format wrapped in unpack",
642 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
643 val &= 0xff;
644 }
645 *s += retlen;
fe2774ed 646 return (U8)val;
08ca2aa3
TH
647}
648
f337b084
TH
649#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
650 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
651 *(U8 *)(s)++)
652
653STATIC bool
f7fe979e 654uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
655{
656 UV val;
657 STRLEN retlen;
f7fe979e 658 const char *from = *s;
08ca2aa3 659 int bad = 0;
f7fe979e 660 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
661 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
662 for (;buf_len > 0; buf_len--) {
663 if (from >= end) return FALSE;
f337b084 664 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
665 if (retlen == (STRLEN) -1 || retlen == 0) {
666 from += UTF8SKIP(from);
667 bad |= 1;
668 } else from += retlen;
669 if (val >= 0x100) {
670 bad |= 2;
671 val &= 0xff;
672 }
fe2774ed 673 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
674 }
675 /* We have enough characters for the buffer. Did we have problems ? */
676 if (bad) {
677 if (bad & 1) {
678 /* Rewalk the string fragment while warning */
f7fe979e 679 const char *ptr;
9e27e96a 680 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
681 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
682 if (ptr >= end) break;
f337b084 683 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
684 }
685 if (from > end) from = end;
686 }
a2a5de95
NC
687 if ((bad & 2))
688 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 689 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
690 "Character(s) in '%c' format wrapped in %s",
691 (int) TYPE_NO_MODIFIERS(datumtype),
692 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
693 }
694 *s = from;
695 return TRUE;
696}
697
698STATIC bool
f7fe979e 699next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 700{
97aff369 701 dVAR;
08ca2aa3 702 STRLEN retlen;
0bcc34c2 703 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
704 if (val >= 0x100 || !ISUUCHAR(val) ||
705 retlen == (STRLEN) -1 || retlen == 0) {
706 *out = 0;
707 return FALSE;
708 }
709 *out = PL_uudmap[val] & 077;
f337b084 710 *s += retlen;
08ca2aa3
TH
711 return TRUE;
712}
78d46eaa 713
64844641 714STATIC char *
14333449 715S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 716 const U8 * const end = start + len;
64844641 717
7918f24d
NC
718 PERL_ARGS_ASSERT_BYTES_TO_UNI;
719
f337b084 720 while (start < end) {
48fa4626
TS
721 const UV uv = NATIVE_TO_ASCII(*start);
722 if (UNI_IS_INVARIANT(uv))
723 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
724 else {
725 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
726 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
727 }
728 start++;
729 }
64844641 730 return dest;
f337b084
TH
731}
732
230e1fce
NC
733#define PUSH_BYTES(utf8, cur, buf, len) \
734STMT_START { \
64844641
AL
735 if (utf8) \
736 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
737 else { \
738 Copy(buf, cur, len, char); \
739 (cur) += (len); \
740 } \
f337b084
TH
741} STMT_END
742
743#define GROWING(utf8, cat, start, cur, in_len) \
744STMT_START { \
745 STRLEN glen = (in_len); \
3473cf63 746 if (utf8) glen *= UTF8_EXPAND; \
f337b084 747 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 748 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
749 (cur) = (start) + SvCUR(cat); \
750 } \
751} STMT_END
752
753#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
754STMT_START { \
f7fe979e 755 const STRLEN glen = (in_len); \
f337b084 756 STRLEN gl = glen; \
3473cf63 757 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
758 if ((cur) + gl >= (start) + SvLEN(cat)) { \
759 *cur = '\0'; \
b162af07 760 SvCUR_set((cat), (cur) - (start)); \
0bd48802 761 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
762 (cur) = (start) + SvCUR(cat); \
763 } \
764 PUSH_BYTES(utf8, cur, buf, glen); \
765} STMT_END
766
767#define PUSH_BYTE(utf8, s, byte) \
768STMT_START { \
769 if (utf8) { \
f7fe979e 770 const U8 au8 = (byte); \
64844641 771 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
772 } else *(U8 *)(s)++ = (byte); \
773} STMT_END
774
775/* Only to be used inside a loop (see the break) */
776#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
777STMT_START { \
778 STRLEN retlen; \
779 if (str >= end) break; \
780 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
781 if (retlen == (STRLEN) -1 || retlen == 0) { \
782 *cur = '\0'; \
783 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
784 } \
785 str += retlen; \
786} STMT_END
787
f7fe979e
AL
788static const char *_action( const tempsym_t* symptr )
789{
10edeb5d 790 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
791}
792
206947d2 793/* Returns the sizeof() struct described by pat */
028d1f6d 794STATIC I32
f337b084 795S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 796{
f337b084 797 I32 total = 0;
206947d2 798
7918f24d
NC
799 PERL_ARGS_ASSERT_MEASURE_STRUCT;
800
49704364 801 while (next_symbol(symptr)) {
f337b084 802 I32 len;
f7fe979e 803 int size;
f337b084
TH
804
805 switch (symptr->howlen) {
fc241834 806 case e_star:
49704364 807 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 808 _action( symptr ) );
49704364 809 break;
f337b084
TH
810 default:
811 /* e_no_len and e_number */
812 len = symptr->length;
813 break;
49704364
WL
814 }
815
a7a3cfaa 816 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 817 if (!size) {
f7fe979e 818 int star;
80a13697
NC
819 /* endianness doesn't influence the size of a type */
820 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
821 default:
822 Perl_croak(aTHX_ "Invalid type '%c' in %s",
823 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 824 _action( symptr ) );
28be1210
TH
825#ifdef PERL_PACK_CAN_SHRIEKSIGN
826 case '.' | TYPE_IS_SHRIEKING:
827 case '@' | TYPE_IS_SHRIEKING:
828#endif
80a13697 829 case '@':
28be1210 830 case '.':
80a13697
NC
831 case '/':
832 case 'U': /* XXXX Is it correct? */
833 case 'w':
834 case 'u':
835 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 836 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 837 _action( symptr ) );
80a13697
NC
838 case '%':
839 size = 0;
840 break;
841 case '(':
fc241834
RGS
842 {
843 tempsym_t savsym = *symptr;
844 symptr->patptr = savsym.grpbeg;
845 symptr->patend = savsym.grpend;
846 /* XXXX Theoretically, we need to measure many times at
847 different positions, since the subexpression may contain
848 alignment commands, but be not of aligned length.
849 Need to detect this and croak(). */
850 size = measure_struct(symptr);
851 *symptr = savsym;
852 break;
853 }
80a13697
NC
854 case 'X' | TYPE_IS_SHRIEKING:
855 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
856 */
857 if (!len) /* Avoid division by 0 */
858 len = 1;
859 len = total % len; /* Assumed: the start is aligned. */
860 /* FALL THROUGH */
861 case 'X':
862 size = -1;
863 if (total < len)
f7fe979e 864 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
865 break;
866 case 'x' | TYPE_IS_SHRIEKING:
867 if (!len) /* Avoid division by 0 */
868 len = 1;
869 star = total % len; /* Assumed: the start is aligned. */
870 if (star) /* Other portable ways? */
871 len = len - star;
872 else
873 len = 0;
874 /* FALL THROUGH */
875 case 'x':
876 case 'A':
877 case 'Z':
878 case 'a':
80a13697
NC
879 size = 1;
880 break;
881 case 'B':
882 case 'b':
883 len = (len + 7)/8;
884 size = 1;
885 break;
886 case 'H':
887 case 'h':
888 len = (len + 1)/2;
889 size = 1;
890 break;
78d46eaa 891
80a13697
NC
892 case 'P':
893 len = 1;
894 size = sizeof(char*);
78d46eaa
NC
895 break;
896 }
206947d2
IZ
897 }
898 total += len * size;
899 }
900 return total;
901}
902
49704364
WL
903
904/* locate matching closing parenthesis or bracket
905 * returns char pointer to char after match, or NULL
906 */
f7fe979e
AL
907STATIC const char *
908S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 909{
7918f24d
NC
910 PERL_ARGS_ASSERT_GROUP_END;
911
49704364 912 while (patptr < patend) {
f7fe979e 913 const char c = *patptr++;
49704364
WL
914
915 if (isSPACE(c))
916 continue;
917 else if (c == ender)
918 return patptr-1;
919 else if (c == '#') {
920 while (patptr < patend && *patptr != '\n')
921 patptr++;
922 continue;
923 } else if (c == '(')
924 patptr = group_end(patptr, patend, ')') + 1;
925 else if (c == '[')
926 patptr = group_end(patptr, patend, ']') + 1;
18529408 927 }
49704364
WL
928 Perl_croak(aTHX_ "No group ending character '%c' found in template",
929 ender);
930 return 0;
18529408
IZ
931}
932
49704364
WL
933
934/* Convert unsigned decimal number to binary.
935 * Expects a pointer to the first digit and address of length variable
936 * Advances char pointer to 1st non-digit char and returns number
fc241834 937 */
f7fe979e
AL
938STATIC const char *
939S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
940{
941 I32 len = *patptr++ - '0';
7918f24d
NC
942
943 PERL_ARGS_ASSERT_GET_NUM;
944
49704364
WL
945 while (isDIGIT(*patptr)) {
946 if (len >= 0x7FFFFFFF/10)
947 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
948 len = (len * 10) + (*patptr++ - '0');
949 }
950 *lenptr = len;
951 return patptr;
952}
953
954/* The marvellous template parsing routine: Using state stored in *symptr,
955 * locates next template code and count
956 */
957STATIC bool
f337b084 958S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 959{
f7fe979e 960 const char* patptr = symptr->patptr;
0bcc34c2 961 const char* const patend = symptr->patend;
49704364 962
7918f24d
NC
963 PERL_ARGS_ASSERT_NEXT_SYMBOL;
964
49704364
WL
965 symptr->flags &= ~FLAG_SLASH;
966
967 while (patptr < patend) {
968 if (isSPACE(*patptr))
969 patptr++;
970 else if (*patptr == '#') {
971 patptr++;
972 while (patptr < patend && *patptr != '\n')
973 patptr++;
974 if (patptr < patend)
975 patptr++;
976 } else {
fc241834 977 /* We should have found a template code */
49704364 978 I32 code = *patptr++ & 0xFF;
66c611c5 979 U32 inherited_modifiers = 0;
49704364
WL
980
981 if (code == ','){ /* grandfather in commas but with a warning */
982 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
983 symptr->flags |= FLAG_COMMA;
984 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 985 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
986 }
987 continue;
988 }
fc241834 989
49704364 990 /* for '(', skip to ')' */
fc241834 991 if (code == '(') {
49704364
WL
992 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
993 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 994 _action( symptr ) );
49704364
WL
995 symptr->grpbeg = patptr;
996 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
997 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
998 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 999 _action( symptr ) );
49704364
WL
1000 }
1001
66c611c5
MHM
1002 /* look for group modifiers to inherit */
1003 if (TYPE_ENDIANNESS(symptr->flags)) {
1004 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1005 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1006 }
1007
1109a392
MHM
1008 /* look for modifiers */
1009 while (patptr < patend) {
b7787f18
AL
1010 const char *allowed;
1011 I32 modifier;
1109a392
MHM
1012 switch (*patptr) {
1013 case '!':
1014 modifier = TYPE_IS_SHRIEKING;
7212898e 1015 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1016 break;
7212898e 1017#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1018 case '>':
1019 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1020 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1021 break;
1022 case '<':
1023 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1024 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1025 break;
f337b084 1026#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1027 default:
b7787f18
AL
1028 allowed = "";
1029 modifier = 0;
1109a392
MHM
1030 break;
1031 }
66c611c5 1032
1109a392
MHM
1033 if (modifier == 0)
1034 break;
66c611c5 1035
1109a392
MHM
1036 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1037 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1038 allowed, _action( symptr ) );
66c611c5
MHM
1039
1040 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1041 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1042 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1043 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1044 TYPE_ENDIANNESS_MASK)
1045 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1046 *patptr, _action( symptr ) );
66c611c5 1047
a2a5de95
NC
1048 if ((code & modifier)) {
1049 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1050 "Duplicate modifier '%c' after '%c' in %s",
1051 *patptr, (int) TYPE_NO_MODIFIERS(code),
1052 _action( symptr ) );
1109a392 1053 }
66c611c5 1054
1109a392
MHM
1055 code |= modifier;
1056 patptr++;
49704364
WL
1057 }
1058
66c611c5
MHM
1059 /* inherit modifiers */
1060 code |= inherited_modifiers;
1061
fc241834 1062 /* look for count and/or / */
49704364
WL
1063 if (patptr < patend) {
1064 if (isDIGIT(*patptr)) {
1065 patptr = get_num( patptr, &symptr->length );
1066 symptr->howlen = e_number;
1067
1068 } else if (*patptr == '*') {
1069 patptr++;
1070 symptr->howlen = e_star;
1071
1072 } else if (*patptr == '[') {
f7fe979e 1073 const char* lenptr = ++patptr;
49704364
WL
1074 symptr->howlen = e_number;
1075 patptr = group_end( patptr, patend, ']' ) + 1;
1076 /* what kind of [] is it? */
1077 if (isDIGIT(*lenptr)) {
1078 lenptr = get_num( lenptr, &symptr->length );
1079 if( *lenptr != ']' )
1080 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1081 _action( symptr ) );
49704364
WL
1082 } else {
1083 tempsym_t savsym = *symptr;
1084 symptr->patend = patptr-1;
1085 symptr->patptr = lenptr;
1086 savsym.length = measure_struct(symptr);
1087 *symptr = savsym;
1088 }
1089 } else {
1090 symptr->howlen = e_no_len;
1091 symptr->length = 1;
1092 }
1093
1094 /* try to find / */
1095 while (patptr < patend) {
1096 if (isSPACE(*patptr))
1097 patptr++;
1098 else if (*patptr == '#') {
1099 patptr++;
1100 while (patptr < patend && *patptr != '\n')
1101 patptr++;
1102 if (patptr < patend)
1103 patptr++;
1104 } else {
66c611c5 1105 if (*patptr == '/') {
49704364
WL
1106 symptr->flags |= FLAG_SLASH;
1107 patptr++;
66c611c5
MHM
1108 if (patptr < patend &&
1109 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1110 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1111 _action( symptr ) );
49704364
WL
1112 }
1113 break;
1114 }
18529408 1115 }
49704364
WL
1116 } else {
1117 /* at end - no count, no / */
1118 symptr->howlen = e_no_len;
1119 symptr->length = 1;
1120 }
1121
1122 symptr->code = code;
fc241834 1123 symptr->patptr = patptr;
49704364 1124 return TRUE;
18529408 1125 }
49704364 1126 }
fc241834 1127 symptr->patptr = patptr;
49704364 1128 return FALSE;
18529408
IZ
1129}
1130
18529408 1131/*
fc241834 1132 There is no way to cleanly handle the case where we should process the
08ca2aa3 1133 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1134 (e.g. estimates like strend-s as an upper bound for the number of
1135 characters left wouldn't work). So if we foresee the need of this
1136 (pattern starts with U or contains U0), we want to work on the encoded
1137 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1138 themselves if they need to do a lot of unpacks like this on it
1139*/
fc241834 1140STATIC bool
08ca2aa3
TH
1141need_utf8(const char *pat, const char *patend)
1142{
1143 bool first = TRUE;
7918f24d
NC
1144
1145 PERL_ARGS_ASSERT_NEED_UTF8;
1146
08ca2aa3
TH
1147 while (pat < patend) {
1148 if (pat[0] == '#') {
1149 pat++;
f7fe979e 1150 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1151 if (!pat) return FALSE;
1152 } else if (pat[0] == 'U') {
1153 if (first || pat[1] == '0') return TRUE;
1154 } else first = FALSE;
1155 pat++;
1156 }
1157 return FALSE;
1158}
1159
1160STATIC char
1161first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1162 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1163
08ca2aa3
TH
1164 while (pat < patend) {
1165 if (pat[0] != '#') return pat[0];
1166 pat++;
f7fe979e 1167 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1168 if (!pat) return 0;
1169 pat++;
1170 }
1171 return 0;
1172}
1173
1174/*
7accc089
JH
1175=for apidoc unpackstring
1176
608d3aed
WL
1177The engine implementing unpack() Perl function. C<unpackstring> puts the
1178extracted list items on the stack and returns the number of elements.
1179Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1180
1181=cut */
1182
1183I32
f7fe979e 1184Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1185{
f7fe979e 1186 tempsym_t sym;
08ca2aa3 1187
7918f24d
NC
1188 PERL_ARGS_ASSERT_UNPACKSTRING;
1189
f337b084 1190 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1191 else if (need_utf8(pat, patend)) {
1192 /* We probably should try to avoid this in case a scalar context call
1193 wouldn't get to the "U0" */
1194 STRLEN len = strend - s;
230e1fce 1195 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1196 SAVEFREEPV(s);
1197 strend = s + len;
f337b084 1198 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1199 }
1200
f337b084
TH
1201 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1202 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1203
f7fe979e 1204 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1205
1206 return unpack_rec(&sym, s, s, strend, NULL );
1207}
1208
4136a0f7 1209STATIC I32
f7fe979e 1210S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1211{
27da23d5 1212 dVAR; dSP;
18529408 1213 SV *sv;
f7fe979e 1214 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1215 howlen_t howlen;
a6ec74c1 1216 I32 checksum = 0;
92d41999 1217 UV cuv = 0;
a6ec74c1 1218 NV cdouble = 0.0;
f337b084 1219 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1220 bool beyond = FALSE;
21c16052 1221 bool explicit_length;
9e27e96a 1222 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1223 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1224
1225 PERL_ARGS_ASSERT_UNPACK_REC;
1226
28be1210 1227 symptr->strbeg = s - strbeg;
49704364 1228
49704364 1229 while (next_symbol(symptr)) {
a7a3cfaa 1230 packprops_t props;
9e27e96a 1231 I32 len;
f337b084 1232 I32 datumtype = symptr->code;
206947d2 1233 /* do first one only unless in list context
08ca2aa3 1234 / is implemented by unpacking the count, then popping it from the
206947d2 1235 stack, so must check that we're not in the middle of a / */
49704364 1236 if ( unpack_only_one
206947d2 1237 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1238 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1239 break;
49704364 1240
f337b084 1241 switch (howlen = symptr->howlen) {
fc241834
RGS
1242 case e_star:
1243 len = strend - strbeg; /* long enough */
49704364 1244 break;
f337b084
TH
1245 default:
1246 /* e_no_len and e_number */
1247 len = symptr->length;
1248 break;
49704364 1249 }
18529408 1250
21c16052 1251 explicit_length = TRUE;
a6ec74c1 1252 redo_switch:
49704364 1253 beyond = s >= strend;
a7a3cfaa
TH
1254
1255 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1256 if (props) {
1257 /* props nonzero means we can process this letter. */
9e27e96a
AL
1258 const long size = props & PACK_SIZE_MASK;
1259 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1260 if (len > howmany)
1261 len = howmany;
1262
1263 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1264 if (len && unpack_only_one) len = 1;
1265 EXTEND(SP, len);
1266 EXTEND_MORTAL(len);
78d46eaa
NC
1267 }
1268 }
a7a3cfaa 1269
1109a392 1270 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1271 default:
1109a392 1272 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1273
a6ec74c1 1274 case '%':
49704364 1275 if (howlen == e_no_len)
18529408 1276 len = 16; /* len is not specified */
a6ec74c1 1277 checksum = len;
92d41999 1278 cuv = 0;
a6ec74c1 1279 cdouble = 0;
18529408 1280 continue;
a6ec74c1 1281 break;
18529408
IZ
1282 case '(':
1283 {
49704364 1284 tempsym_t savsym = *symptr;
9e27e96a 1285 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1286 symptr->flags |= group_modifiers;
49704364 1287 symptr->patend = savsym.grpend;
28be1210 1288 symptr->previous = &savsym;
49704364 1289 symptr->level++;
18529408 1290 PUTBACK;
c6f750d1 1291 if (len && unpack_only_one) len = 1;
18529408 1292 while (len--) {
49704364 1293 symptr->patptr = savsym.grpbeg;
f337b084
TH
1294 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1295 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1296 unpack_rec(symptr, s, strbeg, strend, &s);
1297 if (s == strend && savsym.howlen == e_star)
49704364 1298 break; /* No way to continue */
18529408
IZ
1299 }
1300 SPAGAIN;
28be1210 1301 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1302 *symptr = savsym;
18529408
IZ
1303 break;
1304 }
28be1210
TH
1305#ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 case '.' | TYPE_IS_SHRIEKING:
1307#endif
1308 case '.': {
9e27e96a 1309 const char *from;
28be1210
TH
1310 SV *sv;
1311#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1312 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1313#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1314 const bool u8 = utf8;
28be1210
TH
1315#endif
1316 if (howlen == e_star) from = strbeg;
1317 else if (len <= 0) from = s;
1318 else {
1319 tempsym_t *group = symptr;
1320
1321 while (--len && group) group = group->previous;
1322 from = group ? strbeg + group->strbeg : strbeg;
1323 }
1324 sv = from <= s ?
00646304
CB
1325 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1326 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1327 mXPUSHs(sv);
28be1210
TH
1328 break;
1329 }
1330#ifdef PERL_PACK_CAN_SHRIEKSIGN
1331 case '@' | TYPE_IS_SHRIEKING:
1332#endif
a6ec74c1 1333 case '@':
28be1210
TH
1334 s = strbeg + symptr->strbeg;
1335#ifdef PERL_PACK_CAN_SHRIEKSIGN
1336 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1337#else /* PERL_PACK_CAN_SHRIEKSIGN */
1338 if (utf8)
1339#endif
1340 {
08ca2aa3
TH
1341 while (len > 0) {
1342 if (s >= strend)
1343 Perl_croak(aTHX_ "'@' outside of string in unpack");
1344 s += UTF8SKIP(s);
1345 len--;
1346 }
1347 if (s > strend)
1348 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1349 } else {
28be1210 1350 if (strend-s < len)
fc241834 1351 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1352 s += len;
08ca2aa3 1353 }
a6ec74c1 1354 break;
62f95557
IZ
1355 case 'X' | TYPE_IS_SHRIEKING:
1356 if (!len) /* Avoid division by 0 */
1357 len = 1;
08ca2aa3 1358 if (utf8) {
f7fe979e 1359 const char *hop, *last;
f337b084
TH
1360 I32 l = len;
1361 hop = last = strbeg;
1362 while (hop < s) {
1363 hop += UTF8SKIP(hop);
1364 if (--l == 0) {
08ca2aa3 1365 last = hop;
f337b084
TH
1366 l = len;
1367 }
fc241834 1368 }
f337b084
TH
1369 if (last > s)
1370 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1371 s = last;
1372 break;
f337b084
TH
1373 }
1374 len = (s - strbeg) % len;
62f95557 1375 /* FALL THROUGH */
a6ec74c1 1376 case 'X':
08ca2aa3
TH
1377 if (utf8) {
1378 while (len > 0) {
1379 if (s <= strbeg)
1380 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1381 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1382 if (s <= strbeg)
1383 Perl_croak(aTHX_ "'X' outside of string in unpack");
1384 }
1385 len--;
1386 }
1387 } else {
fc241834
RGS
1388 if (len > s - strbeg)
1389 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1390 s -= len;
08ca2aa3 1391 }
a6ec74c1 1392 break;
9e27e96a
AL
1393 case 'x' | TYPE_IS_SHRIEKING: {
1394 I32 ai32;
62f95557
IZ
1395 if (!len) /* Avoid division by 0 */
1396 len = 1;
230e1fce
NC
1397 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1398 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1399 if (ai32 == 0) break;
1400 len -= ai32;
9e27e96a 1401 }
62f95557 1402 /* FALL THROUGH */
a6ec74c1 1403 case 'x':
08ca2aa3
TH
1404 if (utf8) {
1405 while (len>0) {
1406 if (s >= strend)
1407 Perl_croak(aTHX_ "'x' outside of string in unpack");
1408 s += UTF8SKIP(s);
1409 len--;
1410 }
1411 } else {
fc241834
RGS
1412 if (len > strend - s)
1413 Perl_croak(aTHX_ "'x' outside of string in unpack");
1414 s += len;
f337b084 1415 }
a6ec74c1
JH
1416 break;
1417 case '/':
49704364
WL
1418 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1419 break;
a6ec74c1
JH
1420 case 'A':
1421 case 'Z':
1422 case 'a':
08ca2aa3
TH
1423 if (checksum) {
1424 /* Preliminary length estimate is assumed done in 'W' */
1425 if (len > strend - s) len = strend - s;
1426 goto W_checksum;
1427 }
1428 if (utf8) {
1429 I32 l;
f7fe979e 1430 const char *hop;
08ca2aa3
TH
1431 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1432 if (hop >= strend) {
1433 if (hop > strend)
1434 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1435 break;
fc241834 1436 }
a6ec74c1 1437 }
08ca2aa3
TH
1438 if (hop > strend)
1439 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1440 len = hop - s;
1441 } else if (len > strend - s)
1442 len = strend - s;
1443
1444 if (datumtype == 'Z') {
1445 /* 'Z' strips stuff after first null */
f7fe979e 1446 const char *ptr, *end;
f337b084
TH
1447 end = s + len;
1448 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1449 sv = newSVpvn(s, ptr-s);
1450 if (howlen == e_star) /* exact for 'Z*' */
1451 len = ptr-s + (ptr != strend ? 1 : 0);
1452 } else if (datumtype == 'A') {
1453 /* 'A' strips both nulls and spaces */
f7fe979e 1454 const char *ptr;
18bdf90a
TH
1455 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1456 for (ptr = s+len-1; ptr >= s; ptr--)
1457 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1458 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1459 if (ptr >= s) ptr += UTF8SKIP(ptr);
1460 else ptr++;
28be1210 1461 if (ptr > s+len)
18bdf90a
TH
1462 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1463 } else {
1464 for (ptr = s+len-1; ptr >= s; ptr--)
1465 if (*ptr != 0 && !isSPACE(*ptr)) break;
1466 ptr++;
1467 }
08ca2aa3
TH
1468 sv = newSVpvn(s, ptr-s);
1469 } else sv = newSVpvn(s, len);
1470
1471 if (utf8) {
1472 SvUTF8_on(sv);
1473 /* Undo any upgrade done due to need_utf8() */
f337b084 1474 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1475 sv_utf8_downgrade(sv, 0);
a6ec74c1 1476 }
6e449a3a 1477 mXPUSHs(sv);
08ca2aa3 1478 s += len;
a6ec74c1
JH
1479 break;
1480 case 'B':
08ca2aa3
TH
1481 case 'b': {
1482 char *str;
49704364 1483 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1484 len = (strend - s) * 8;
1485 if (checksum) {
f337b084 1486 if (utf8)
08ca2aa3 1487 while (len >= 8 && s < strend) {
f337b084 1488 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1489 len -= 8;
1490 }
f337b084 1491 else
fc241834 1492 while (len >= 8) {
08ca2aa3 1493 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1494 len -= 8;
1495 }
08ca2aa3
TH
1496 if (len && s < strend) {
1497 U8 bits;
f337b084
TH
1498 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1499 if (datumtype == 'b')
a6ec74c1 1500 while (len-- > 0) {
92d41999 1501 if (bits & 1) cuv++;
a6ec74c1
JH
1502 bits >>= 1;
1503 }
f337b084 1504 else
a6ec74c1 1505 while (len-- > 0) {
08ca2aa3 1506 if (bits & 0x80) cuv++;
a6ec74c1
JH
1507 bits <<= 1;
1508 }
fc241834 1509 }
a6ec74c1
JH
1510 break;
1511 }
08ca2aa3 1512
561b68a9 1513 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1514 SvPOK_on(sv);
1515 str = SvPVX(sv);
1516 if (datumtype == 'b') {
f337b084 1517 U8 bits = 0;
f7fe979e 1518 const I32 ai32 = len;
08ca2aa3
TH
1519 for (len = 0; len < ai32; len++) {
1520 if (len & 7) bits >>= 1;
1521 else if (utf8) {
1522 if (s >= strend) break;
f337b084 1523 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1524 } else bits = *(U8 *) s++;
1525 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1526 }
08ca2aa3 1527 } else {
f337b084 1528 U8 bits = 0;
f7fe979e 1529 const I32 ai32 = len;
08ca2aa3
TH
1530 for (len = 0; len < ai32; len++) {
1531 if (len & 7) bits <<= 1;
1532 else if (utf8) {
1533 if (s >= strend) break;
f337b084 1534 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1535 } else bits = *(U8 *) s++;
1536 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1537 }
1538 }
1539 *str = '\0';
aa07b2f6 1540 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1541 XPUSHs(sv);
a6ec74c1 1542 break;
08ca2aa3 1543 }
a6ec74c1 1544 case 'H':
08ca2aa3
TH
1545 case 'h': {
1546 char *str;
fc241834 1547 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1548 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1549 len = (strend - s) * 2;
561b68a9 1550 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1551 SvPOK_on(sv);
1552 str = SvPVX(sv);
1553 if (datumtype == 'h') {
f337b084 1554 U8 bits = 0;
9e27e96a 1555 I32 ai32 = len;
fc241834
RGS
1556 for (len = 0; len < ai32; len++) {
1557 if (len & 1) bits >>= 4;
1558 else if (utf8) {
1559 if (s >= strend) break;
f337b084 1560 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1561 } else bits = * (U8 *) s++;
a6ec74c1
JH
1562 *str++ = PL_hexdigit[bits & 15];
1563 }
08ca2aa3 1564 } else {
f337b084 1565 U8 bits = 0;
f7fe979e 1566 const I32 ai32 = len;
08ca2aa3
TH
1567 for (len = 0; len < ai32; len++) {
1568 if (len & 1) bits <<= 4;
1569 else if (utf8) {
1570 if (s >= strend) break;
f337b084 1571 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1572 } else bits = *(U8 *) s++;
a6ec74c1
JH
1573 *str++ = PL_hexdigit[(bits >> 4) & 15];
1574 }
1575 }
1576 *str = '\0';
aa07b2f6 1577 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1578 XPUSHs(sv);
a6ec74c1 1579 break;
08ca2aa3 1580 }
1651fc44
ML
1581 case 'C':
1582 if (len == 0) {
1583 if (explicit_length)
1584 /* Switch to "character" mode */
1585 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1586 break;
1587 }
1588 /* FALL THROUGH */
a6ec74c1 1589 case 'c':
1651fc44
ML
1590 while (len-- > 0 && s < strend) {
1591 int aint;
1592 if (utf8)
1593 {
1594 STRLEN retlen;
1595 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1596 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1597 if (retlen == (STRLEN) -1 || retlen == 0)
1598 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1599 s += retlen;
1600 }
1601 else
1602 aint = *(U8 *)(s)++;
1603 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1604 aint -= 256;
08ca2aa3 1605 if (!checksum)
6e449a3a 1606 mPUSHi(aint);
73cb7263
NC
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV)aint;
1609 else
1610 cuv += aint;
a6ec74c1
JH
1611 }
1612 break;
08ca2aa3
TH
1613 case 'W':
1614 W_checksum:
1651fc44 1615 if (utf8) {
08ca2aa3 1616 while (len-- > 0 && s < strend) {
08ca2aa3 1617 STRLEN retlen;
f7fe979e 1618 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1619 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1620 if (retlen == (STRLEN) -1 || retlen == 0)
1621 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1622 s += retlen;
1623 if (!checksum)
6e449a3a 1624 mPUSHu(val);
08ca2aa3
TH
1625 else if (checksum > bits_in_uv)
1626 cdouble += (NV) val;
d6d3e8bd 1627 else
08ca2aa3 1628 cuv += val;
fc241834 1629 }
08ca2aa3 1630 } else if (!checksum)
a6ec74c1 1631 while (len-- > 0) {
f7fe979e 1632 const U8 ch = *(U8 *) s++;
6e449a3a 1633 mPUSHu(ch);
a6ec74c1 1634 }
08ca2aa3
TH
1635 else if (checksum > bits_in_uv)
1636 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1637 else
1638 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1639 break;
1640 case 'U':
35bcd338 1641 if (len == 0) {
08ca2aa3
TH
1642 if (explicit_length) {
1643 /* Switch to "bytes in UTF-8" mode */
f337b084 1644 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1645 else
1646 /* Should be impossible due to the need_utf8() test */
1647 Perl_croak(aTHX_ "U0 mode on a byte string");
1648 }
35bcd338
JH
1649 break;
1650 }
08ca2aa3 1651 if (len > strend - s) len = strend - s;
fc241834 1652 if (!checksum) {
08ca2aa3
TH
1653 if (len && unpack_only_one) len = 1;
1654 EXTEND(SP, len);
1655 EXTEND_MORTAL(len);
fc241834 1656 }
08ca2aa3
TH
1657 while (len-- > 0 && s < strend) {
1658 STRLEN retlen;
1659 UV auv;
1660 if (utf8) {
1661 U8 result[UTF8_MAXLEN];
f7fe979e 1662 const char *ptr = s;
08ca2aa3 1663 STRLEN len;
08ca2aa3
TH
1664 /* Bug: warns about bad utf8 even if we are short on bytes
1665 and will break out of the loop */
230e1fce
NC
1666 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1667 'U'))
08ca2aa3
TH
1668 break;
1669 len = UTF8SKIP(result);
fc241834 1670 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1671 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1672 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1673 s = ptr;
1674 } else {
1675 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1676 if (retlen == (STRLEN) -1 || retlen == 0)
1677 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1678 s += retlen;
1679 }
1680 if (!checksum)
6e449a3a 1681 mPUSHu(auv);
73cb7263 1682 else if (checksum > bits_in_uv)
08ca2aa3 1683 cdouble += (NV) auv;
73cb7263 1684 else
08ca2aa3 1685 cuv += auv;
a6ec74c1
JH
1686 }
1687 break;
49704364
WL
1688 case 's' | TYPE_IS_SHRIEKING:
1689#if SHORTSIZE != SIZE16
73cb7263 1690 while (len-- > 0) {
08ca2aa3 1691 short ashort;
f337b084
TH
1692 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1693 DO_BO_UNPACK(ashort, s);
08ca2aa3 1694 if (!checksum)
6e449a3a 1695 mPUSHi(ashort);
73cb7263
NC
1696 else if (checksum > bits_in_uv)
1697 cdouble += (NV)ashort;
1698 else
1699 cuv += ashort;
49704364
WL
1700 }
1701 break;
1702#else
1703 /* Fallthrough! */
a6ec74c1 1704#endif
49704364 1705 case 's':
73cb7263 1706 while (len-- > 0) {
08ca2aa3
TH
1707 I16 ai16;
1708
1709#if U16SIZE > SIZE16
1710 ai16 = 0;
1711#endif
f337b084 1712 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1713 DO_BO_UNPACK(ai16, 16);
1109a392 1714#if U16SIZE > SIZE16
73cb7263
NC
1715 if (ai16 > 32767)
1716 ai16 -= 65536;
a6ec74c1 1717#endif
08ca2aa3 1718 if (!checksum)
6e449a3a 1719 mPUSHi(ai16);
73cb7263
NC
1720 else if (checksum > bits_in_uv)
1721 cdouble += (NV)ai16;
1722 else
1723 cuv += ai16;
a6ec74c1
JH
1724 }
1725 break;
49704364
WL
1726 case 'S' | TYPE_IS_SHRIEKING:
1727#if SHORTSIZE != SIZE16
73cb7263 1728 while (len-- > 0) {
08ca2aa3 1729 unsigned short aushort;
f337b084
TH
1730 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1731 DO_BO_UNPACK(aushort, s);
08ca2aa3 1732 if (!checksum)
6e449a3a 1733 mPUSHu(aushort);
73cb7263
NC
1734 else if (checksum > bits_in_uv)
1735 cdouble += (NV)aushort;
1736 else
1737 cuv += aushort;
49704364
WL
1738 }
1739 break;
1740#else
1741 /* Fallhrough! */
1742#endif
a6ec74c1
JH
1743 case 'v':
1744 case 'n':
1745 case 'S':
73cb7263 1746 while (len-- > 0) {
08ca2aa3
TH
1747 U16 au16;
1748#if U16SIZE > SIZE16
1749 au16 = 0;
1750#endif
f337b084 1751 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1752 DO_BO_UNPACK(au16, 16);
a6ec74c1 1753#ifdef HAS_NTOHS
73cb7263
NC
1754 if (datumtype == 'n')
1755 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1756#endif
1757#ifdef HAS_VTOHS
73cb7263
NC
1758 if (datumtype == 'v')
1759 au16 = vtohs(au16);
a6ec74c1 1760#endif
08ca2aa3 1761 if (!checksum)
6e449a3a 1762 mPUSHu(au16);
73cb7263 1763 else if (checksum > bits_in_uv)
f337b084 1764 cdouble += (NV) au16;
73cb7263
NC
1765 else
1766 cuv += au16;
a6ec74c1
JH
1767 }
1768 break;
7212898e 1769#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1770 case 'v' | TYPE_IS_SHRIEKING:
1771 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1772 while (len-- > 0) {
08ca2aa3
TH
1773 I16 ai16;
1774# if U16SIZE > SIZE16
1775 ai16 = 0;
1776# endif
f337b084 1777 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1778# ifdef HAS_NTOHS
73cb7263 1779 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1780 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1781# endif /* HAS_NTOHS */
1782# ifdef HAS_VTOHS
73cb7263 1783 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1784 ai16 = (I16) vtohs((U16) ai16);
1785# endif /* HAS_VTOHS */
1786 if (!checksum)
6e449a3a 1787 mPUSHi(ai16);
73cb7263 1788 else if (checksum > bits_in_uv)
08ca2aa3 1789 cdouble += (NV) ai16;
73cb7263
NC
1790 else
1791 cuv += ai16;
068bd2e7
MHM
1792 }
1793 break;
08ca2aa3 1794#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1795 case 'i':
49704364 1796 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1797 while (len-- > 0) {
08ca2aa3 1798 int aint;
f337b084
TH
1799 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1800 DO_BO_UNPACK(aint, i);
08ca2aa3 1801 if (!checksum)
6e449a3a 1802 mPUSHi(aint);
73cb7263
NC
1803 else if (checksum > bits_in_uv)
1804 cdouble += (NV)aint;
1805 else
1806 cuv += aint;
a6ec74c1
JH
1807 }
1808 break;
1809 case 'I':
49704364 1810 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1811 while (len-- > 0) {
08ca2aa3 1812 unsigned int auint;
f337b084
TH
1813 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1814 DO_BO_UNPACK(auint, i);
08ca2aa3 1815 if (!checksum)
6e449a3a 1816 mPUSHu(auint);
73cb7263
NC
1817 else if (checksum > bits_in_uv)
1818 cdouble += (NV)auint;
1819 else
1820 cuv += auint;
a6ec74c1
JH
1821 }
1822 break;
92d41999 1823 case 'j':
73cb7263 1824 while (len-- > 0) {
08ca2aa3 1825 IV aiv;
f337b084 1826 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1827#if IVSIZE == INTSIZE
f337b084 1828 DO_BO_UNPACK(aiv, i);
1109a392 1829#elif IVSIZE == LONGSIZE
f337b084 1830 DO_BO_UNPACK(aiv, l);
1109a392 1831#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1832 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1833#else
1834 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1835#endif
08ca2aa3 1836 if (!checksum)
6e449a3a 1837 mPUSHi(aiv);
73cb7263
NC
1838 else if (checksum > bits_in_uv)
1839 cdouble += (NV)aiv;
1840 else
1841 cuv += aiv;
92d41999
JH
1842 }
1843 break;
1844 case 'J':
73cb7263 1845 while (len-- > 0) {
08ca2aa3 1846 UV auv;
f337b084 1847 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1848#if IVSIZE == INTSIZE
f337b084 1849 DO_BO_UNPACK(auv, i);
08ca2aa3 1850#elif IVSIZE == LONGSIZE
f337b084 1851 DO_BO_UNPACK(auv, l);
08ca2aa3 1852#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1853 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1854#else
1855 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1856#endif
08ca2aa3 1857 if (!checksum)
6e449a3a 1858 mPUSHu(auv);
73cb7263
NC
1859 else if (checksum > bits_in_uv)
1860 cdouble += (NV)auv;
1861 else
1862 cuv += auv;
92d41999
JH
1863 }
1864 break;
49704364
WL
1865 case 'l' | TYPE_IS_SHRIEKING:
1866#if LONGSIZE != SIZE32
73cb7263 1867 while (len-- > 0) {
08ca2aa3 1868 long along;
f337b084
TH
1869 SHIFT_VAR(utf8, s, strend, along, datumtype);
1870 DO_BO_UNPACK(along, l);
08ca2aa3 1871 if (!checksum)
6e449a3a 1872 mPUSHi(along);
73cb7263
NC
1873 else if (checksum > bits_in_uv)
1874 cdouble += (NV)along;
1875 else
1876 cuv += along;
49704364
WL
1877 }
1878 break;
1879#else
1880 /* Fallthrough! */
a6ec74c1 1881#endif
49704364 1882 case 'l':
73cb7263 1883 while (len-- > 0) {
08ca2aa3
TH
1884 I32 ai32;
1885#if U32SIZE > SIZE32
1886 ai32 = 0;
1887#endif
f337b084 1888 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1889 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1890#if U32SIZE > SIZE32
08ca2aa3 1891 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1892#endif
08ca2aa3 1893 if (!checksum)
6e449a3a 1894 mPUSHi(ai32);
73cb7263
NC
1895 else if (checksum > bits_in_uv)
1896 cdouble += (NV)ai32;
1897 else
1898 cuv += ai32;
a6ec74c1
JH
1899 }
1900 break;
49704364
WL
1901 case 'L' | TYPE_IS_SHRIEKING:
1902#if LONGSIZE != SIZE32
73cb7263 1903 while (len-- > 0) {
08ca2aa3 1904 unsigned long aulong;
f337b084
TH
1905 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1906 DO_BO_UNPACK(aulong, l);
08ca2aa3 1907 if (!checksum)
6e449a3a 1908 mPUSHu(aulong);
73cb7263
NC
1909 else if (checksum > bits_in_uv)
1910 cdouble += (NV)aulong;
1911 else
1912 cuv += aulong;
49704364
WL
1913 }
1914 break;
1915#else
1916 /* Fall through! */
1917#endif
a6ec74c1
JH
1918 case 'V':
1919 case 'N':
1920 case 'L':
73cb7263 1921 while (len-- > 0) {
08ca2aa3
TH
1922 U32 au32;
1923#if U32SIZE > SIZE32
1924 au32 = 0;
1925#endif
f337b084 1926 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1927 DO_BO_UNPACK(au32, 32);
a6ec74c1 1928#ifdef HAS_NTOHL
73cb7263
NC
1929 if (datumtype == 'N')
1930 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1931#endif
1932#ifdef HAS_VTOHL
73cb7263
NC
1933 if (datumtype == 'V')
1934 au32 = vtohl(au32);
a6ec74c1 1935#endif
08ca2aa3 1936 if (!checksum)
6e449a3a 1937 mPUSHu(au32);
fc241834
RGS
1938 else if (checksum > bits_in_uv)
1939 cdouble += (NV)au32;
1940 else
1941 cuv += au32;
a6ec74c1
JH
1942 }
1943 break;
7212898e 1944#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1945 case 'V' | TYPE_IS_SHRIEKING:
1946 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1947 while (len-- > 0) {
08ca2aa3
TH
1948 I32 ai32;
1949# if U32SIZE > SIZE32
1950 ai32 = 0;
1951# endif
f337b084 1952 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1953# ifdef HAS_NTOHL
73cb7263
NC
1954 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1955 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1956# endif
1957# ifdef HAS_VTOHL
73cb7263
NC
1958 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1959 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1960# endif
1961 if (!checksum)
6e449a3a 1962 mPUSHi(ai32);
73cb7263
NC
1963 else if (checksum > bits_in_uv)
1964 cdouble += (NV)ai32;
1965 else
1966 cuv += ai32;
068bd2e7
MHM
1967 }
1968 break;
08ca2aa3 1969#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1970 case 'p':
a6ec74c1 1971 while (len-- > 0) {
f7fe979e 1972 const char *aptr;
f337b084 1973 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1974 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1975 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1976 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1977 }
1978 break;
1979 case 'w':
a6ec74c1
JH
1980 {
1981 UV auv = 0;
1982 U32 bytes = 0;
fc241834 1983
08ca2aa3
TH
1984 while (len > 0 && s < strend) {
1985 U8 ch;
f337b084 1986 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1987 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1988 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1989 if (ch < 0x80) {
a6ec74c1 1990 bytes = 0;
6e449a3a 1991 mPUSHu(auv);
a6ec74c1
JH
1992 len--;
1993 auv = 0;
08ca2aa3 1994 continue;
a6ec74c1 1995 }
08ca2aa3 1996 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1997 const char *t;
a6ec74c1 1998
f5992bc4 1999 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2000 while (s < strend) {
f337b084 2001 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2002 sv = mul128(sv, (U8)(ch & 0x7f));
2003 if (!(ch & 0x80)) {
a6ec74c1
JH
2004 bytes = 0;
2005 break;
2006 }
2007 }
10516c54 2008 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2009 while (*t == '0')
2010 t++;
2011 sv_chop(sv, t);
6e449a3a 2012 mPUSHs(sv);
a6ec74c1
JH
2013 len--;
2014 auv = 0;
2015 }
2016 }
2017 if ((s >= strend) && bytes)
49704364 2018 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2019 }
2020 break;
2021 case 'P':
49704364
WL
2022 if (symptr->howlen == e_star)
2023 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2024 EXTEND(SP, 1);
2d3e0934 2025 if (s + sizeof(char*) <= strend) {
08ca2aa3 2026 char *aptr;
f337b084 2027 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2028 DO_BO_UNPACK_PC(aptr);
fc241834 2029 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2030 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2031 }
a6ec74c1
JH
2032 break;
2033#ifdef HAS_QUAD
2034 case 'q':
73cb7263 2035 while (len-- > 0) {
08ca2aa3 2036 Quad_t aquad;
f337b084
TH
2037 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2038 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2039 if (!checksum)
6e449a3a
MHM
2040 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2041 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2042 else if (checksum > bits_in_uv)
2043 cdouble += (NV)aquad;
2044 else
2045 cuv += aquad;
2046 }
a6ec74c1
JH
2047 break;
2048 case 'Q':
73cb7263 2049 while (len-- > 0) {
08ca2aa3 2050 Uquad_t auquad;
f337b084
TH
2051 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2052 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2053 if (!checksum)
6e449a3a
MHM
2054 mPUSHs(auquad <= UV_MAX ?
2055 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2056 else if (checksum > bits_in_uv)
2057 cdouble += (NV)auquad;
2058 else
2059 cuv += auquad;
a6ec74c1
JH
2060 }
2061 break;
08ca2aa3 2062#endif /* HAS_QUAD */
a6ec74c1
JH
2063 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2064 case 'f':
73cb7263 2065 while (len-- > 0) {
08ca2aa3 2066 float afloat;
f337b084 2067 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2068 DO_BO_UNPACK_N(afloat, float);
2069 if (!checksum)
6e449a3a 2070 mPUSHn(afloat);
08ca2aa3 2071 else
73cb7263 2072 cdouble += afloat;
fc241834 2073 }
a6ec74c1
JH
2074 break;
2075 case 'd':
73cb7263 2076 while (len-- > 0) {
08ca2aa3 2077 double adouble;
f337b084 2078 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2079 DO_BO_UNPACK_N(adouble, double);
2080 if (!checksum)
6e449a3a 2081 mPUSHn(adouble);
08ca2aa3 2082 else
73cb7263 2083 cdouble += adouble;
fc241834 2084 }
a6ec74c1 2085 break;
92d41999 2086 case 'F':
73cb7263 2087 while (len-- > 0) {
08ca2aa3 2088 NV anv;
f337b084 2089 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2090 DO_BO_UNPACK_N(anv, NV);
2091 if (!checksum)
6e449a3a 2092 mPUSHn(anv);
08ca2aa3 2093 else
73cb7263 2094 cdouble += anv;
fc241834 2095 }
92d41999
JH
2096 break;
2097#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2098 case 'D':
73cb7263 2099 while (len-- > 0) {
08ca2aa3 2100 long double aldouble;
f337b084 2101 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2102 DO_BO_UNPACK_N(aldouble, long double);
2103 if (!checksum)
6e449a3a 2104 mPUSHn(aldouble);
08ca2aa3
TH
2105 else
2106 cdouble += aldouble;
92d41999
JH
2107 }
2108 break;
2109#endif
a6ec74c1 2110 case 'u':
08ca2aa3 2111 {
f7fe979e 2112 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2113 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2114 if (l) SvPOK_on(sv);
2115 }
2116 if (utf8) {
2117 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2118 I32 a, b, c, d;
db187877 2119 char hunk[3];
08ca2aa3 2120
08ca2aa3
TH
2121 while (len > 0) {
2122 next_uni_uu(aTHX_ &s, strend, &a);
2123 next_uni_uu(aTHX_ &s, strend, &b);
2124 next_uni_uu(aTHX_ &s, strend, &c);
2125 next_uni_uu(aTHX_ &s, strend, &d);
2126 hunk[0] = (char)((a << 2) | (b >> 4));
2127 hunk[1] = (char)((b << 4) | (c >> 2));
2128 hunk[2] = (char)((c << 6) | d);
2129 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2130 len -= 3;
2131 }
2132 if (s < strend) {
f7fe979e
AL
2133 if (*s == '\n') {
2134 s++;
2135 }
08ca2aa3
TH
2136 else {
2137 /* possible checksum byte */
f7fe979e
AL
2138 const char *skip = s+UTF8SKIP(s);
2139 if (skip < strend && *skip == '\n')
2140 s = skip+1;
08ca2aa3
TH
2141 }
2142 }
2143 }
2144 } else {
fc241834
RGS
2145 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2146 I32 a, b, c, d;
db187877 2147 char hunk[3];
a6ec74c1 2148
fc241834
RGS
2149 len = PL_uudmap[*(U8*)s++] & 077;
2150 while (len > 0) {
2151 if (s < strend && ISUUCHAR(*s))
2152 a = PL_uudmap[*(U8*)s++] & 077;
2153 else
2154 a = 0;
2155 if (s < strend && ISUUCHAR(*s))
2156 b = PL_uudmap[*(U8*)s++] & 077;
2157 else
2158 b = 0;
2159 if (s < strend && ISUUCHAR(*s))
2160 c = PL_uudmap[*(U8*)s++] & 077;
2161 else
2162 c = 0;
2163 if (s < strend && ISUUCHAR(*s))
2164 d = PL_uudmap[*(U8*)s++] & 077;
2165 else
2166 d = 0;
2167 hunk[0] = (char)((a << 2) | (b >> 4));
2168 hunk[1] = (char)((b << 4) | (c >> 2));
2169 hunk[2] = (char)((c << 6) | d);
2170 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2171 len -= 3;
2172 }
2173 if (*s == '\n')
2174 s++;
2175 else /* possible checksum byte */
2176 if (s + 1 < strend && s[1] == '\n')
2177 s += 2;
a6ec74c1 2178 }
08ca2aa3
TH
2179 }
2180 XPUSHs(sv);
a6ec74c1
JH
2181 break;
2182 }
49704364 2183
a6ec74c1 2184 if (checksum) {
1109a392 2185 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2186 (checksum > bits_in_uv &&
08ca2aa3
TH
2187 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2188 NV trouble, anv;
a6ec74c1 2189
08ca2aa3 2190 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2191 while (checksum >= 16) {
2192 checksum -= 16;
08ca2aa3 2193 anv *= 65536.0;
a6ec74c1 2194 }
a6ec74c1 2195 while (cdouble < 0.0)
08ca2aa3
TH
2196 cdouble += anv;
2197 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2198 sv = newSVnv(cdouble);
a6ec74c1
JH
2199 }
2200 else {
fa8ec7c1
NC
2201 if (checksum < bits_in_uv) {
2202 UV mask = ((UV)1 << checksum) - 1;
92d41999 2203 cuv &= mask;
a6ec74c1 2204 }
c4c5f44a 2205 sv = newSVuv(cuv);
a6ec74c1 2206 }
6e449a3a 2207 mXPUSHs(sv);
a6ec74c1
JH
2208 checksum = 0;
2209 }
fc241834 2210
49704364
WL
2211 if (symptr->flags & FLAG_SLASH){
2212 if (SP - PL_stack_base - start_sp_offset <= 0)
2213 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2214 if( next_symbol(symptr) ){
2215 if( symptr->howlen == e_number )
2216 Perl_croak(aTHX_ "Count after length/code in unpack" );
2217 if( beyond ){
2218 /* ...end of char buffer then no decent length available */
2219 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2220 } else {
2221 /* take top of stack (hope it's numeric) */
2222 len = POPi;
2223 if( len < 0 )
2224 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2225 }
2226 } else {
2227 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2228 }
2229 datumtype = symptr->code;
21c16052 2230 explicit_length = FALSE;
49704364
WL
2231 goto redo_switch;
2232 }
a6ec74c1 2233 }
49704364 2234
18529408
IZ
2235 if (new_s)
2236 *new_s = s;
2237 PUTBACK;
2238 return SP - PL_stack_base - start_sp_offset;
2239}
2240
2241PP(pp_unpack)
2242{
97aff369 2243 dVAR;
18529408 2244 dSP;
bab9c0ac 2245 dPOPPOPssrl;
18529408
IZ
2246 I32 gimme = GIMME_V;
2247 STRLEN llen;
2248 STRLEN rlen;
5c144d81
NC
2249 const char *pat = SvPV_const(left, llen);
2250 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2251 const char *strend = s + rlen;
2252 const char *patend = pat + llen;
08ca2aa3 2253 I32 cnt;
18529408
IZ
2254
2255 PUTBACK;
7accc089 2256 cnt = unpackstring(pat, patend, s, strend,
49704364 2257 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2258 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2259
18529408
IZ
2260 SPAGAIN;
2261 if ( !cnt && gimme == G_SCALAR )
2262 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2263 RETURN;
2264}
2265
f337b084 2266STATIC U8 *
f7fe979e 2267doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2268{
f337b084 2269 *h++ = PL_uuemap[len];
a6ec74c1 2270 while (len > 2) {
f337b084
TH
2271 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2272 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2273 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2274 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2275 s += 3;
2276 len -= 3;
2277 }
2278 if (len > 0) {
f7fe979e 2279 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2280 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2281 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2282 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2283 *h++ = PL_uuemap[0];
a6ec74c1 2284 }
f337b084
TH
2285 *h++ = '\n';
2286 return h;
a6ec74c1
JH
2287}
2288
2289STATIC SV *
f7fe979e 2290S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2291{
8b6e33c7
AL
2292 SV *result = newSVpvn(s, l);
2293 char *const result_c = SvPV_nolen(result); /* convenience */
2294 char *out = result_c;
2295 bool skip = 1;
2296 bool ignore = 0;
a6ec74c1 2297
7918f24d
NC
2298 PERL_ARGS_ASSERT_IS_AN_INT;
2299
a6ec74c1
JH
2300 while (*s) {
2301 switch (*s) {
2302 case ' ':
2303 break;
2304 case '+':
2305 if (!skip) {
2306 SvREFCNT_dec(result);
2307 return (NULL);
2308 }
2309 break;
2310 case '0':
2311 case '1':
2312 case '2':
2313 case '3':
2314 case '4':
2315 case '5':
2316 case '6':
2317 case '7':
2318 case '8':
2319 case '9':
2320 skip = 0;
2321 if (!ignore) {
2322 *(out++) = *s;
2323 }
2324 break;
2325 case '.':
2326 ignore = 1;
2327 break;
2328 default:
2329 SvREFCNT_dec(result);
2330 return (NULL);
2331 }
2332 s++;
2333 }
2334 *(out++) = '\0';
2335 SvCUR_set(result, out - result_c);
2336 return (result);
2337}
2338
2339/* pnum must be '\0' terminated */
2340STATIC int
2341S_div128(pTHX_ SV *pnum, bool *done)
2342{
8b6e33c7
AL
2343 STRLEN len;
2344 char * const s = SvPV(pnum, len);
2345 char *t = s;
2346 int m = 0;
2347
7918f24d
NC
2348 PERL_ARGS_ASSERT_DIV128;
2349
8b6e33c7
AL
2350 *done = 1;
2351 while (*t) {
2352 const int i = m * 10 + (*t - '0');
2353 const int r = (i >> 7); /* r < 10 */
2354 m = i & 0x7F;
2355 if (r) {
2356 *done = 0;
2357 }
2358 *(t++) = '0' + r;
a6ec74c1 2359 }
8b6e33c7
AL
2360 *(t++) = '\0';
2361 SvCUR_set(pnum, (STRLEN) (t - s));
2362 return (m);
a6ec74c1
JH
2363}
2364
18529408 2365/*
7accc089
JH
2366=for apidoc packlist
2367
2368The engine implementing pack() Perl function.
2369
bfce84ec
AL
2370=cut
2371*/
7accc089
JH
2372
2373void
f7fe979e 2374Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2375{
97aff369 2376 dVAR;
aadb217d
JH
2377 tempsym_t sym;
2378
7918f24d
NC
2379 PERL_ARGS_ASSERT_PACKLIST;
2380
f7fe979e 2381 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2382
f337b084
TH
2383 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2384 Also make sure any UTF8 flag is loaded */
56eb0262 2385 SvPV_force_nolen(cat);
bfce84ec
AL
2386 if (DO_UTF8(cat))
2387 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2388
49704364
WL
2389 (void)pack_rec( cat, &sym, beglist, endlist );
2390}
2391
f337b084
TH
2392/* like sv_utf8_upgrade, but also repoint the group start markers */
2393STATIC void
2394marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2395 STRLEN len;
2396 tempsym_t *group;
f7fe979e
AL
2397 const char *from_ptr, *from_start, *from_end, **marks, **m;
2398 char *to_start, *to_ptr;
f337b084
TH
2399
2400 if (SvUTF8(sv)) return;
2401
aa07b2f6 2402 from_start = SvPVX_const(sv);
f337b084
TH
2403 from_end = from_start + SvCUR(sv);
2404 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2405 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2406 if (from_ptr == from_end) {
2407 /* Simple case: no character needs to be changed */
2408 SvUTF8_on(sv);
2409 return;
2410 }
2411
3473cf63 2412 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2413 Newx(to_start, len, char);
f337b084
TH
2414 Copy(from_start, to_start, from_ptr-from_start, char);
2415 to_ptr = to_start + (from_ptr-from_start);
2416
a02a5408 2417 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2418 for (group=sym_ptr; group; group = group->previous)
2419 marks[group->level] = from_start + group->strbeg;
2420 marks[sym_ptr->level+1] = from_end+1;
2421 for (m = marks; *m < from_ptr; m++)
2422 *m = to_start + (*m-from_start);
2423
2424 for (;from_ptr < from_end; from_ptr++) {
2425 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2426 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2427 }
2428 *to_ptr = 0;
2429
2430 while (*m == from_ptr) *m++ = to_ptr;
2431 if (m != marks + sym_ptr->level+1) {
2432 Safefree(marks);
2433 Safefree(to_start);
5f5991a0 2434 Perl_croak(aTHX_ "panic: marks beyond string end");
f337b084
TH
2435 }
2436 for (group=sym_ptr; group; group = group->previous)
2437 group->strbeg = marks[group->level] - to_start;
2438 Safefree(marks);
2439
2440 if (SvOOK(sv)) {
2441 if (SvIVX(sv)) {
b162af07 2442 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2443 from_start -= SvIVX(sv);
2444 SvIV_set(sv, 0);
2445 }
2446 SvFLAGS(sv) &= ~SVf_OOK;
2447 }
2448 if (SvLEN(sv) != 0)
2449 Safefree(from_start);
f880fe2f 2450 SvPV_set(sv, to_start);
b162af07
SP
2451 SvCUR_set(sv, to_ptr - to_start);
2452 SvLEN_set(sv, len);
f337b084
TH
2453 SvUTF8_on(sv);
2454}
2455
2456/* Exponential string grower. Makes string extension effectively O(n)
2457 needed says how many extra bytes we need (not counting the final '\0')
2458 Only grows the string if there is an actual lack of space
2459*/
2460STATIC char *
0bd48802 2461S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2462 const STRLEN cur = SvCUR(sv);
2463 const STRLEN len = SvLEN(sv);
f337b084 2464 STRLEN extend;
7918f24d
NC
2465
2466 PERL_ARGS_ASSERT_SV_EXP_GROW;
2467
f337b084
TH
2468 if (len - cur > needed) return SvPVX(sv);
2469 extend = needed > len ? needed : len;
2470 return SvGROW(sv, len+extend+1);
2471}
49704364
WL
2472
2473STATIC
2474SV **
f337b084 2475S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2476{
97aff369 2477 dVAR;
49704364 2478 tempsym_t lookahead;
f337b084
TH
2479 I32 items = endlist - beglist;
2480 bool found = next_symbol(symptr);
2481 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2482 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2483
7918f24d
NC
2484 PERL_ARGS_ASSERT_PACK_REC;
2485
f337b084
TH
2486 if (symptr->level == 0 && found && symptr->code == 'U') {
2487 marked_upgrade(aTHX_ cat, symptr);
2488 symptr->flags |= FLAG_DO_UTF8;
2489 utf8 = 0;
49704364 2490 }
f337b084 2491 symptr->strbeg = SvCUR(cat);
49704364
WL
2492
2493 while (found) {
f337b084
TH
2494 SV *fromstr;
2495 STRLEN fromlen;
2496 I32 len;
a0714e2c 2497 SV *lengthcode = NULL;
49704364 2498 I32 datumtype = symptr->code;
f337b084
TH
2499 howlen_t howlen = symptr->howlen;
2500 char *start = SvPVX(cat);
2501 char *cur = start + SvCUR(cat);
49704364 2502
f337b084
TH
2503#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2504
2505 switch (howlen) {
fc241834 2506 case e_star:
f337b084
TH
2507 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2508 0 : items;
2509 break;
2510 default:
2511 /* e_no_len and e_number */
2512 len = symptr->length;
49704364
WL
2513 break;
2514 }
2515
f337b084 2516 if (len) {
a7a3cfaa 2517 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2518
a7a3cfaa
TH
2519 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2520 /* We can process this letter. */
2521 STRLEN size = props & PACK_SIZE_MASK;
2522 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2523 }
f337b084
TH
2524 }
2525
49704364
WL
2526 /* Look ahead for next symbol. Do we have code/code? */
2527 lookahead = *symptr;
2528 found = next_symbol(&lookahead);
246f24af
TH
2529 if (symptr->flags & FLAG_SLASH) {
2530 IV count;
f337b084 2531 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2532 if (strchr("aAZ", lookahead.code)) {
2533 if (lookahead.howlen == e_number) count = lookahead.length;
2534 else {
ce399ba6
NC
2535 if (items > 0) {
2536 if (SvGAMAGIC(*beglist)) {
2537 /* Avoid reading the active data more than once
2538 by copying it to a temporary. */
2539 STRLEN len;
2540 const char *const pv = SvPV_const(*beglist, len);
740cce10 2541 SV *const temp
59cd0e26
NC
2542 = newSVpvn_flags(pv, len,
2543 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2544 *beglist = temp;
2545 }
246f24af
TH
2546 count = DO_UTF8(*beglist) ?
2547 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2548 }
246f24af
TH
2549 else count = 0;
2550 if (lookahead.code == 'Z') count++;
2551 }
2552 } else {
2553 if (lookahead.howlen == e_number && lookahead.length < items)
2554 count = lookahead.length;
2555 else count = items;
2556 }
2557 lookahead.howlen = e_number;
2558 lookahead.length = count;
2559 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2560 }
49704364 2561
fc241834
RGS
2562 /* Code inside the switch must take care to properly update
2563 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2564 doesn't simply leave using break */
1109a392 2565 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2566 default:
f337b084
TH
2567 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2568 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2569 case '%':
49704364 2570 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2571 {
2572 char *from;
2573#ifdef PERL_PACK_CAN_SHRIEKSIGN
2574 case '.' | TYPE_IS_SHRIEKING:
2575#endif
2576 case '.':
2577 if (howlen == e_star) from = start;
2578 else if (len == 0) from = cur;
2579 else {
2580 tempsym_t *group = symptr;
2581
2582 while (--len && group) group = group->previous;
2583 from = group ? start + group->strbeg : start;
2584 }
2585 fromstr = NEXTFROM;
2586 len = SvIV(fromstr);
2587 goto resize;
2588#ifdef PERL_PACK_CAN_SHRIEKSIGN
2589 case '@' | TYPE_IS_SHRIEKING:
2590#endif
a6ec74c1 2591 case '@':
28be1210
TH
2592 from = start + symptr->strbeg;
2593 resize:
2594#ifdef PERL_PACK_CAN_SHRIEKSIGN
2595 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2596#else /* PERL_PACK_CAN_SHRIEKSIGN */
2597 if (utf8)
2598#endif
2599 if (len >= 0) {
2600 while (len && from < cur) {
2601 from += UTF8SKIP(from);
2602 len--;
2603 }
2604 if (from > cur)
2605 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2606 if (len) {
2607 /* Here we know from == cur */
2608 grow:
2609 GROWING(0, cat, start, cur, len);
2610 Zero(cur, len, char);
2611 cur += len;
2612 } else if (from < cur) {
2613 len = cur - from;
2614 goto shrink;
2615 } else goto no_change;
2616 } else {
2617 cur = from;
2618 len = -len;
2619 goto utf8_shrink;
f337b084 2620 }
28be1210
TH
2621 else {
2622 len -= cur - from;
f337b084 2623 if (len > 0) goto grow;
28be1210 2624 if (len == 0) goto no_change;
fc241834 2625 len = -len;
28be1210 2626 goto shrink;
f337b084 2627 }
a6ec74c1 2628 break;
28be1210 2629 }
fc241834 2630 case '(': {
49704364 2631 tempsym_t savsym = *symptr;
66c611c5
MHM
2632 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2633 symptr->flags |= group_modifiers;
49704364
WL
2634 symptr->patend = savsym.grpend;
2635 symptr->level++;
f337b084 2636 symptr->previous = &lookahead;
18529408 2637 while (len--) {
f337b084
TH
2638 U32 was_utf8;
2639 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2640 else symptr->flags &= ~FLAG_PARSE_UTF8;
2641 was_utf8 = SvUTF8(cat);
49704364 2642 symptr->patptr = savsym.grpbeg;
f337b084
TH
2643 beglist = pack_rec(cat, symptr, beglist, endlist);
2644 if (SvUTF8(cat) != was_utf8)
2645 /* This had better be an upgrade while in utf8==0 mode */
2646 utf8 = 1;
2647
49704364 2648 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2649 break; /* No way to continue */
2650 }
ee790063 2651 items = endlist - beglist;
f337b084
TH
2652 lookahead.flags = symptr->flags & ~group_modifiers;
2653 goto no_change;
18529408 2654 }
62f95557
IZ
2655 case 'X' | TYPE_IS_SHRIEKING:
2656 if (!len) /* Avoid division by 0 */
2657 len = 1;
f337b084
TH
2658 if (utf8) {
2659 char *hop, *last;
2660 I32 l = len;
2661 hop = last = start;
2662 while (hop < cur) {
2663 hop += UTF8SKIP(hop);
2664 if (--l == 0) {
2665 last = hop;
2666 l = len;
2667 }
2668 }
2669 if (last > cur)
2670 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2671 cur = last;
2672 break;
2673 }
2674 len = (cur-start) % len;
62f95557 2675 /* FALL THROUGH */
a6ec74c1 2676 case 'X':
f337b084
TH
2677 if (utf8) {
2678 if (len < 1) goto no_change;
28be1210 2679 utf8_shrink:
f337b084
TH
2680 while (len > 0) {
2681 if (cur <= start)
28be1210
TH
2682 Perl_croak(aTHX_ "'%c' outside of string in pack",
2683 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2684 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2685 if (cur <= start)
28be1210
TH
2686 Perl_croak(aTHX_ "'%c' outside of string in pack",
2687 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2688 }
2689 len--;
2690 }
2691 } else {
fc241834 2692 shrink:
f337b084 2693 if (cur - start < len)
28be1210
TH
2694 Perl_croak(aTHX_ "'%c' outside of string in pack",
2695 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2696 cur -= len;
2697 }
2698 if (cur < start+symptr->strbeg) {
2699 /* Make sure group starts don't point into the void */
2700 tempsym_t *group;
9e27e96a 2701 const STRLEN length = cur-start;
f337b084
TH
2702 for (group = symptr;
2703 group && length < group->strbeg;
2704 group = group->previous) group->strbeg = length;
2705 lookahead.strbeg = length;
2706 }
a6ec74c1 2707 break;
fc241834
RGS
2708 case 'x' | TYPE_IS_SHRIEKING: {
2709 I32 ai32;
62f95557
IZ
2710 if (!len) /* Avoid division by 0 */
2711 len = 1;
230e1fce 2712 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2713 else ai32 = (cur - start) % len;
2714 if (ai32 == 0) goto no_change;
2715 len -= ai32;
2716 }
2717 /* FALL THROUGH */
a6ec74c1 2718 case 'x':
f337b084 2719 goto grow;
a6ec74c1
JH
2720 case 'A':
2721 case 'Z':
f337b084 2722 case 'a': {
f7fe979e 2723 const char *aptr;
f337b084 2724
a6ec74c1 2725 fromstr = NEXTFROM;
e62f0680 2726 aptr = SvPV_const(fromstr, fromlen);
f337b084 2727 if (DO_UTF8(fromstr)) {
f7fe979e 2728 const char *end, *s;
f337b084
TH
2729
2730 if (!utf8 && !SvUTF8(cat)) {
2731 marked_upgrade(aTHX_ cat, symptr);
2732 lookahead.flags |= FLAG_DO_UTF8;
2733 lookahead.strbeg = symptr->strbeg;
2734 utf8 = 1;
2735 start = SvPVX(cat);
2736 cur = start + SvCUR(cat);
2737 }
fc241834 2738 if (howlen == e_star) {
f337b084
TH
2739 if (utf8) goto string_copy;
2740 len = fromlen+1;
2741 }
2742 s = aptr;
2743 end = aptr + fromlen;
2744 fromlen = datumtype == 'Z' ? len-1 : len;
2745 while ((I32) fromlen > 0 && s < end) {
2746 s += UTF8SKIP(s);
2747 fromlen--;
2748 }
2749 if (s > end)
2750 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2751 if (utf8) {
fc241834 2752 len = fromlen;
f337b084
TH
2753 if (datumtype == 'Z') len++;
2754 fromlen = s-aptr;
2755 len += fromlen;
fc241834 2756
f337b084 2757 goto string_copy;
fc241834 2758 }
f337b084
TH
2759 fromlen = len - fromlen;
2760 if (datumtype == 'Z') fromlen--;
2761 if (howlen == e_star) {
2762 len = fromlen;
2763 if (datumtype == 'Z') len++;
fc241834 2764 }
f337b084 2765 GROWING(0, cat, start, cur, len);
fc241834 2766 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2767 datumtype | TYPE_IS_PACK))
5f5991a0 2768 Perl_croak(aTHX_ "panic: predicted utf8 length not available");
f337b084 2769 cur += fromlen;
a6ec74c1 2770 len -= fromlen;
f337b084
TH
2771 } else if (utf8) {
2772 if (howlen == e_star) {
2773 len = fromlen;
2774 if (datumtype == 'Z') len++;
a6ec74c1 2775 }
f337b084
TH
2776 if (len <= (I32) fromlen) {
2777 fromlen = len;
2778 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2779 }
fc241834 2780 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2781 upgrade, so:
2782 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2783 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2784 len -= fromlen;
2785 while (fromlen > 0) {
230e1fce 2786 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2787 aptr++;
2788 fromlen--;
fc241834 2789 }
f337b084
TH
2790 } else {
2791 string_copy:
2792 if (howlen == e_star) {
2793 len = fromlen;
2794 if (datumtype == 'Z') len++;
2795 }
2796 if (len <= (I32) fromlen) {
2797 fromlen = len;
2798 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2799 }
f337b084
TH
2800 GROWING(0, cat, start, cur, len);
2801 Copy(aptr, cur, fromlen, char);
2802 cur += fromlen;
2803 len -= fromlen;
a6ec74c1 2804 }
f337b084
TH
2805 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2806 cur += len;
3c4fb04a 2807 SvTAINT(cat);
a6ec74c1 2808 break;
f337b084 2809 }
a6ec74c1 2810 case 'B':
f337b084 2811 case 'b': {
b83604b4 2812 const char *str, *end;
f337b084
TH
2813 I32 l, field_len;
2814 U8 bits;
2815 bool utf8_source;
2816 U32 utf8_flags;
a6ec74c1 2817
fc241834 2818 fromstr = NEXTFROM;
b83604b4 2819 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2820 end = str + fromlen;
2821 if (DO_UTF8(fromstr)) {
2822 utf8_source = TRUE;
041457d9 2823 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2824 } else {
2825 utf8_source = FALSE;
2826 utf8_flags = 0; /* Unused, but keep compilers happy */
2827 }
2828 if (howlen == e_star) len = fromlen;
2829 field_len = (len+7)/8;
2830 GROWING(utf8, cat, start, cur, field_len);
2831 if (len > (I32)fromlen) len = fromlen;
2832 bits = 0;
2833 l = 0;
2834 if (datumtype == 'B')
2835 while (l++ < len) {
2836 if (utf8_source) {
95b63a38 2837 UV val = 0;
f337b084
TH
2838 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2839 bits |= val & 1;
2840 } else bits |= *str++ & 1;
2841 if (l & 7) bits <<= 1;
fc241834 2842 else {
f337b084
TH
2843 PUSH_BYTE(utf8, cur, bits);
2844 bits = 0;
a6ec74c1
JH
2845 }
2846 }
f337b084
TH
2847 else
2848 /* datumtype == 'b' */
2849 while (l++ < len) {
2850 if (utf8_source) {
95b63a38 2851 UV val = 0;
f337b084
TH
2852 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2853 if (val & 1) bits |= 0x80;
2854 } else if (*str++ & 1)
2855 bits |= 0x80;
2856 if (l & 7) bits >>= 1;
fc241834 2857 else {
f337b084
TH
2858 PUSH_BYTE(utf8, cur, bits);
2859 bits = 0;
a6ec74c1
JH
2860 }
2861 }
f337b084
TH
2862 l--;
2863 if (l & 7) {
fc241834 2864 if (datumtype == 'B')
f337b084 2865 bits <<= 7 - (l & 7);
fc241834 2866 else
f337b084
TH
2867 bits >>= 7 - (l & 7);
2868 PUSH_BYTE(utf8, cur, bits);
2869 l += 7;
a6ec74c1 2870 }
f337b084
TH
2871 /* Determine how many chars are left in the requested field */
2872 l /= 8;
2873 if (howlen == e_star) field_len = 0;
2874 else field_len -= l;
2875 Zero(cur, field_len, char);
2876 cur += field_len;
a6ec74c1 2877 break;
f337b084 2878 }
a6ec74c1 2879 case 'H':
f337b084 2880 case 'h': {
10516c54 2881 const char *str, *end;
f337b084
TH
2882 I32 l, field_len;
2883 U8 bits;
2884 bool utf8_source;
2885 U32 utf8_flags;
a6ec74c1 2886
fc241834 2887 fromstr = NEXTFROM;
10516c54 2888 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2889 end = str + fromlen;
2890 if (DO_UTF8(fromstr)) {
2891 utf8_source = TRUE;
041457d9 2892 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2893 } else {
2894 utf8_source = FALSE;
2895 utf8_flags = 0; /* Unused, but keep compilers happy */
2896 }
2897 if (howlen == e_star) len = fromlen;
2898 field_len = (len+1)/2;
2899 GROWING(utf8, cat, start, cur, field_len);
2900 if (!utf8 && len > (I32)fromlen) len = fromlen;
2901 bits = 0;
2902 l = 0;
2903 if (datumtype == 'H')
2904 while (l++ < len) {
2905 if (utf8_source) {
95b63a38 2906 UV val = 0;
f337b084
TH
2907 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2908 if (val < 256 && isALPHA(val))
2909 bits |= (val + 9) & 0xf;
a6ec74c1 2910 else
f337b084
TH
2911 bits |= val & 0xf;
2912 } else if (isALPHA(*str))
2913 bits |= (*str++ + 9) & 0xf;
2914 else
2915 bits |= *str++ & 0xf;
2916 if (l & 1) bits <<= 4;
fc241834 2917 else {
f337b084
TH
2918 PUSH_BYTE(utf8, cur, bits);
2919 bits = 0;
a6ec74c1
JH
2920 }
2921 }
f337b084
TH
2922 else
2923 while (l++ < len) {
2924 if (utf8_source) {
95b63a38 2925 UV val = 0;
f337b084
TH
2926 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2927 if (val < 256 && isALPHA(val))
2928 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2929 else
f337b084
TH
2930 bits |= (val & 0xf) << 4;
2931 } else if (isALPHA(*str))
2932 bits |= ((*str++ + 9) & 0xf) << 4;
2933 else
2934 bits |= (*str++ & 0xf) << 4;
2935 if (l & 1) bits >>= 4;
fc241834 2936 else {
f337b084
TH
2937 PUSH_BYTE(utf8, cur, bits);
2938 bits = 0;
a6ec74c1 2939 }
fc241834 2940 }
f337b084
TH
2941 l--;
2942 if (l & 1) {
2943 PUSH_BYTE(utf8, cur, bits);
2944 l++;
2945 }
2946 /* Determine how many chars are left in the requested field */
2947 l /= 2;
2948 if (howlen == e_star) field_len = 0;
2949 else field_len -= l;
2950 Zero(cur, field_len, char);
2951 cur += field_len;
2952 break;
fc241834
RGS
2953 }
2954 case 'c':
f337b084
TH
2955 while (len-- > 0) {
2956 IV aiv;
2957 fromstr = NEXTFROM;
2958 aiv = SvIV(fromstr);
a2a5de95
NC
2959 if ((-128 > aiv || aiv > 127))
2960 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2961 "Character in 'c' format wrapped in pack");
585ec06d 2962 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2963 }
2964 break;
2965 case 'C':
f337b084
TH
2966 if (len == 0) {
2967 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2968 break;
2969 }
a6ec74c1 2970 while (len-- > 0) {
f337b084 2971 IV aiv;
a6ec74c1 2972 fromstr = NEXTFROM;
f337b084 2973 aiv = SvIV(fromstr);
a2a5de95
NC
2974 if ((0 > aiv || aiv > 0xff))
2975 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2976 "Character in 'C' format wrapped in pack");
1651fc44 2977 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2978 }
fc241834
RGS
2979 break;
2980 case 'W': {
2981 char *end;
670f1322 2982 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2983
2984 end = start+SvLEN(cat)-1;
2985 if (utf8) end -= UTF8_MAXLEN-1;
2986 while (len-- > 0) {
2987 UV auv;
2988 fromstr = NEXTFROM;
2989 auv = SvUV(fromstr);
2990 if (in_bytes) auv = auv % 0x100;
2991 if (utf8) {
2992 W_utf8:
2993 if (cur > end) {
2994 *cur = '\0';
b162af07 2995 SvCUR_set(cat, cur - start);
fc241834
RGS
2996
2997 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2998 end = start+SvLEN(cat)-UTF8_MAXLEN;
2999 }
230e1fce
NC
3000 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3001 NATIVE_TO_UNI(auv),
041457d9 3002 warn_utf8 ?
230e1fce 3003 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3004 } else {
3005 if (auv >= 0x100) {
3006 if (!SvUTF8(cat)) {
3007 *cur = '\0';
b162af07 3008 SvCUR_set(cat, cur - start);
fc241834
RGS
3009 marked_upgrade(aTHX_ cat, symptr);
3010 lookahead.flags |= FLAG_DO_UTF8;
3011 lookahead.strbeg = symptr->strbeg;
3012 utf8 = 1;
3013 start = SvPVX(cat);
3014 cur = start + SvCUR(cat);
3015 end = start+SvLEN(cat)-UTF8_MAXLEN;
3016 goto W_utf8;
3017 }
a2a5de95
NC
3018 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3019 "Character in 'W' format wrapped in pack");
fc241834
RGS
3020 auv &= 0xff;
3021 }
3022 if (cur >= end) {
3023 *cur = '\0';
b162af07 3024 SvCUR_set(cat, cur - start);
fc241834
RGS
3025 GROWING(0, cat, start, cur, len+1);
3026 end = start+SvLEN(cat)-1;
3027 }
fe2774ed 3028 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3029 }
3030 }
3031 break;
fc241834
RGS
3032 }
3033 case 'U': {
3034 char *end;
3035
3036 if (len == 0) {
3037 if (!(symptr->flags & FLAG_DO_UTF8)) {
3038 marked_upgrade(aTHX_ cat, symptr);
3039 lookahead.flags |= FLAG_DO_UTF8;
3040 lookahead.strbeg = symptr->strbeg;
3041 }
3042 utf8 = 0;
3043 goto no_change;
3044 }
3045
3046 end = start+SvLEN(cat);
3047 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3048 while (len-- > 0) {
fc241834 3049 UV auv;
a6ec74c1 3050 fromstr = NEXTFROM;
fc241834
RGS
3051 auv = SvUV(fromstr);
3052 if (utf8) {
230e1fce 3053 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3054 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3055 warn_utf8 ?
fc241834
RGS
3056 0 : UNICODE_ALLOW_ANY);
3057 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3058 *cur = '\0';
b162af07 3059 SvCUR_set(cat, cur - start);
fc241834
RGS
3060 GROWING(0, cat, start, cur,
3061 len+(endb-buffer)*UTF8_EXPAND);
3062 end = start+SvLEN(cat);
3063 }
64844641 3064 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3065 } else {
3066 if (cur >= end) {
3067 *cur = '\0';
b162af07 3068 SvCUR_set(cat, cur - start);
fc241834
RGS
3069 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3070 end = start+SvLEN(cat)-UTF8_MAXLEN;
3071 }
230e1fce 3072 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3073 warn_utf8 ?
230e1fce 3074 0 : UNICODE_ALLOW_ANY);
fc241834 3075 }
a6ec74c1 3076 }
a6ec74c1 3077 break;
fc241834 3078 }
a6ec74c1
JH
3079 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3080 case 'f':
a6ec74c1 3081 while (len-- > 0) {
f337b084
TH
3082 float afloat;
3083 NV anv;
a6ec74c1 3084 fromstr = NEXTFROM;
f337b084 3085 anv = SvNV(fromstr);
5cdb9e01 3086#ifdef __VOS__
f337b084 3087 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3088 during conversion from double to float into infinity, so we
3089 do it by hand. This code should either be generalized for
3090 any OS that needs it, or removed if and when VOS implements
3091 posix-976 (suggestion to support mapping to infinity).
3092 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3093{
3094extern const float _float_constants[];
f337b084 3095 if (anv > FLT_MAX)
fc241834 3096 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3097 else if (anv < -FLT_MAX)
fc241834 3098 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3099 else afloat = (float) anv;
3722f0dc 3100}
f337b084 3101#else /* __VOS__ */
baf3cf9c 3102# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3103 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3104 * on Alpha; fake it if we don't have them.
3105 */
f337b084 3106 if (anv > FLT_MAX)
fc241834 3107 afloat = FLT_MAX;
f337b084 3108 else if (anv < -FLT_MAX)
fc241834 3109 afloat = -FLT_MAX;
f337b084 3110 else afloat = (float)anv;
baf3cf9c 3111# else
f337b084 3112 afloat = (float)anv;
baf3cf9c 3113# endif
f337b084 3114#endif /* __VOS__ */
1109a392 3115 DO_BO_PACK_N(afloat, float);
f337b084 3116 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3117 }
3118 break;
3119 case 'd':
a6ec74c1 3120 while (len-- > 0) {
f337b084
TH
3121 double adouble;
3122 NV anv;
a6ec74c1 3123 fromstr = NEXTFROM;
f337b084 3124 anv = SvNV(fromstr);
5cdb9e01 3125#ifdef __VOS__
f337b084 3126 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3127 during conversion from long double to double into infinity,
3128 so we do it by hand. This code should either be generalized
3129 for any OS that needs it, or removed if and when VOS
3130 implements posix-976 (suggestion to support mapping to
3131 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3132{
3133extern const double _double_constants[];
f337b084 3134 if (anv > DBL_MAX)
fc241834 3135 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3136 else if (anv < -DBL_MAX)
fc241834 3137 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3138 else adouble = (double) anv;
3722f0dc 3139}
f337b084 3140#else /* __VOS__ */
baf3cf9c 3141# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3142 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3143 * on Alpha; fake it if we don't have them.
3144 */
f337b084 3145 if (anv > DBL_MAX)
fc241834 3146 adouble = DBL_MAX;
f337b084 3147 else if (anv < -DBL_MAX)
fc241834 3148 adouble = -DBL_MAX;
f337b084 3149 else adouble = (double)anv;
baf3cf9c 3150# else
f337b084 3151 adouble = (double)anv;
baf3cf9c 3152# endif
f337b084 3153#endif /* __VOS__ */
1109a392 3154 DO_BO_PACK_N(adouble, double);
f337b084 3155 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3156 }
3157 break;
fc241834
RGS
3158 case 'F': {
3159 NV anv;
1109a392 3160 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3161 while (len-- > 0) {
3162 fromstr = NEXTFROM;
3163 anv = SvNV(fromstr);
1109a392 3164 DO_BO_PACK_N(anv, NV);
fc241834 3165 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3166 }
3167 break;
fc241834 3168 }
92d41999 3169#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3170 case 'D': {
3171 long double aldouble;
1109a392
MHM
3172 /* long doubles can have unused bits, which may be nonzero */
3173 Zero(&aldouble, 1, long double);
92d41999
JH
3174 while (len-- > 0) {
3175 fromstr = NEXTFROM;
3176 aldouble = (long double)SvNV(fromstr);
1109a392 3177 DO_BO_PACK_N(aldouble, long double);
fc241834 3178 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3179 }
3180 break;
fc241834 3181 }
92d41999 3182#endif
7212898e 3183#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3184 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3185#endif
a6ec74c1
JH
3186 case 'n':
3187 while (len-- > 0) {
f337b084 3188 I16 ai16;
a6ec74c1 3189 fromstr = NEXTFROM;
ef108786 3190 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3191#ifdef HAS_HTONS
ef108786 3192 ai16 = PerlSock_htons(ai16);
a6ec74c1 3193#endif
f337b084 3194 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3195 }
3196 break;
7212898e 3197#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3198 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3199#endif
a6ec74c1
JH
3200 case 'v':
3201 while (len-- > 0) {
f337b084 3202 I16 ai16;
a6ec74c1 3203 fromstr = NEXTFROM;
ef108786 3204 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3205#ifdef HAS_HTOVS
ef108786 3206 ai16 = htovs(ai16);
a6ec74c1 3207#endif
f337b084 3208 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3209 }
3210 break;
49704364 3211 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3212#if SHORTSIZE != SIZE16
fc241834 3213 while (len-- > 0) {
f337b084 3214 unsigned short aushort;
fc241834
RGS
3215 fromstr = NEXTFROM;
3216 aushort = SvUV(fromstr);
3217 DO_BO_PACK(aushort, s);
f337b084 3218 PUSH_VAR(utf8, cur, aushort);
fc241834 3219 }
49704364
WL
3220 break;
3221#else
3222 /* Fall through! */
a6ec74c1 3223#endif
49704364 3224 case 'S':
fc241834 3225 while (len-- > 0) {
f337b084 3226 U16 au16;
fc241834
RGS
3227 fromstr = NEXTFROM;
3228 au16 = (U16)SvUV(fromstr);
3229 DO_BO_PACK(au16, 16);
f337b084 3230 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3231 }
3232 break;
49704364 3233 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3234#if SHORTSIZE != SIZE16
fc241834 3235 while (len-- > 0) {
f337b084 3236 short ashort;
fc241834
RGS
3237 fromstr = NEXTFROM;
3238 ashort = SvIV(fromstr);
3239 DO_BO_PACK(ashort, s);
f337b084 3240 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3241 }
49704364
WL
3242 break;
3243#else
3244 /* Fall through! */
a6ec74c1 3245#endif
49704364
WL
3246 case 's':
3247 while (len-- > 0) {
f337b084 3248 I16 ai16;
49704364 3249 fromstr = NEXTFROM;
ef108786
MHM
3250 ai16 = (I16)SvIV(fromstr);
3251 DO_BO_PACK(ai16, 16);
f337b084 3252 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3253 }
3254 break;
3255 case 'I':
49704364 3256 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3257 while (len-- > 0) {
f337b084 3258 unsigned int auint;
a6ec74c1
JH
3259 fromstr = NEXTFROM;
3260 auint = SvUV(fromstr);
1109a392 3261 DO_BO_PACK(auint, i);
f337b084 3262 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3263 }
3264 break;
92d41999
JH
3265 case 'j':
3266 while (len-- > 0) {
f337b084 3267 IV aiv;
92d41999
JH
3268 fromstr = NEXTFROM;
3269 aiv = SvIV(fromstr);
1109a392
MHM
3270#if IVSIZE == INTSIZE
3271 DO_BO_PACK(aiv, i);
3272#elif IVSIZE == LONGSIZE
3273 DO_BO_PACK(aiv, l);
3274#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3275 DO_BO_PACK(aiv, 64);
f337b084
TH
3276#else
3277 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3278#endif
f337b084 3279 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3280 }
3281 break;
3282 case 'J':
3283 while (len-- > 0) {
f337b084 3284 UV auv;
92d41999
JH
3285 fromstr = NEXTFROM;
3286 auv = SvUV(fromstr);
1109a392
MHM
3287#if UVSIZE == INTSIZE
3288 DO_BO_PACK(auv, i);
3289#elif UVSIZE == LONGSIZE
3290 DO_BO_PACK(auv, l);
3291#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3292 DO_BO_PACK(auv, 64);
f337b084
TH
3293#else
3294 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3295#endif
f337b084 3296 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3297 }
3298 break;
a6ec74c1
JH
3299 case 'w':
3300 while (len-- > 0) {
f337b084 3301 NV anv;
a6ec74c1 3302 fromstr = NEXTFROM;
15e9f109 3303 anv = SvNV(fromstr);
a6ec74c1 3304
f337b084
TH
3305 if (anv < 0) {
3306 *cur = '\0';
b162af07 3307 SvCUR_set(cat, cur - start);
49704364 3308 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3309 }
a6ec74c1 3310
196b62db
NC
3311 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3312 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3313 any negative IVs will have already been got by the croak()
3314 above. IOK is untrue for fractions, so we test them
3315 against UV_MAX_P1. */
f337b084
TH
3316 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3317 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3318 char *in = buf + sizeof(buf);
196b62db 3319 UV auv = SvUV(fromstr);
a6ec74c1
JH
3320
3321 do {
eb160463 3322 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3323 auv >>= 7;
3324 } while (auv);
3325 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3326 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3327 in, (buf + sizeof(buf)) - in);
3328 } else if (SvPOKp(fromstr))
3329 goto w_string;
a6ec74c1 3330 else if (SvNOKp(fromstr)) {
0258719b
NC
3331 /* 10**NV_MAX_10_EXP is the largest power of 10
3332 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3333 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3334 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3335 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3336 Some C compilers are strict about integral constant
3337 expressions so we conservatively divide by a slightly
3338 smaller integer instead of multiplying by the exact
3339 floating-point value.
0258719b
NC
3340 */
3341#ifdef NV_MAX_10_EXP
f337b084 3342 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3343 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3344#else
f337b084 3345 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3346 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3347#endif
a6ec74c1
JH
3348 char *in = buf + sizeof(buf);
3349
8b6e33c7 3350 anv = Perl_floor(anv);
a6ec74c1 3351 do {
8b6e33c7 3352 const NV next = Perl_floor(anv / 128);
a6ec74c1 3353 if (in <= buf) /* this cannot happen ;-) */
49704364 3354 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3355 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3356 anv = next;
3357 } while (anv > 0);
a6ec74c1 3358 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3359 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3360 in, (buf + sizeof(buf)) - in);
3361 } else {
8b6e33c7
AL
3362 const char *from;
3363 char *result, *in;
735b914b
JH
3364 SV *norm;
3365 STRLEN len;
3366 bool done;
3367
f337b084 3368 w_string:
735b914b 3369 /* Copy string and check for compliance */
349d4f2f 3370 from = SvPV_const(fromstr, len);
735b914b 3371 if ((norm = is_an_int(from, len)) == NULL)
49704364 3372 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3373
a02a5408 3374 Newx(result, len, char);
735b914b
JH
3375 in = result + len;
3376 done = FALSE;
f337b084 3377 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3378 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3379 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3380 in, (result + len) - in);
735b914b
JH
3381 Safefree(result);
3382 SvREFCNT_dec(norm); /* free norm */
fc241834 3383 }
a6ec74c1
JH
3384 }
3385 break;
3386 case 'i':
49704364 3387 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3388 while (len-- > 0) {
f337b084 3389 int aint;
a6ec74c1
JH
3390 fromstr = NEXTFROM;
3391 aint = SvIV(fromstr);
1109a392 3392 DO_BO_PACK(aint, i);
f337b084 3393 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3394 }
3395 break;
7212898e 3396#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3397 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3398#endif
a6ec74c1
JH
3399 case 'N':
3400 while (len-- > 0) {
f337b084 3401 U32 au32;
a6ec74c1 3402 fromstr = NEXTFROM;
ef108786 3403 au32 = SvUV(fromstr);
a6ec74c1 3404#ifdef HAS_HTONL
ef108786 3405 au32 = PerlSock_htonl(au32);
a6ec74c1 3406#endif
f337b084 3407 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3408 }
3409 break;
7212898e 3410#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3411 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3412#endif
a6ec74c1
JH
3413 case 'V':
3414 while (len-- > 0) {
f337b084 3415 U32 au32;
a6ec74c1 3416 fromstr = NEXTFROM;
ef108786 3417 au32 = SvUV(fromstr);
a6ec74c1 3418#ifdef HAS_HTOVL
ef108786 3419 au32 = htovl(au32);
a6ec74c1 3420#endif
f337b084 3421 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3422 }
3423 break;
49704364 3424 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3425#if LONGSIZE != SIZE32
fc241834 3426 while (len-- > 0) {
f337b084 3427 unsigned long aulong;
fc241834
RGS
3428 fromstr = NEXTFROM;
3429 aulong = SvUV(fromstr);
3430 DO_BO_PACK(aulong, l);
f337b084 3431 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3432 }
49704364
WL
3433 break;
3434#else
3435 /* Fall though! */
a6ec74c1 3436#endif
49704364 3437 case 'L':
fc241834 3438 while (len-- > 0) {
f337b084 3439 U32 au32;
fc241834
RGS
3440 fromstr = NEXTFROM;
3441 au32 = SvUV(fromstr);
3442 DO_BO_PACK(au32, 32);
f337b084 3443 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3444 }
3445 break;
49704364 3446 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3447#if LONGSIZE != SIZE32
fc241834 3448 while (len-- > 0) {
f337b084 3449 long along;
fc241834
RGS
3450 fromstr = NEXTFROM;
3451 along = SvIV(fromstr);
3452 DO_BO_PACK(along, l);
f337b084 3453 PUSH_VAR(utf8, cur, along);
a6ec74c1 3454 }
49704364
WL
3455 break;
3456#else
3457 /* Fall though! */
a6ec74c1 3458#endif
49704364
WL
3459 case 'l':
3460 while (len-- > 0) {
f337b084 3461 I32 ai32;
49704364 3462 fromstr = NEXTFROM;
ef108786
MHM
3463 ai32 = SvIV(fromstr);
3464 DO_BO_PACK(ai32, 32);
f337b084 3465 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3466 }
3467 break;
3468#ifdef HAS_QUAD
3469 case 'Q':
3470 while (len-- > 0) {
f337b084 3471 Uquad_t auquad;
a6ec74c1 3472 fromstr = NEXTFROM;
f337b084 3473 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3474 DO_BO_PACK(auquad, 64);
f337b084 3475 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3476 }
3477 break;
3478 case 'q':
3479 while (len-- > 0) {
f337b084 3480 Quad_t aquad;
a6ec74c1
JH
3481 fromstr = NEXTFROM;
3482 aquad = (Quad_t)SvIV(fromstr);
1109a392 3483 DO_BO_PACK(aquad, 64);
f337b084 3484 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3485 }
3486 break;
f337b084 3487#endif /* HAS_QUAD */
a6ec74c1
JH
3488 case 'P':
3489 len = 1; /* assume SV is correct length */
f337b084 3490 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3491 /* Fall through! */
a6ec74c1
JH
3492 case 'p':
3493 while (len-- > 0) {
83003860 3494 const char *aptr;
f337b084 3495
a6ec74c1 3496 fromstr = NEXTFROM;
28a4f200
TH
3497 SvGETMAGIC(fromstr);
3498 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3499 else {
a6ec74c1
JH
3500 /* XXX better yet, could spirit away the string to
3501 * a safe spot and hang on to it until the result
3502 * of pack() (and all copies of the result) are
3503 * gone.
3504 */
041457d9 3505 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3506 !SvREADONLY(fromstr)))) {
3507 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3508 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3509 }
3510 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3511 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3512 else
2596d9fe 3513 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3514 }
07409e01 3515 DO_BO_PACK_PC(aptr);
f337b084 3516 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3517 }
3518 break;
fc241834 3519 case 'u': {
f7fe979e 3520 const char *aptr, *aend;
fc241834 3521 bool from_utf8;
f337b084 3522
a6ec74c1 3523 fromstr = NEXTFROM;
fc241834
RGS
3524 if (len <= 2) len = 45;
3525 else len = len / 3 * 3;
3526 if (len >= 64) {
a2a5de95
NC
3527 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3528 "Field too wide in 'u' format in pack");
fc241834
RGS
3529 len = 63;
3530 }
83003860 3531 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3532 from_utf8 = DO_UTF8(fromstr);
3533 if (from_utf8) {
3534 aend = aptr + fromlen;
3535 fromlen = sv_len_utf8(fromstr);
3536 } else aend = NULL; /* Unused, but keep compilers happy */
3537 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3538 while (fromlen > 0) {
fc241834 3539 U8 *end;
a6ec74c1 3540 I32 todo;
fc241834 3541 U8 hunk[1+63/3*4+1];
a6ec74c1 3542
eb160463 3543 if ((I32)fromlen > len)
a6ec74c1
JH
3544 todo = len;
3545 else
3546 todo = fromlen;
fc241834
RGS
3547 if (from_utf8) {
3548 char buffer[64];
3549 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3550 'u' | TYPE_IS_PACK)) {
3551 *cur = '\0';
b162af07 3552 SvCUR_set(cat, cur - start);
5f5991a0 3553 Perl_croak(aTHX_ "panic: string is shorter than advertised");
fc241834
RGS
3554 }
3555 end = doencodes(hunk, buffer, todo);
3556 } else {
3557 end = doencodes(hunk, aptr, todo);
3558 aptr += todo;
3559 }
3560 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3561 fromlen -= todo;
3562 }
a6ec74c1
JH
3563 break;
3564 }
f337b084
TH
3565 }
3566 *cur = '\0';
b162af07 3567 SvCUR_set(cat, cur - start);
f337b084 3568 no_change:
49704364 3569 *symptr = lookahead;
a6ec74c1 3570 }
49704364 3571 return beglist;
18529408
IZ
3572}
3573#undef NEXTFROM
3574
3575
3576PP(pp_pack)
3577{
97aff369 3578 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3579 register SV *cat = TARG;
3580 STRLEN fromlen;
349d4f2f
NC
3581 SV *pat_sv = *++MARK;
3582 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3583 register const char *patend = pat + fromlen;
18529408
IZ
3584
3585 MARK++;
76f68e9b 3586 sv_setpvs(cat, "");
f337b084 3587 SvUTF8_off(cat);
18529408 3588
7accc089 3589 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3590
a6ec74c1
JH
3591 SvSETMAGIC(cat);
3592 SP = ORIGMARK;
3593 PUSHs(cat);
3594 RETURN;
3595}
a6ec74c1 3596
73cb7263
NC
3597/*
3598 * Local variables:
3599 * c-indentation-style: bsd
3600 * c-basic-offset: 4
3601 * indent-tabs-mode: t
3602 * End:
3603 *
37442d52
RGS
3604 * ex: set ts=8 sts=4 sw=4 noet:
3605 */