This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate:
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
7212898e 73#if PERL_VERSION >= 9
f337b084
TH
74# define PERL_PACK_CAN_BYTEORDER
75# define PERL_PACK_CAN_SHRIEKSIGN
76#endif
77
78#ifndef CHAR_BIT
79# define CHAR_BIT 8
7212898e 80#endif
3473cf63
RGS
81/* Maximum number of bytes to which a byte can grow due to upgrade */
82#define UTF8_EXPAND 2
7212898e 83
a6ec74c1 84/*
a6ec74c1
JH
85 * Offset for integer pack/unpack.
86 *
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
89 */
90
91/*
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
97 */
98/*
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
101 */
102#define SIZE16 2
103#define SIZE32 4
104
105/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
106 --jhi Feb 1999 */
107
1109a392
MHM
108#if U16SIZE > SIZE16 || U32SIZE > SIZE32
109# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
110# define OFF16(p) ((char*)(p))
111# define OFF32(p) ((char*)(p))
a6ec74c1 112# else
1109a392 113# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
114# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116# else
08ca2aa3 117 ++++ bad cray byte order
a6ec74c1
JH
118# endif
119# endif
a6ec74c1 120#else
08ca2aa3
TH
121# define OFF16(p) ((char *) (p))
122# define OFF32(p) ((char *) (p))
a6ec74c1
JH
123#endif
124
f337b084
TH
125/* Only to be used inside a loop (see the break) */
126#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (utf8) { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 } else { \
130 Copy(s, OFF16(p), SIZE16, char); \
131 (s) += SIZE16; \
132 } \
133} STMT_END
134
135/* Only to be used inside a loop (see the break) */
136#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (utf8) { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 } else { \
140 Copy(s, OFF32(p), SIZE32, char); \
141 (s) += SIZE32; \
142 } \
143} STMT_END
144
145#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
147
148/* Only to be used inside a loop (see the break) */
f337b084 149#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
150STMT_START { \
151 if (utf8) { \
f337b084
TH
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
154 } else { \
155 Copy(s, (char *) &var, sizeof(var), char); \
156 s += sizeof(var); \
157 } \
08ca2aa3
TH
158} STMT_END
159
f337b084 160#define PUSH_VAR(utf8, aptr, var) \
230e1fce 161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 162
49704364
WL
163/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164#define MAX_SUB_TEMPLATE_LEVEL 100
165
66c611c5 166/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
167#define FLAG_WAS_UTF8 0x40
168#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 169#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 170#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
171#define FLAG_SLASH 0x04
172#define FLAG_COMMA 0x02
173#define FLAG_PACK 0x01
174
a6ec74c1
JH
175STATIC SV *
176S_mul128(pTHX_ SV *sv, U8 m)
177{
178 STRLEN len;
179 char *s = SvPV(sv, len);
180 char *t;
a6ec74c1 181
7918f24d
NC
182 PERL_ARGS_ASSERT_MUL128;
183
a6ec74c1 184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 185 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
186
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
189 sv = tmpNew;
190 s = SvPV(sv, len);
191 }
192 t = s + len - 1;
193 while (!*t) /* trailing '\0'? */
194 t--;
195 while (t > s) {
f7fe979e 196 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
197 *(t--) = '0' + (char)(i % 10);
198 m = (char)(i / 10);
a6ec74c1
JH
199 }
200 return (sv);
201}
202
203/* Explosives and implosives. */
204
205#if 'I' == 73 && 'J' == 74
206/* On an ASCII/ISO kind of system */
207#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
208#else
209/*
210 Some other sort of character set - use memchr() so we don't match
211 the null byte.
212 */
213#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214#endif
215
66c611c5 216/* type modifiers */
62f95557 217#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
218#define TYPE_IS_BIG_ENDIAN 0x200
219#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 220#define TYPE_IS_PACK 0x800
1109a392 221#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 222#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
223#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
224
7212898e 225#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 226# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 227#else
28be1210 228# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
229#endif
230
231#ifndef PERL_PACK_CAN_BYTEORDER
232/* Put "can't" first because it is shorter */
233# define TYPE_ENDIANNESS(t) 0
234# define TYPE_NO_ENDIANNESS(t) (t)
235
236# define ENDIANNESS_ALLOWED_TYPES ""
237
238# define DO_BO_UNPACK(var, type)
239# define DO_BO_PACK(var, type)
07409e01
NC
240# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
242# define DO_BO_UNPACK_N(var, type)
243# define DO_BO_PACK_N(var, type)
244# define DO_BO_UNPACK_P(var)
245# define DO_BO_PACK_P(var)
45f723e1
NC
246# define DO_BO_UNPACK_PC(var)
247# define DO_BO_PACK_PC(var)
66c611c5 248
f337b084 249#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
250
251# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
253
254# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
255
256# define DO_BO_UNPACK(var, type) \
1109a392 257 STMT_START { \
66c611c5 258 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
261 default: break; \
262 } \
263 } STMT_END
264
7212898e 265# define DO_BO_PACK(var, type) \
1109a392 266 STMT_START { \
66c611c5 267 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
270 default: break; \
271 } \
272 } STMT_END
273
07409e01 274# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 275 STMT_START { \
66c611c5 276 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 277 case TYPE_IS_BIG_ENDIAN: \
07409e01 278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
282 break; \
283 default: \
284 break; \
285 } \
286 } STMT_END
287
07409e01 288# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 289 STMT_START { \
66c611c5 290 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 291 case TYPE_IS_BIG_ENDIAN: \
07409e01 292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
296 break; \
297 default: \
298 break; \
299 } \
300 } STMT_END
301
7212898e 302# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
303 STMT_START { \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
312 break; \
313 default: \
314 break; \
315 } \
316 } STMT_END
317
7212898e 318# if PTRSIZE == INTSIZE
07409e01
NC
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 323# elif PTRSIZE == LONGSIZE
07409e01
NC
324# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
325# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
326# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
327# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
328# elif PTRSIZE == IVSIZE
329# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
330# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
331# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
332# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
333# else
334# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
335# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
336# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
337# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 338# endif
1109a392 339
7212898e 340# if defined(my_htolen) && defined(my_letohn) && \
1109a392 341 defined(my_htoben) && defined(my_betohn)
7212898e 342# define DO_BO_UNPACK_N(var, type) \
1109a392 343 STMT_START { \
66c611c5 344 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
347 default: break; \
348 } \
349 } STMT_END
350
7212898e 351# define DO_BO_PACK_N(var, type) \
1109a392 352 STMT_START { \
66c611c5 353 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
356 default: break; \
357 } \
358 } STMT_END
7212898e
NC
359# else
360# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
361# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
362# endif
363
f337b084 364#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 365
78d46eaa 366#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 367#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
368#define PACK_SIZE_MASK 0x3F
369
78d46eaa
NC
370/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
372
373#if TYPE_IS_SHRIEKING != 0x100
374 ++++shriek offset should be 256
375#endif
376
377typedef U8 packprops_t;
78d46eaa
NC
378#if 'J'-'I' == 1
379/* ASCII */
f43a9a31 380STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
381 /* normal */
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0,
1651fc44 387 /* C */ sizeof(unsigned char),
80a13697 388#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 389 /* D */ LONG_DOUBLESIZE,
80a13697 390#else
a7a3cfaa 391 0,
80a13697 392#endif
a7a3cfaa
TH
393 0,
394 /* F */ NVSIZE,
395 0, 0,
396 /* I */ sizeof(unsigned int),
397 /* J */ UVSIZE,
398 0,
399 /* L */ SIZE32,
400 0,
401 /* N */ SIZE32,
402 0, 0,
80a13697 403#if defined(HAS_QUAD)
a7a3cfaa 404 /* Q */ sizeof(Uquad_t),
80a13697 405#else
a7a3cfaa 406 0,
80a13697 407#endif
a7a3cfaa
TH
408 0,
409 /* S */ SIZE16,
410 0,
411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
412 /* V */ SIZE32,
413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* c */ sizeof(char),
416 /* d */ sizeof(double),
417 0,
418 /* f */ sizeof(float),
419 0, 0,
420 /* i */ sizeof(int),
421 /* j */ IVSIZE,
422 0,
423 /* l */ SIZE32,
424 0,
425 /* n */ SIZE16,
426 0,
427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 428#if defined(HAS_QUAD)
a7a3cfaa 429 /* q */ sizeof(Quad_t),
80a13697 430#else
a7a3cfaa 431 0,
80a13697 432#endif
a7a3cfaa
TH
433 0,
434 /* s */ SIZE16,
435 0, 0,
436 /* v */ SIZE16,
437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0,
447 /* shrieking */
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* I */ sizeof(unsigned int),
454 0, 0,
455 /* L */ sizeof(unsigned long),
456 0,
7212898e 457#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 458 /* N */ SIZE32,
7212898e 459#else
a7a3cfaa 460 0,
7212898e 461#endif
a7a3cfaa
TH
462 0, 0, 0, 0,
463 /* S */ sizeof(unsigned short),
464 0, 0,
7212898e 465#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 466 /* V */ SIZE32,
7212898e 467#else
a7a3cfaa 468 0,
7212898e 469#endif
a7a3cfaa
TH
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0,
472 /* i */ sizeof(int),
473 0, 0,
474 /* l */ sizeof(long),
475 0,
7212898e 476#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 477 /* n */ SIZE16,
7212898e 478#else
a7a3cfaa 479 0,
7212898e 480#endif
a7a3cfaa
TH
481 0, 0, 0, 0,
482 /* s */ sizeof(short),
483 0, 0,
7212898e 484#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 485 /* v */ SIZE16,
7212898e 486#else
a7a3cfaa 487 0,
7212898e 488#endif
a7a3cfaa
TH
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
498};
499#else
500/* EBCDIC (or bust) */
f43a9a31 501STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
502 /* normal */
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 0, 0, 0,
512 /* c */ sizeof(char),
513 /* d */ sizeof(double),
514 0,
515 /* f */ sizeof(float),
516 0, 0,
517 /* i */ sizeof(int),
518 0, 0, 0, 0, 0, 0, 0,
519 /* j */ IVSIZE,
520 0,
521 /* l */ SIZE32,
522 0,
523 /* n */ SIZE16,
524 0,
525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 526#if defined(HAS_QUAD)
a7a3cfaa 527 /* q */ sizeof(Quad_t),
80a13697 528#else
a7a3cfaa 529 0,
80a13697 530#endif
a7a3cfaa
TH
531 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* s */ SIZE16,
533 0, 0,
534 /* v */ SIZE16,
535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 538 /* C */ sizeof(unsigned char),
80a13697 539#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 540 /* D */ LONG_DOUBLESIZE,
80a13697 541#else
a7a3cfaa 542 0,
80a13697 543#endif
a7a3cfaa
TH
544 0,
545 /* F */ NVSIZE,
546 0, 0,
547 /* I */ sizeof(unsigned int),
548 0, 0, 0, 0, 0, 0, 0,
549 /* J */ UVSIZE,
550 0,
551 /* L */ SIZE32,
552 0,
553 /* N */ SIZE32,
554 0, 0,
80a13697 555#if defined(HAS_QUAD)
a7a3cfaa 556 /* Q */ sizeof(Uquad_t),
80a13697 557#else
a7a3cfaa 558 0,
80a13697 559#endif
a7a3cfaa
TH
560 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 /* S */ SIZE16,
562 0,
563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
564 /* V */ SIZE32,
565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* shrieking */
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 /* i */ sizeof(int),
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* l */ sizeof(long),
581 0,
7212898e 582#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 583 /* n */ SIZE16,
7212898e 584#else
a7a3cfaa 585 0,
7212898e 586#endif
a7a3cfaa
TH
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* s */ sizeof(short),
589 0, 0,
7212898e 590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 591 /* v */ SIZE16,
7212898e 592#else
a7a3cfaa 593 0,
7212898e 594#endif
a7a3cfaa
TH
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0,
598 /* I */ sizeof(unsigned int),
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* L */ sizeof(unsigned long),
601 0,
7212898e 602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 603 /* N */ SIZE32,
7212898e 604#else
a7a3cfaa 605 0,
7212898e 606#endif
a7a3cfaa
TH
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 /* S */ sizeof(unsigned short),
609 0, 0,
7212898e 610#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 611 /* V */ SIZE32,
7212898e 612#else
a7a3cfaa 613 0,
7212898e 614#endif
a7a3cfaa
TH
615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
617};
618#endif
619
08ca2aa3 620STATIC U8
f7fe979e 621uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 622{
08ca2aa3 623 STRLEN retlen;
0bcc34c2 624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
626 /* We try to process malformed UTF-8 as much as possible (preferrably with
627 warnings), but these two mean we make no progress in the string and
628 might enter an infinite loop */
629 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 632 if (val >= 0x100) {
f337b084 633 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
634 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
635 "Character in '%c' format wrapped in unpack",
f337b084 636 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
637 val &= 0xff;
638 }
639 *s += retlen;
fe2774ed 640 return (U8)val;
08ca2aa3
TH
641}
642
f337b084
TH
643#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
644 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
645 *(U8 *)(s)++)
646
647STATIC bool
f7fe979e 648uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
649{
650 UV val;
651 STRLEN retlen;
f7fe979e 652 const char *from = *s;
08ca2aa3 653 int bad = 0;
f7fe979e 654 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
655 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
656 for (;buf_len > 0; buf_len--) {
657 if (from >= end) return FALSE;
f337b084 658 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
659 if (retlen == (STRLEN) -1 || retlen == 0) {
660 from += UTF8SKIP(from);
661 bad |= 1;
662 } else from += retlen;
663 if (val >= 0x100) {
664 bad |= 2;
665 val &= 0xff;
666 }
fe2774ed 667 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
668 }
669 /* We have enough characters for the buffer. Did we have problems ? */
670 if (bad) {
671 if (bad & 1) {
672 /* Rewalk the string fragment while warning */
f7fe979e 673 const char *ptr;
9e27e96a 674 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
675 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
676 if (ptr >= end) break;
f337b084 677 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
678 }
679 if (from > end) from = end;
680 }
681 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 682 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
683 WARN_PACK : WARN_UNPACK),
684 "Character(s) in '%c' format wrapped in %s",
fc241834 685 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 686 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
687 }
688 *s = from;
689 return TRUE;
690}
691
692STATIC bool
f7fe979e 693next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 694{
97aff369 695 dVAR;
08ca2aa3 696 STRLEN retlen;
0bcc34c2 697 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
698 if (val >= 0x100 || !ISUUCHAR(val) ||
699 retlen == (STRLEN) -1 || retlen == 0) {
700 *out = 0;
701 return FALSE;
702 }
703 *out = PL_uudmap[val] & 077;
f337b084 704 *s += retlen;
08ca2aa3
TH
705 return TRUE;
706}
78d46eaa 707
64844641 708STATIC char *
14333449 709S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 710 const U8 * const end = start + len;
64844641 711
7918f24d
NC
712 PERL_ARGS_ASSERT_BYTES_TO_UNI;
713
f337b084 714 while (start < end) {
48fa4626
TS
715 const UV uv = NATIVE_TO_ASCII(*start);
716 if (UNI_IS_INVARIANT(uv))
717 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
718 else {
719 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
720 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
721 }
722 start++;
723 }
64844641 724 return dest;
f337b084
TH
725}
726
230e1fce
NC
727#define PUSH_BYTES(utf8, cur, buf, len) \
728STMT_START { \
64844641
AL
729 if (utf8) \
730 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
731 else { \
732 Copy(buf, cur, len, char); \
733 (cur) += (len); \
734 } \
f337b084
TH
735} STMT_END
736
737#define GROWING(utf8, cat, start, cur, in_len) \
738STMT_START { \
739 STRLEN glen = (in_len); \
3473cf63 740 if (utf8) glen *= UTF8_EXPAND; \
f337b084 741 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 742 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
743 (cur) = (start) + SvCUR(cat); \
744 } \
745} STMT_END
746
747#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
748STMT_START { \
f7fe979e 749 const STRLEN glen = (in_len); \
f337b084 750 STRLEN gl = glen; \
3473cf63 751 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
752 if ((cur) + gl >= (start) + SvLEN(cat)) { \
753 *cur = '\0'; \
b162af07 754 SvCUR_set((cat), (cur) - (start)); \
0bd48802 755 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
756 (cur) = (start) + SvCUR(cat); \
757 } \
758 PUSH_BYTES(utf8, cur, buf, glen); \
759} STMT_END
760
761#define PUSH_BYTE(utf8, s, byte) \
762STMT_START { \
763 if (utf8) { \
f7fe979e 764 const U8 au8 = (byte); \
64844641 765 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
766 } else *(U8 *)(s)++ = (byte); \
767} STMT_END
768
769/* Only to be used inside a loop (see the break) */
770#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
771STMT_START { \
772 STRLEN retlen; \
773 if (str >= end) break; \
774 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
775 if (retlen == (STRLEN) -1 || retlen == 0) { \
776 *cur = '\0'; \
777 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
778 } \
779 str += retlen; \
780} STMT_END
781
f7fe979e
AL
782static const char *_action( const tempsym_t* symptr )
783{
10edeb5d 784 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
785}
786
206947d2 787/* Returns the sizeof() struct described by pat */
028d1f6d 788STATIC I32
f337b084 789S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 790{
f337b084 791 I32 total = 0;
206947d2 792
7918f24d
NC
793 PERL_ARGS_ASSERT_MEASURE_STRUCT;
794
49704364 795 while (next_symbol(symptr)) {
f337b084 796 I32 len;
f7fe979e 797 int size;
f337b084
TH
798
799 switch (symptr->howlen) {
fc241834 800 case e_star:
49704364 801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 802 _action( symptr ) );
49704364 803 break;
f337b084
TH
804 default:
805 /* e_no_len and e_number */
806 len = symptr->length;
807 break;
49704364
WL
808 }
809
a7a3cfaa 810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 811 if (!size) {
f7fe979e 812 int star;
80a13697
NC
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
815 default:
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 818 _action( symptr ) );
28be1210
TH
819#ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
822#endif
80a13697 823 case '@':
28be1210 824 case '.':
80a13697
NC
825 case '/':
826 case 'U': /* XXXX Is it correct? */
827 case 'w':
828 case 'u':
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 830 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 831 _action( symptr ) );
80a13697
NC
832 case '%':
833 size = 0;
834 break;
835 case '(':
fc241834
RGS
836 {
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
845 *symptr = savsym;
846 break;
847 }
80a13697
NC
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
850 */
851 if (!len) /* Avoid division by 0 */
852 len = 1;
853 len = total % len; /* Assumed: the start is aligned. */
854 /* FALL THROUGH */
855 case 'X':
856 size = -1;
857 if (total < len)
f7fe979e 858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
859 break;
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
862 len = 1;
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
865 len = len - star;
866 else
867 len = 0;
868 /* FALL THROUGH */
869 case 'x':
870 case 'A':
871 case 'Z':
872 case 'a':
80a13697
NC
873 size = 1;
874 break;
875 case 'B':
876 case 'b':
877 len = (len + 7)/8;
878 size = 1;
879 break;
880 case 'H':
881 case 'h':
882 len = (len + 1)/2;
883 size = 1;
884 break;
78d46eaa 885
80a13697
NC
886 case 'P':
887 len = 1;
888 size = sizeof(char*);
78d46eaa
NC
889 break;
890 }
206947d2
IZ
891 }
892 total += len * size;
893 }
894 return total;
895}
896
49704364
WL
897
898/* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
900 */
f7fe979e
AL
901STATIC const char *
902S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 903{
7918f24d
NC
904 PERL_ARGS_ASSERT_GROUP_END;
905
49704364 906 while (patptr < patend) {
f7fe979e 907 const char c = *patptr++;
49704364
WL
908
909 if (isSPACE(c))
910 continue;
911 else if (c == ender)
912 return patptr-1;
913 else if (c == '#') {
914 while (patptr < patend && *patptr != '\n')
915 patptr++;
916 continue;
917 } else if (c == '(')
918 patptr = group_end(patptr, patend, ')') + 1;
919 else if (c == '[')
920 patptr = group_end(patptr, patend, ']') + 1;
18529408 921 }
49704364
WL
922 Perl_croak(aTHX_ "No group ending character '%c' found in template",
923 ender);
924 return 0;
18529408
IZ
925}
926
49704364
WL
927
928/* Convert unsigned decimal number to binary.
929 * Expects a pointer to the first digit and address of length variable
930 * Advances char pointer to 1st non-digit char and returns number
fc241834 931 */
f7fe979e
AL
932STATIC const char *
933S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
934{
935 I32 len = *patptr++ - '0';
7918f24d
NC
936
937 PERL_ARGS_ASSERT_GET_NUM;
938
49704364
WL
939 while (isDIGIT(*patptr)) {
940 if (len >= 0x7FFFFFFF/10)
941 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
942 len = (len * 10) + (*patptr++ - '0');
943 }
944 *lenptr = len;
945 return patptr;
946}
947
948/* The marvellous template parsing routine: Using state stored in *symptr,
949 * locates next template code and count
950 */
951STATIC bool
f337b084 952S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 953{
f7fe979e 954 const char* patptr = symptr->patptr;
0bcc34c2 955 const char* const patend = symptr->patend;
49704364 956
7918f24d
NC
957 PERL_ARGS_ASSERT_NEXT_SYMBOL;
958
49704364
WL
959 symptr->flags &= ~FLAG_SLASH;
960
961 while (patptr < patend) {
962 if (isSPACE(*patptr))
963 patptr++;
964 else if (*patptr == '#') {
965 patptr++;
966 while (patptr < patend && *patptr != '\n')
967 patptr++;
968 if (patptr < patend)
969 patptr++;
970 } else {
fc241834 971 /* We should have found a template code */
49704364 972 I32 code = *patptr++ & 0xFF;
66c611c5 973 U32 inherited_modifiers = 0;
49704364
WL
974
975 if (code == ','){ /* grandfather in commas but with a warning */
976 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
977 symptr->flags |= FLAG_COMMA;
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 979 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
980 }
981 continue;
982 }
fc241834 983
49704364 984 /* for '(', skip to ')' */
fc241834 985 if (code == '(') {
49704364
WL
986 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
987 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 988 _action( symptr ) );
49704364
WL
989 symptr->grpbeg = patptr;
990 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
991 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
992 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 993 _action( symptr ) );
49704364
WL
994 }
995
66c611c5
MHM
996 /* look for group modifiers to inherit */
997 if (TYPE_ENDIANNESS(symptr->flags)) {
998 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
999 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1000 }
1001
1109a392
MHM
1002 /* look for modifiers */
1003 while (patptr < patend) {
b7787f18
AL
1004 const char *allowed;
1005 I32 modifier;
1109a392
MHM
1006 switch (*patptr) {
1007 case '!':
1008 modifier = TYPE_IS_SHRIEKING;
7212898e 1009 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1010 break;
7212898e 1011#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1012 case '>':
1013 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1014 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1015 break;
1016 case '<':
1017 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1018 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1019 break;
f337b084 1020#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1021 default:
b7787f18
AL
1022 allowed = "";
1023 modifier = 0;
1109a392
MHM
1024 break;
1025 }
66c611c5 1026
1109a392
MHM
1027 if (modifier == 0)
1028 break;
66c611c5 1029
1109a392
MHM
1030 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1031 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1032 allowed, _action( symptr ) );
66c611c5
MHM
1033
1034 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1035 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1036 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1037 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1038 TYPE_ENDIANNESS_MASK)
1039 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1040 *patptr, _action( symptr ) );
66c611c5 1041
041457d9 1042 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1043 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1044 "Duplicate modifier '%c' after '%c' in %s",
1045 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1046 _action( symptr ) );
1109a392 1047 }
66c611c5 1048
1109a392
MHM
1049 code |= modifier;
1050 patptr++;
49704364
WL
1051 }
1052
66c611c5
MHM
1053 /* inherit modifiers */
1054 code |= inherited_modifiers;
1055
fc241834 1056 /* look for count and/or / */
49704364
WL
1057 if (patptr < patend) {
1058 if (isDIGIT(*patptr)) {
1059 patptr = get_num( patptr, &symptr->length );
1060 symptr->howlen = e_number;
1061
1062 } else if (*patptr == '*') {
1063 patptr++;
1064 symptr->howlen = e_star;
1065
1066 } else if (*patptr == '[') {
f7fe979e 1067 const char* lenptr = ++patptr;
49704364
WL
1068 symptr->howlen = e_number;
1069 patptr = group_end( patptr, patend, ']' ) + 1;
1070 /* what kind of [] is it? */
1071 if (isDIGIT(*lenptr)) {
1072 lenptr = get_num( lenptr, &symptr->length );
1073 if( *lenptr != ']' )
1074 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1075 _action( symptr ) );
49704364
WL
1076 } else {
1077 tempsym_t savsym = *symptr;
1078 symptr->patend = patptr-1;
1079 symptr->patptr = lenptr;
1080 savsym.length = measure_struct(symptr);
1081 *symptr = savsym;
1082 }
1083 } else {
1084 symptr->howlen = e_no_len;
1085 symptr->length = 1;
1086 }
1087
1088 /* try to find / */
1089 while (patptr < patend) {
1090 if (isSPACE(*patptr))
1091 patptr++;
1092 else if (*patptr == '#') {
1093 patptr++;
1094 while (patptr < patend && *patptr != '\n')
1095 patptr++;
1096 if (patptr < patend)
1097 patptr++;
1098 } else {
66c611c5 1099 if (*patptr == '/') {
49704364
WL
1100 symptr->flags |= FLAG_SLASH;
1101 patptr++;
66c611c5
MHM
1102 if (patptr < patend &&
1103 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1104 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1105 _action( symptr ) );
49704364
WL
1106 }
1107 break;
1108 }
18529408 1109 }
49704364
WL
1110 } else {
1111 /* at end - no count, no / */
1112 symptr->howlen = e_no_len;
1113 symptr->length = 1;
1114 }
1115
1116 symptr->code = code;
fc241834 1117 symptr->patptr = patptr;
49704364 1118 return TRUE;
18529408 1119 }
49704364 1120 }
fc241834 1121 symptr->patptr = patptr;
49704364 1122 return FALSE;
18529408
IZ
1123}
1124
18529408 1125/*
fc241834 1126 There is no way to cleanly handle the case where we should process the
08ca2aa3 1127 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1128 (e.g. estimates like strend-s as an upper bound for the number of
1129 characters left wouldn't work). So if we foresee the need of this
1130 (pattern starts with U or contains U0), we want to work on the encoded
1131 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1132 themselves if they need to do a lot of unpacks like this on it
1133*/
fc241834 1134STATIC bool
08ca2aa3
TH
1135need_utf8(const char *pat, const char *patend)
1136{
1137 bool first = TRUE;
7918f24d
NC
1138
1139 PERL_ARGS_ASSERT_NEED_UTF8;
1140
08ca2aa3
TH
1141 while (pat < patend) {
1142 if (pat[0] == '#') {
1143 pat++;
f7fe979e 1144 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1145 if (!pat) return FALSE;
1146 } else if (pat[0] == 'U') {
1147 if (first || pat[1] == '0') return TRUE;
1148 } else first = FALSE;
1149 pat++;
1150 }
1151 return FALSE;
1152}
1153
1154STATIC char
1155first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1156 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1157
08ca2aa3
TH
1158 while (pat < patend) {
1159 if (pat[0] != '#') return pat[0];
1160 pat++;
f7fe979e 1161 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1162 if (!pat) return 0;
1163 pat++;
1164 }
1165 return 0;
1166}
1167
1168/*
7accc089
JH
1169=for apidoc unpackstring
1170
608d3aed
WL
1171The engine implementing unpack() Perl function. C<unpackstring> puts the
1172extracted list items on the stack and returns the number of elements.
1173Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1174
1175=cut */
1176
1177I32
f7fe979e 1178Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1179{
f7fe979e 1180 tempsym_t sym;
08ca2aa3 1181
7918f24d
NC
1182 PERL_ARGS_ASSERT_UNPACKSTRING;
1183
f337b084 1184 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1185 else if (need_utf8(pat, patend)) {
1186 /* We probably should try to avoid this in case a scalar context call
1187 wouldn't get to the "U0" */
1188 STRLEN len = strend - s;
230e1fce 1189 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1190 SAVEFREEPV(s);
1191 strend = s + len;
f337b084 1192 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1193 }
1194
f337b084
TH
1195 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1196 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1197
f7fe979e 1198 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1199
1200 return unpack_rec(&sym, s, s, strend, NULL );
1201}
1202
4136a0f7 1203STATIC I32
f7fe979e 1204S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1205{
27da23d5 1206 dVAR; dSP;
18529408 1207 SV *sv;
f7fe979e 1208 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1209 howlen_t howlen;
a6ec74c1 1210 I32 checksum = 0;
92d41999 1211 UV cuv = 0;
a6ec74c1 1212 NV cdouble = 0.0;
f337b084 1213 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1214 bool beyond = FALSE;
21c16052 1215 bool explicit_length;
9e27e96a 1216 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1217 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1218
1219 PERL_ARGS_ASSERT_UNPACK_REC;
1220
28be1210 1221 symptr->strbeg = s - strbeg;
49704364 1222
49704364 1223 while (next_symbol(symptr)) {
a7a3cfaa 1224 packprops_t props;
9e27e96a 1225 I32 len;
f337b084 1226 I32 datumtype = symptr->code;
206947d2 1227 /* do first one only unless in list context
08ca2aa3 1228 / is implemented by unpacking the count, then popping it from the
206947d2 1229 stack, so must check that we're not in the middle of a / */
49704364 1230 if ( unpack_only_one
206947d2 1231 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1232 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1233 break;
49704364 1234
f337b084 1235 switch (howlen = symptr->howlen) {
fc241834
RGS
1236 case e_star:
1237 len = strend - strbeg; /* long enough */
49704364 1238 break;
f337b084
TH
1239 default:
1240 /* e_no_len and e_number */
1241 len = symptr->length;
1242 break;
49704364 1243 }
18529408 1244
21c16052 1245 explicit_length = TRUE;
a6ec74c1 1246 redo_switch:
49704364 1247 beyond = s >= strend;
a7a3cfaa
TH
1248
1249 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1250 if (props) {
1251 /* props nonzero means we can process this letter. */
9e27e96a
AL
1252 const long size = props & PACK_SIZE_MASK;
1253 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1254 if (len > howmany)
1255 len = howmany;
1256
1257 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1258 if (len && unpack_only_one) len = 1;
1259 EXTEND(SP, len);
1260 EXTEND_MORTAL(len);
78d46eaa
NC
1261 }
1262 }
a7a3cfaa 1263
1109a392 1264 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1265 default:
1109a392 1266 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1267
a6ec74c1 1268 case '%':
49704364 1269 if (howlen == e_no_len)
18529408 1270 len = 16; /* len is not specified */
a6ec74c1 1271 checksum = len;
92d41999 1272 cuv = 0;
a6ec74c1 1273 cdouble = 0;
18529408 1274 continue;
a6ec74c1 1275 break;
18529408
IZ
1276 case '(':
1277 {
49704364 1278 tempsym_t savsym = *symptr;
9e27e96a 1279 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1280 symptr->flags |= group_modifiers;
49704364 1281 symptr->patend = savsym.grpend;
28be1210 1282 symptr->previous = &savsym;
49704364 1283 symptr->level++;
18529408 1284 PUTBACK;
c6f750d1 1285 if (len && unpack_only_one) len = 1;
18529408 1286 while (len--) {
49704364 1287 symptr->patptr = savsym.grpbeg;
f337b084
TH
1288 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1289 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1290 unpack_rec(symptr, s, strbeg, strend, &s);
1291 if (s == strend && savsym.howlen == e_star)
49704364 1292 break; /* No way to continue */
18529408
IZ
1293 }
1294 SPAGAIN;
28be1210 1295 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1296 *symptr = savsym;
18529408
IZ
1297 break;
1298 }
28be1210
TH
1299#ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 case '.' | TYPE_IS_SHRIEKING:
1301#endif
1302 case '.': {
9e27e96a 1303 const char *from;
28be1210
TH
1304 SV *sv;
1305#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1306 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1307#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1308 const bool u8 = utf8;
28be1210
TH
1309#endif
1310 if (howlen == e_star) from = strbeg;
1311 else if (len <= 0) from = s;
1312 else {
1313 tempsym_t *group = symptr;
1314
1315 while (--len && group) group = group->previous;
1316 from = group ? strbeg + group->strbeg : strbeg;
1317 }
1318 sv = from <= s ?
00646304
CB
1319 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1320 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1321 mXPUSHs(sv);
28be1210
TH
1322 break;
1323 }
1324#ifdef PERL_PACK_CAN_SHRIEKSIGN
1325 case '@' | TYPE_IS_SHRIEKING:
1326#endif
a6ec74c1 1327 case '@':
28be1210
TH
1328 s = strbeg + symptr->strbeg;
1329#ifdef PERL_PACK_CAN_SHRIEKSIGN
1330 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1331#else /* PERL_PACK_CAN_SHRIEKSIGN */
1332 if (utf8)
1333#endif
1334 {
08ca2aa3
TH
1335 while (len > 0) {
1336 if (s >= strend)
1337 Perl_croak(aTHX_ "'@' outside of string in unpack");
1338 s += UTF8SKIP(s);
1339 len--;
1340 }
1341 if (s > strend)
1342 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1343 } else {
28be1210 1344 if (strend-s < len)
fc241834 1345 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1346 s += len;
08ca2aa3 1347 }
a6ec74c1 1348 break;
62f95557
IZ
1349 case 'X' | TYPE_IS_SHRIEKING:
1350 if (!len) /* Avoid division by 0 */
1351 len = 1;
08ca2aa3 1352 if (utf8) {
f7fe979e 1353 const char *hop, *last;
f337b084
TH
1354 I32 l = len;
1355 hop = last = strbeg;
1356 while (hop < s) {
1357 hop += UTF8SKIP(hop);
1358 if (--l == 0) {
08ca2aa3 1359 last = hop;
f337b084
TH
1360 l = len;
1361 }
fc241834 1362 }
f337b084
TH
1363 if (last > s)
1364 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1365 s = last;
1366 break;
f337b084
TH
1367 }
1368 len = (s - strbeg) % len;
62f95557 1369 /* FALL THROUGH */
a6ec74c1 1370 case 'X':
08ca2aa3
TH
1371 if (utf8) {
1372 while (len > 0) {
1373 if (s <= strbeg)
1374 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1375 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1376 if (s <= strbeg)
1377 Perl_croak(aTHX_ "'X' outside of string in unpack");
1378 }
1379 len--;
1380 }
1381 } else {
fc241834
RGS
1382 if (len > s - strbeg)
1383 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1384 s -= len;
08ca2aa3 1385 }
a6ec74c1 1386 break;
9e27e96a
AL
1387 case 'x' | TYPE_IS_SHRIEKING: {
1388 I32 ai32;
62f95557
IZ
1389 if (!len) /* Avoid division by 0 */
1390 len = 1;
230e1fce
NC
1391 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1392 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1393 if (ai32 == 0) break;
1394 len -= ai32;
9e27e96a 1395 }
62f95557 1396 /* FALL THROUGH */
a6ec74c1 1397 case 'x':
08ca2aa3
TH
1398 if (utf8) {
1399 while (len>0) {
1400 if (s >= strend)
1401 Perl_croak(aTHX_ "'x' outside of string in unpack");
1402 s += UTF8SKIP(s);
1403 len--;
1404 }
1405 } else {
fc241834
RGS
1406 if (len > strend - s)
1407 Perl_croak(aTHX_ "'x' outside of string in unpack");
1408 s += len;
f337b084 1409 }
a6ec74c1
JH
1410 break;
1411 case '/':
49704364
WL
1412 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1413 break;
a6ec74c1
JH
1414 case 'A':
1415 case 'Z':
1416 case 'a':
08ca2aa3
TH
1417 if (checksum) {
1418 /* Preliminary length estimate is assumed done in 'W' */
1419 if (len > strend - s) len = strend - s;
1420 goto W_checksum;
1421 }
1422 if (utf8) {
1423 I32 l;
f7fe979e 1424 const char *hop;
08ca2aa3
TH
1425 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1426 if (hop >= strend) {
1427 if (hop > strend)
1428 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1429 break;
fc241834 1430 }
a6ec74c1 1431 }
08ca2aa3
TH
1432 if (hop > strend)
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434 len = hop - s;
1435 } else if (len > strend - s)
1436 len = strend - s;
1437
1438 if (datumtype == 'Z') {
1439 /* 'Z' strips stuff after first null */
f7fe979e 1440 const char *ptr, *end;
f337b084
TH
1441 end = s + len;
1442 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1443 sv = newSVpvn(s, ptr-s);
1444 if (howlen == e_star) /* exact for 'Z*' */
1445 len = ptr-s + (ptr != strend ? 1 : 0);
1446 } else if (datumtype == 'A') {
1447 /* 'A' strips both nulls and spaces */
f7fe979e 1448 const char *ptr;
18bdf90a
TH
1449 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1450 for (ptr = s+len-1; ptr >= s; ptr--)
1451 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1452 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1453 if (ptr >= s) ptr += UTF8SKIP(ptr);
1454 else ptr++;
28be1210 1455 if (ptr > s+len)
18bdf90a
TH
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1457 } else {
1458 for (ptr = s+len-1; ptr >= s; ptr--)
1459 if (*ptr != 0 && !isSPACE(*ptr)) break;
1460 ptr++;
1461 }
08ca2aa3
TH
1462 sv = newSVpvn(s, ptr-s);
1463 } else sv = newSVpvn(s, len);
1464
1465 if (utf8) {
1466 SvUTF8_on(sv);
1467 /* Undo any upgrade done due to need_utf8() */
f337b084 1468 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1469 sv_utf8_downgrade(sv, 0);
a6ec74c1 1470 }
6e449a3a 1471 mXPUSHs(sv);
08ca2aa3 1472 s += len;
a6ec74c1
JH
1473 break;
1474 case 'B':
08ca2aa3
TH
1475 case 'b': {
1476 char *str;
49704364 1477 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1478 len = (strend - s) * 8;
1479 if (checksum) {
1480 if (!PL_bitcount) {
08ca2aa3 1481 int bits;
a02a5408 1482 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1483 for (bits = 1; bits < 256; bits++) {
1484 if (bits & 1) PL_bitcount[bits]++;
1485 if (bits & 2) PL_bitcount[bits]++;
1486 if (bits & 4) PL_bitcount[bits]++;
1487 if (bits & 8) PL_bitcount[bits]++;
1488 if (bits & 16) PL_bitcount[bits]++;
1489 if (bits & 32) PL_bitcount[bits]++;
1490 if (bits & 64) PL_bitcount[bits]++;
1491 if (bits & 128) PL_bitcount[bits]++;
1492 }
1493 }
f337b084 1494 if (utf8)
08ca2aa3 1495 while (len >= 8 && s < strend) {
f337b084 1496 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1497 len -= 8;
1498 }
f337b084 1499 else
fc241834 1500 while (len >= 8) {
08ca2aa3 1501 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1502 len -= 8;
1503 }
08ca2aa3
TH
1504 if (len && s < strend) {
1505 U8 bits;
f337b084
TH
1506 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1507 if (datumtype == 'b')
a6ec74c1 1508 while (len-- > 0) {
92d41999 1509 if (bits & 1) cuv++;
a6ec74c1
JH
1510 bits >>= 1;
1511 }
f337b084 1512 else
a6ec74c1 1513 while (len-- > 0) {
08ca2aa3 1514 if (bits & 0x80) cuv++;
a6ec74c1
JH
1515 bits <<= 1;
1516 }
fc241834 1517 }
a6ec74c1
JH
1518 break;
1519 }
08ca2aa3 1520
561b68a9 1521 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1522 SvPOK_on(sv);
1523 str = SvPVX(sv);
1524 if (datumtype == 'b') {
f337b084 1525 U8 bits = 0;
f7fe979e 1526 const I32 ai32 = len;
08ca2aa3
TH
1527 for (len = 0; len < ai32; len++) {
1528 if (len & 7) bits >>= 1;
1529 else if (utf8) {
1530 if (s >= strend) break;
f337b084 1531 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1532 } else bits = *(U8 *) s++;
1533 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1534 }
08ca2aa3 1535 } else {
f337b084 1536 U8 bits = 0;
f7fe979e 1537 const I32 ai32 = len;
08ca2aa3
TH
1538 for (len = 0; len < ai32; len++) {
1539 if (len & 7) bits <<= 1;
1540 else if (utf8) {
1541 if (s >= strend) break;
f337b084 1542 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1543 } else bits = *(U8 *) s++;
1544 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1545 }
1546 }
1547 *str = '\0';
aa07b2f6 1548 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1549 XPUSHs(sv);
a6ec74c1 1550 break;
08ca2aa3 1551 }
a6ec74c1 1552 case 'H':
08ca2aa3
TH
1553 case 'h': {
1554 char *str;
fc241834 1555 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1556 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1557 len = (strend - s) * 2;
561b68a9 1558 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1559 SvPOK_on(sv);
1560 str = SvPVX(sv);
1561 if (datumtype == 'h') {
f337b084 1562 U8 bits = 0;
9e27e96a 1563 I32 ai32 = len;
fc241834
RGS
1564 for (len = 0; len < ai32; len++) {
1565 if (len & 1) bits >>= 4;
1566 else if (utf8) {
1567 if (s >= strend) break;
f337b084 1568 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1569 } else bits = * (U8 *) s++;
a6ec74c1
JH
1570 *str++ = PL_hexdigit[bits & 15];
1571 }
08ca2aa3 1572 } else {
f337b084 1573 U8 bits = 0;
f7fe979e 1574 const I32 ai32 = len;
08ca2aa3
TH
1575 for (len = 0; len < ai32; len++) {
1576 if (len & 1) bits <<= 4;
1577 else if (utf8) {
1578 if (s >= strend) break;
f337b084 1579 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1580 } else bits = *(U8 *) s++;
a6ec74c1
JH
1581 *str++ = PL_hexdigit[(bits >> 4) & 15];
1582 }
1583 }
1584 *str = '\0';
aa07b2f6 1585 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1586 XPUSHs(sv);
a6ec74c1 1587 break;
08ca2aa3 1588 }
1651fc44
ML
1589 case 'C':
1590 if (len == 0) {
1591 if (explicit_length)
1592 /* Switch to "character" mode */
1593 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1594 break;
1595 }
1596 /* FALL THROUGH */
a6ec74c1 1597 case 'c':
1651fc44
ML
1598 while (len-- > 0 && s < strend) {
1599 int aint;
1600 if (utf8)
1601 {
1602 STRLEN retlen;
1603 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1604 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1605 if (retlen == (STRLEN) -1 || retlen == 0)
1606 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1607 s += retlen;
1608 }
1609 else
1610 aint = *(U8 *)(s)++;
1611 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1612 aint -= 256;
08ca2aa3 1613 if (!checksum)
6e449a3a 1614 mPUSHi(aint);
73cb7263
NC
1615 else if (checksum > bits_in_uv)
1616 cdouble += (NV)aint;
1617 else
1618 cuv += aint;
a6ec74c1
JH
1619 }
1620 break;
08ca2aa3
TH
1621 case 'W':
1622 W_checksum:
1651fc44 1623 if (utf8) {
08ca2aa3 1624 while (len-- > 0 && s < strend) {
08ca2aa3 1625 STRLEN retlen;
f7fe979e 1626 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1627 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1628 if (retlen == (STRLEN) -1 || retlen == 0)
1629 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1630 s += retlen;
1631 if (!checksum)
6e449a3a 1632 mPUSHu(val);
08ca2aa3
TH
1633 else if (checksum > bits_in_uv)
1634 cdouble += (NV) val;
d6d3e8bd 1635 else
08ca2aa3 1636 cuv += val;
fc241834 1637 }
08ca2aa3 1638 } else if (!checksum)
a6ec74c1 1639 while (len-- > 0) {
f7fe979e 1640 const U8 ch = *(U8 *) s++;
6e449a3a 1641 mPUSHu(ch);
a6ec74c1 1642 }
08ca2aa3
TH
1643 else if (checksum > bits_in_uv)
1644 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1645 else
1646 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1647 break;
1648 case 'U':
35bcd338 1649 if (len == 0) {
08ca2aa3
TH
1650 if (explicit_length) {
1651 /* Switch to "bytes in UTF-8" mode */
f337b084 1652 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1653 else
1654 /* Should be impossible due to the need_utf8() test */
1655 Perl_croak(aTHX_ "U0 mode on a byte string");
1656 }
35bcd338
JH
1657 break;
1658 }
08ca2aa3 1659 if (len > strend - s) len = strend - s;
fc241834 1660 if (!checksum) {
08ca2aa3
TH
1661 if (len && unpack_only_one) len = 1;
1662 EXTEND(SP, len);
1663 EXTEND_MORTAL(len);
fc241834 1664 }
08ca2aa3
TH
1665 while (len-- > 0 && s < strend) {
1666 STRLEN retlen;
1667 UV auv;
1668 if (utf8) {
1669 U8 result[UTF8_MAXLEN];
f7fe979e 1670 const char *ptr = s;
08ca2aa3 1671 STRLEN len;
08ca2aa3
TH
1672 /* Bug: warns about bad utf8 even if we are short on bytes
1673 and will break out of the loop */
230e1fce
NC
1674 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1675 'U'))
08ca2aa3
TH
1676 break;
1677 len = UTF8SKIP(result);
fc241834 1678 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1679 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1680 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1681 s = ptr;
1682 } else {
1683 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1684 if (retlen == (STRLEN) -1 || retlen == 0)
1685 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1686 s += retlen;
1687 }
1688 if (!checksum)
6e449a3a 1689 mPUSHu(auv);
73cb7263 1690 else if (checksum > bits_in_uv)
08ca2aa3 1691 cdouble += (NV) auv;
73cb7263 1692 else
08ca2aa3 1693 cuv += auv;
a6ec74c1
JH
1694 }
1695 break;
49704364
WL
1696 case 's' | TYPE_IS_SHRIEKING:
1697#if SHORTSIZE != SIZE16
73cb7263 1698 while (len-- > 0) {
08ca2aa3 1699 short ashort;
f337b084
TH
1700 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1701 DO_BO_UNPACK(ashort, s);
08ca2aa3 1702 if (!checksum)
6e449a3a 1703 mPUSHi(ashort);
73cb7263
NC
1704 else if (checksum > bits_in_uv)
1705 cdouble += (NV)ashort;
1706 else
1707 cuv += ashort;
49704364
WL
1708 }
1709 break;
1710#else
1711 /* Fallthrough! */
a6ec74c1 1712#endif
49704364 1713 case 's':
73cb7263 1714 while (len-- > 0) {
08ca2aa3
TH
1715 I16 ai16;
1716
1717#if U16SIZE > SIZE16
1718 ai16 = 0;
1719#endif
f337b084 1720 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1721 DO_BO_UNPACK(ai16, 16);
1109a392 1722#if U16SIZE > SIZE16
73cb7263
NC
1723 if (ai16 > 32767)
1724 ai16 -= 65536;
a6ec74c1 1725#endif
08ca2aa3 1726 if (!checksum)
6e449a3a 1727 mPUSHi(ai16);
73cb7263
NC
1728 else if (checksum > bits_in_uv)
1729 cdouble += (NV)ai16;
1730 else
1731 cuv += ai16;
a6ec74c1
JH
1732 }
1733 break;
49704364
WL
1734 case 'S' | TYPE_IS_SHRIEKING:
1735#if SHORTSIZE != SIZE16
73cb7263 1736 while (len-- > 0) {
08ca2aa3 1737 unsigned short aushort;
f337b084
TH
1738 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1739 DO_BO_UNPACK(aushort, s);
08ca2aa3 1740 if (!checksum)
6e449a3a 1741 mPUSHu(aushort);
73cb7263
NC
1742 else if (checksum > bits_in_uv)
1743 cdouble += (NV)aushort;
1744 else
1745 cuv += aushort;
49704364
WL
1746 }
1747 break;
1748#else
1749 /* Fallhrough! */
1750#endif
a6ec74c1
JH
1751 case 'v':
1752 case 'n':
1753 case 'S':
73cb7263 1754 while (len-- > 0) {
08ca2aa3
TH
1755 U16 au16;
1756#if U16SIZE > SIZE16
1757 au16 = 0;
1758#endif
f337b084 1759 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1760 DO_BO_UNPACK(au16, 16);
a6ec74c1 1761#ifdef HAS_NTOHS
73cb7263
NC
1762 if (datumtype == 'n')
1763 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1764#endif
1765#ifdef HAS_VTOHS
73cb7263
NC
1766 if (datumtype == 'v')
1767 au16 = vtohs(au16);
a6ec74c1 1768#endif
08ca2aa3 1769 if (!checksum)
6e449a3a 1770 mPUSHu(au16);
73cb7263 1771 else if (checksum > bits_in_uv)
f337b084 1772 cdouble += (NV) au16;
73cb7263
NC
1773 else
1774 cuv += au16;
a6ec74c1
JH
1775 }
1776 break;
7212898e 1777#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1778 case 'v' | TYPE_IS_SHRIEKING:
1779 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1780 while (len-- > 0) {
08ca2aa3
TH
1781 I16 ai16;
1782# if U16SIZE > SIZE16
1783 ai16 = 0;
1784# endif
f337b084 1785 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1786# ifdef HAS_NTOHS
73cb7263 1787 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1788 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1789# endif /* HAS_NTOHS */
1790# ifdef HAS_VTOHS
73cb7263 1791 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1792 ai16 = (I16) vtohs((U16) ai16);
1793# endif /* HAS_VTOHS */
1794 if (!checksum)
6e449a3a 1795 mPUSHi(ai16);
73cb7263 1796 else if (checksum > bits_in_uv)
08ca2aa3 1797 cdouble += (NV) ai16;
73cb7263
NC
1798 else
1799 cuv += ai16;
068bd2e7
MHM
1800 }
1801 break;
08ca2aa3 1802#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1803 case 'i':
49704364 1804 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1805 while (len-- > 0) {
08ca2aa3 1806 int aint;
f337b084
TH
1807 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1808 DO_BO_UNPACK(aint, i);
08ca2aa3 1809 if (!checksum)
6e449a3a 1810 mPUSHi(aint);
73cb7263
NC
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV)aint;
1813 else
1814 cuv += aint;
a6ec74c1
JH
1815 }
1816 break;
1817 case 'I':
49704364 1818 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1819 while (len-- > 0) {
08ca2aa3 1820 unsigned int auint;
f337b084
TH
1821 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1822 DO_BO_UNPACK(auint, i);
08ca2aa3 1823 if (!checksum)
6e449a3a 1824 mPUSHu(auint);
73cb7263
NC
1825 else if (checksum > bits_in_uv)
1826 cdouble += (NV)auint;
1827 else
1828 cuv += auint;
a6ec74c1
JH
1829 }
1830 break;
92d41999 1831 case 'j':
73cb7263 1832 while (len-- > 0) {
08ca2aa3 1833 IV aiv;
f337b084 1834 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1835#if IVSIZE == INTSIZE
f337b084 1836 DO_BO_UNPACK(aiv, i);
1109a392 1837#elif IVSIZE == LONGSIZE
f337b084 1838 DO_BO_UNPACK(aiv, l);
1109a392 1839#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1840 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1841#else
1842 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1843#endif
08ca2aa3 1844 if (!checksum)
6e449a3a 1845 mPUSHi(aiv);
73cb7263
NC
1846 else if (checksum > bits_in_uv)
1847 cdouble += (NV)aiv;
1848 else
1849 cuv += aiv;
92d41999
JH
1850 }
1851 break;
1852 case 'J':
73cb7263 1853 while (len-- > 0) {
08ca2aa3 1854 UV auv;
f337b084 1855 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1856#if IVSIZE == INTSIZE
f337b084 1857 DO_BO_UNPACK(auv, i);
08ca2aa3 1858#elif IVSIZE == LONGSIZE
f337b084 1859 DO_BO_UNPACK(auv, l);
08ca2aa3 1860#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1861 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1862#else
1863 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1864#endif
08ca2aa3 1865 if (!checksum)
6e449a3a 1866 mPUSHu(auv);
73cb7263
NC
1867 else if (checksum > bits_in_uv)
1868 cdouble += (NV)auv;
1869 else
1870 cuv += auv;
92d41999
JH
1871 }
1872 break;
49704364
WL
1873 case 'l' | TYPE_IS_SHRIEKING:
1874#if LONGSIZE != SIZE32
73cb7263 1875 while (len-- > 0) {
08ca2aa3 1876 long along;
f337b084
TH
1877 SHIFT_VAR(utf8, s, strend, along, datumtype);
1878 DO_BO_UNPACK(along, l);
08ca2aa3 1879 if (!checksum)
6e449a3a 1880 mPUSHi(along);
73cb7263
NC
1881 else if (checksum > bits_in_uv)
1882 cdouble += (NV)along;
1883 else
1884 cuv += along;
49704364
WL
1885 }
1886 break;
1887#else
1888 /* Fallthrough! */
a6ec74c1 1889#endif
49704364 1890 case 'l':
73cb7263 1891 while (len-- > 0) {
08ca2aa3
TH
1892 I32 ai32;
1893#if U32SIZE > SIZE32
1894 ai32 = 0;
1895#endif
f337b084 1896 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1897 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1898#if U32SIZE > SIZE32
08ca2aa3 1899 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1900#endif
08ca2aa3 1901 if (!checksum)
6e449a3a 1902 mPUSHi(ai32);
73cb7263
NC
1903 else if (checksum > bits_in_uv)
1904 cdouble += (NV)ai32;
1905 else
1906 cuv += ai32;
a6ec74c1
JH
1907 }
1908 break;
49704364
WL
1909 case 'L' | TYPE_IS_SHRIEKING:
1910#if LONGSIZE != SIZE32
73cb7263 1911 while (len-- > 0) {
08ca2aa3 1912 unsigned long aulong;
f337b084
TH
1913 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1914 DO_BO_UNPACK(aulong, l);
08ca2aa3 1915 if (!checksum)
6e449a3a 1916 mPUSHu(aulong);
73cb7263
NC
1917 else if (checksum > bits_in_uv)
1918 cdouble += (NV)aulong;
1919 else
1920 cuv += aulong;
49704364
WL
1921 }
1922 break;
1923#else
1924 /* Fall through! */
1925#endif
a6ec74c1
JH
1926 case 'V':
1927 case 'N':
1928 case 'L':
73cb7263 1929 while (len-- > 0) {
08ca2aa3
TH
1930 U32 au32;
1931#if U32SIZE > SIZE32
1932 au32 = 0;
1933#endif
f337b084 1934 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1935 DO_BO_UNPACK(au32, 32);
a6ec74c1 1936#ifdef HAS_NTOHL
73cb7263
NC
1937 if (datumtype == 'N')
1938 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1939#endif
1940#ifdef HAS_VTOHL
73cb7263
NC
1941 if (datumtype == 'V')
1942 au32 = vtohl(au32);
a6ec74c1 1943#endif
08ca2aa3 1944 if (!checksum)
6e449a3a 1945 mPUSHu(au32);
fc241834
RGS
1946 else if (checksum > bits_in_uv)
1947 cdouble += (NV)au32;
1948 else
1949 cuv += au32;
a6ec74c1
JH
1950 }
1951 break;
7212898e 1952#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1953 case 'V' | TYPE_IS_SHRIEKING:
1954 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1955 while (len-- > 0) {
08ca2aa3
TH
1956 I32 ai32;
1957# if U32SIZE > SIZE32
1958 ai32 = 0;
1959# endif
f337b084 1960 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1961# ifdef HAS_NTOHL
73cb7263
NC
1962 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1963 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1964# endif
1965# ifdef HAS_VTOHL
73cb7263
NC
1966 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1967 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1968# endif
1969 if (!checksum)
6e449a3a 1970 mPUSHi(ai32);
73cb7263
NC
1971 else if (checksum > bits_in_uv)
1972 cdouble += (NV)ai32;
1973 else
1974 cuv += ai32;
068bd2e7
MHM
1975 }
1976 break;
08ca2aa3 1977#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1978 case 'p':
a6ec74c1 1979 while (len-- > 0) {
f7fe979e 1980 const char *aptr;
f337b084 1981 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1982 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1983 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1984 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1985 }
1986 break;
1987 case 'w':
a6ec74c1
JH
1988 {
1989 UV auv = 0;
1990 U32 bytes = 0;
fc241834 1991
08ca2aa3
TH
1992 while (len > 0 && s < strend) {
1993 U8 ch;
f337b084 1994 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1995 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1996 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1997 if (ch < 0x80) {
a6ec74c1 1998 bytes = 0;
6e449a3a 1999 mPUSHu(auv);
a6ec74c1
JH
2000 len--;
2001 auv = 0;
08ca2aa3 2002 continue;
a6ec74c1 2003 }
08ca2aa3 2004 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2005 const char *t;
a6ec74c1 2006
f5992bc4 2007 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2008 while (s < strend) {
f337b084 2009 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2010 sv = mul128(sv, (U8)(ch & 0x7f));
2011 if (!(ch & 0x80)) {
a6ec74c1
JH
2012 bytes = 0;
2013 break;
2014 }
2015 }
10516c54 2016 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2017 while (*t == '0')
2018 t++;
2019 sv_chop(sv, t);
6e449a3a 2020 mPUSHs(sv);
a6ec74c1
JH
2021 len--;
2022 auv = 0;
2023 }
2024 }
2025 if ((s >= strend) && bytes)
49704364 2026 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2027 }
2028 break;
2029 case 'P':
49704364
WL
2030 if (symptr->howlen == e_star)
2031 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2032 EXTEND(SP, 1);
2d3e0934 2033 if (s + sizeof(char*) <= strend) {
08ca2aa3 2034 char *aptr;
f337b084 2035 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2036 DO_BO_UNPACK_PC(aptr);
fc241834 2037 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2038 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2039 }
a6ec74c1
JH
2040 break;
2041#ifdef HAS_QUAD
2042 case 'q':
73cb7263 2043 while (len-- > 0) {
08ca2aa3 2044 Quad_t aquad;
f337b084
TH
2045 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2046 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2047 if (!checksum)
6e449a3a
MHM
2048 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2049 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2050 else if (checksum > bits_in_uv)
2051 cdouble += (NV)aquad;
2052 else
2053 cuv += aquad;
2054 }
a6ec74c1
JH
2055 break;
2056 case 'Q':
73cb7263 2057 while (len-- > 0) {
08ca2aa3 2058 Uquad_t auquad;
f337b084
TH
2059 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2060 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2061 if (!checksum)
6e449a3a
MHM
2062 mPUSHs(auquad <= UV_MAX ?
2063 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2064 else if (checksum > bits_in_uv)
2065 cdouble += (NV)auquad;
2066 else
2067 cuv += auquad;
a6ec74c1
JH
2068 }
2069 break;
08ca2aa3 2070#endif /* HAS_QUAD */
a6ec74c1
JH
2071 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2072 case 'f':
73cb7263 2073 while (len-- > 0) {
08ca2aa3 2074 float afloat;
f337b084 2075 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2076 DO_BO_UNPACK_N(afloat, float);
2077 if (!checksum)
6e449a3a 2078 mPUSHn(afloat);
08ca2aa3 2079 else
73cb7263 2080 cdouble += afloat;
fc241834 2081 }
a6ec74c1
JH
2082 break;
2083 case 'd':
73cb7263 2084 while (len-- > 0) {
08ca2aa3 2085 double adouble;
f337b084 2086 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2087 DO_BO_UNPACK_N(adouble, double);
2088 if (!checksum)
6e449a3a 2089 mPUSHn(adouble);
08ca2aa3 2090 else
73cb7263 2091 cdouble += adouble;
fc241834 2092 }
a6ec74c1 2093 break;
92d41999 2094 case 'F':
73cb7263 2095 while (len-- > 0) {
08ca2aa3 2096 NV anv;
f337b084 2097 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2098 DO_BO_UNPACK_N(anv, NV);
2099 if (!checksum)
6e449a3a 2100 mPUSHn(anv);
08ca2aa3 2101 else
73cb7263 2102 cdouble += anv;
fc241834 2103 }
92d41999
JH
2104 break;
2105#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2106 case 'D':
73cb7263 2107 while (len-- > 0) {
08ca2aa3 2108 long double aldouble;
f337b084 2109 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2110 DO_BO_UNPACK_N(aldouble, long double);
2111 if (!checksum)
6e449a3a 2112 mPUSHn(aldouble);
08ca2aa3
TH
2113 else
2114 cdouble += aldouble;
92d41999
JH
2115 }
2116 break;
2117#endif
a6ec74c1 2118 case 'u':
08ca2aa3 2119 {
f7fe979e 2120 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2121 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2122 if (l) SvPOK_on(sv);
2123 }
2124 if (utf8) {
2125 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2126 I32 a, b, c, d;
db187877 2127 char hunk[3];
08ca2aa3 2128
08ca2aa3
TH
2129 while (len > 0) {
2130 next_uni_uu(aTHX_ &s, strend, &a);
2131 next_uni_uu(aTHX_ &s, strend, &b);
2132 next_uni_uu(aTHX_ &s, strend, &c);
2133 next_uni_uu(aTHX_ &s, strend, &d);
2134 hunk[0] = (char)((a << 2) | (b >> 4));
2135 hunk[1] = (char)((b << 4) | (c >> 2));
2136 hunk[2] = (char)((c << 6) | d);
2137 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2138 len -= 3;
2139 }
2140 if (s < strend) {
f7fe979e
AL
2141 if (*s == '\n') {
2142 s++;
2143 }
08ca2aa3
TH
2144 else {
2145 /* possible checksum byte */
f7fe979e
AL
2146 const char *skip = s+UTF8SKIP(s);
2147 if (skip < strend && *skip == '\n')
2148 s = skip+1;
08ca2aa3
TH
2149 }
2150 }
2151 }
2152 } else {
fc241834
RGS
2153 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2154 I32 a, b, c, d;
db187877 2155 char hunk[3];
a6ec74c1 2156
fc241834
RGS
2157 len = PL_uudmap[*(U8*)s++] & 077;
2158 while (len > 0) {
2159 if (s < strend && ISUUCHAR(*s))
2160 a = PL_uudmap[*(U8*)s++] & 077;
2161 else
2162 a = 0;
2163 if (s < strend && ISUUCHAR(*s))
2164 b = PL_uudmap[*(U8*)s++] & 077;
2165 else
2166 b = 0;
2167 if (s < strend && ISUUCHAR(*s))
2168 c = PL_uudmap[*(U8*)s++] & 077;
2169 else
2170 c = 0;
2171 if (s < strend && ISUUCHAR(*s))
2172 d = PL_uudmap[*(U8*)s++] & 077;
2173 else
2174 d = 0;
2175 hunk[0] = (char)((a << 2) | (b >> 4));
2176 hunk[1] = (char)((b << 4) | (c >> 2));
2177 hunk[2] = (char)((c << 6) | d);
2178 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2179 len -= 3;
2180 }
2181 if (*s == '\n')
2182 s++;
2183 else /* possible checksum byte */
2184 if (s + 1 < strend && s[1] == '\n')
2185 s += 2;
a6ec74c1 2186 }
08ca2aa3
TH
2187 }
2188 XPUSHs(sv);
a6ec74c1
JH
2189 break;
2190 }
49704364 2191
a6ec74c1 2192 if (checksum) {
1109a392 2193 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2194 (checksum > bits_in_uv &&
08ca2aa3
TH
2195 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2196 NV trouble, anv;
a6ec74c1 2197
08ca2aa3 2198 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2199 while (checksum >= 16) {
2200 checksum -= 16;
08ca2aa3 2201 anv *= 65536.0;
a6ec74c1 2202 }
a6ec74c1 2203 while (cdouble < 0.0)
08ca2aa3
TH
2204 cdouble += anv;
2205 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2206 sv = newSVnv(cdouble);
a6ec74c1
JH
2207 }
2208 else {
fa8ec7c1
NC
2209 if (checksum < bits_in_uv) {
2210 UV mask = ((UV)1 << checksum) - 1;
92d41999 2211 cuv &= mask;
a6ec74c1 2212 }
c4c5f44a 2213 sv = newSVuv(cuv);
a6ec74c1 2214 }
6e449a3a 2215 mXPUSHs(sv);
a6ec74c1
JH
2216 checksum = 0;
2217 }
fc241834 2218
49704364
WL
2219 if (symptr->flags & FLAG_SLASH){
2220 if (SP - PL_stack_base - start_sp_offset <= 0)
2221 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2222 if( next_symbol(symptr) ){
2223 if( symptr->howlen == e_number )
2224 Perl_croak(aTHX_ "Count after length/code in unpack" );
2225 if( beyond ){
2226 /* ...end of char buffer then no decent length available */
2227 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2228 } else {
2229 /* take top of stack (hope it's numeric) */
2230 len = POPi;
2231 if( len < 0 )
2232 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2233 }
2234 } else {
2235 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2236 }
2237 datumtype = symptr->code;
21c16052 2238 explicit_length = FALSE;
49704364
WL
2239 goto redo_switch;
2240 }
a6ec74c1 2241 }
49704364 2242
18529408
IZ
2243 if (new_s)
2244 *new_s = s;
2245 PUTBACK;
2246 return SP - PL_stack_base - start_sp_offset;
2247}
2248
2249PP(pp_unpack)
2250{
97aff369 2251 dVAR;
18529408 2252 dSP;
bab9c0ac 2253 dPOPPOPssrl;
18529408
IZ
2254 I32 gimme = GIMME_V;
2255 STRLEN llen;
2256 STRLEN rlen;
5c144d81
NC
2257 const char *pat = SvPV_const(left, llen);
2258 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2259 const char *strend = s + rlen;
2260 const char *patend = pat + llen;
08ca2aa3 2261 I32 cnt;
18529408
IZ
2262
2263 PUTBACK;
7accc089 2264 cnt = unpackstring(pat, patend, s, strend,
49704364 2265 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2266 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2267
18529408
IZ
2268 SPAGAIN;
2269 if ( !cnt && gimme == G_SCALAR )
2270 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2271 RETURN;
2272}
2273
f337b084 2274STATIC U8 *
f7fe979e 2275doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2276{
f337b084 2277 *h++ = PL_uuemap[len];
a6ec74c1 2278 while (len > 2) {
f337b084
TH
2279 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2280 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2281 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2282 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2283 s += 3;
2284 len -= 3;
2285 }
2286 if (len > 0) {
f7fe979e 2287 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2288 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2289 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2290 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2291 *h++ = PL_uuemap[0];
a6ec74c1 2292 }
f337b084
TH
2293 *h++ = '\n';
2294 return h;
a6ec74c1
JH
2295}
2296
2297STATIC SV *
f7fe979e 2298S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2299{
8b6e33c7
AL
2300 SV *result = newSVpvn(s, l);
2301 char *const result_c = SvPV_nolen(result); /* convenience */
2302 char *out = result_c;
2303 bool skip = 1;
2304 bool ignore = 0;
a6ec74c1 2305
7918f24d
NC
2306 PERL_ARGS_ASSERT_IS_AN_INT;
2307
a6ec74c1
JH
2308 while (*s) {
2309 switch (*s) {
2310 case ' ':
2311 break;
2312 case '+':
2313 if (!skip) {
2314 SvREFCNT_dec(result);
2315 return (NULL);
2316 }
2317 break;
2318 case '0':
2319 case '1':
2320 case '2':
2321 case '3':
2322 case '4':
2323 case '5':
2324 case '6':
2325 case '7':
2326 case '8':
2327 case '9':
2328 skip = 0;
2329 if (!ignore) {
2330 *(out++) = *s;
2331 }
2332 break;
2333 case '.':
2334 ignore = 1;
2335 break;
2336 default:
2337 SvREFCNT_dec(result);
2338 return (NULL);
2339 }
2340 s++;
2341 }
2342 *(out++) = '\0';
2343 SvCUR_set(result, out - result_c);
2344 return (result);
2345}
2346
2347/* pnum must be '\0' terminated */
2348STATIC int
2349S_div128(pTHX_ SV *pnum, bool *done)
2350{
8b6e33c7
AL
2351 STRLEN len;
2352 char * const s = SvPV(pnum, len);
2353 char *t = s;
2354 int m = 0;
2355
7918f24d
NC
2356 PERL_ARGS_ASSERT_DIV128;
2357
8b6e33c7
AL
2358 *done = 1;
2359 while (*t) {
2360 const int i = m * 10 + (*t - '0');
2361 const int r = (i >> 7); /* r < 10 */
2362 m = i & 0x7F;
2363 if (r) {
2364 *done = 0;
2365 }
2366 *(t++) = '0' + r;
a6ec74c1 2367 }
8b6e33c7
AL
2368 *(t++) = '\0';
2369 SvCUR_set(pnum, (STRLEN) (t - s));
2370 return (m);
a6ec74c1
JH
2371}
2372
18529408 2373/*
7accc089
JH
2374=for apidoc packlist
2375
2376The engine implementing pack() Perl function.
2377
bfce84ec
AL
2378=cut
2379*/
7accc089
JH
2380
2381void
f7fe979e 2382Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2383{
97aff369 2384 dVAR;
aadb217d
JH
2385 tempsym_t sym;
2386
7918f24d
NC
2387 PERL_ARGS_ASSERT_PACKLIST;
2388
f7fe979e 2389 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2390
f337b084
TH
2391 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2392 Also make sure any UTF8 flag is loaded */
56eb0262 2393 SvPV_force_nolen(cat);
bfce84ec
AL
2394 if (DO_UTF8(cat))
2395 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2396
49704364
WL
2397 (void)pack_rec( cat, &sym, beglist, endlist );
2398}
2399
f337b084
TH
2400/* like sv_utf8_upgrade, but also repoint the group start markers */
2401STATIC void
2402marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2403 STRLEN len;
2404 tempsym_t *group;
f7fe979e
AL
2405 const char *from_ptr, *from_start, *from_end, **marks, **m;
2406 char *to_start, *to_ptr;
f337b084
TH
2407
2408 if (SvUTF8(sv)) return;
2409
aa07b2f6 2410 from_start = SvPVX_const(sv);
f337b084
TH
2411 from_end = from_start + SvCUR(sv);
2412 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2413 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2414 if (from_ptr == from_end) {
2415 /* Simple case: no character needs to be changed */
2416 SvUTF8_on(sv);
2417 return;
2418 }
2419
3473cf63 2420 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2421 Newx(to_start, len, char);
f337b084
TH
2422 Copy(from_start, to_start, from_ptr-from_start, char);
2423 to_ptr = to_start + (from_ptr-from_start);
2424
a02a5408 2425 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2426 for (group=sym_ptr; group; group = group->previous)
2427 marks[group->level] = from_start + group->strbeg;
2428 marks[sym_ptr->level+1] = from_end+1;
2429 for (m = marks; *m < from_ptr; m++)
2430 *m = to_start + (*m-from_start);
2431
2432 for (;from_ptr < from_end; from_ptr++) {
2433 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2434 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2435 }
2436 *to_ptr = 0;
2437
2438 while (*m == from_ptr) *m++ = to_ptr;
2439 if (m != marks + sym_ptr->level+1) {
2440 Safefree(marks);
2441 Safefree(to_start);
2442 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2443 }
2444 for (group=sym_ptr; group; group = group->previous)
2445 group->strbeg = marks[group->level] - to_start;
2446 Safefree(marks);
2447
2448 if (SvOOK(sv)) {
2449 if (SvIVX(sv)) {
b162af07 2450 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2451 from_start -= SvIVX(sv);
2452 SvIV_set(sv, 0);
2453 }
2454 SvFLAGS(sv) &= ~SVf_OOK;
2455 }
2456 if (SvLEN(sv) != 0)
2457 Safefree(from_start);
f880fe2f 2458 SvPV_set(sv, to_start);
b162af07
SP
2459 SvCUR_set(sv, to_ptr - to_start);
2460 SvLEN_set(sv, len);
f337b084
TH
2461 SvUTF8_on(sv);
2462}
2463
2464/* Exponential string grower. Makes string extension effectively O(n)
2465 needed says how many extra bytes we need (not counting the final '\0')
2466 Only grows the string if there is an actual lack of space
2467*/
2468STATIC char *
0bd48802 2469S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2470 const STRLEN cur = SvCUR(sv);
2471 const STRLEN len = SvLEN(sv);
f337b084 2472 STRLEN extend;
7918f24d
NC
2473
2474 PERL_ARGS_ASSERT_SV_EXP_GROW;
2475
f337b084
TH
2476 if (len - cur > needed) return SvPVX(sv);
2477 extend = needed > len ? needed : len;
2478 return SvGROW(sv, len+extend+1);
2479}
49704364
WL
2480
2481STATIC
2482SV **
f337b084 2483S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2484{
97aff369 2485 dVAR;
49704364 2486 tempsym_t lookahead;
f337b084
TH
2487 I32 items = endlist - beglist;
2488 bool found = next_symbol(symptr);
2489 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2490 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2491
7918f24d
NC
2492 PERL_ARGS_ASSERT_PACK_REC;
2493
f337b084
TH
2494 if (symptr->level == 0 && found && symptr->code == 'U') {
2495 marked_upgrade(aTHX_ cat, symptr);
2496 symptr->flags |= FLAG_DO_UTF8;
2497 utf8 = 0;
49704364 2498 }
f337b084 2499 symptr->strbeg = SvCUR(cat);
49704364
WL
2500
2501 while (found) {
f337b084
TH
2502 SV *fromstr;
2503 STRLEN fromlen;
2504 I32 len;
a0714e2c 2505 SV *lengthcode = NULL;
49704364 2506 I32 datumtype = symptr->code;
f337b084
TH
2507 howlen_t howlen = symptr->howlen;
2508 char *start = SvPVX(cat);
2509 char *cur = start + SvCUR(cat);
49704364 2510
f337b084
TH
2511#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2512
2513 switch (howlen) {
fc241834 2514 case e_star:
f337b084
TH
2515 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2516 0 : items;
2517 break;
2518 default:
2519 /* e_no_len and e_number */
2520 len = symptr->length;
49704364
WL
2521 break;
2522 }
2523
f337b084 2524 if (len) {
a7a3cfaa 2525 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2526
a7a3cfaa
TH
2527 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2528 /* We can process this letter. */
2529 STRLEN size = props & PACK_SIZE_MASK;
2530 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2531 }
f337b084
TH
2532 }
2533
49704364
WL
2534 /* Look ahead for next symbol. Do we have code/code? */
2535 lookahead = *symptr;
2536 found = next_symbol(&lookahead);
246f24af
TH
2537 if (symptr->flags & FLAG_SLASH) {
2538 IV count;
f337b084 2539 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2540 if (strchr("aAZ", lookahead.code)) {
2541 if (lookahead.howlen == e_number) count = lookahead.length;
2542 else {
ce399ba6
NC
2543 if (items > 0) {
2544 if (SvGAMAGIC(*beglist)) {
2545 /* Avoid reading the active data more than once
2546 by copying it to a temporary. */
2547 STRLEN len;
2548 const char *const pv = SvPV_const(*beglist, len);
740cce10 2549 SV *const temp
59cd0e26
NC
2550 = newSVpvn_flags(pv, len,
2551 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2552 *beglist = temp;
2553 }
246f24af
TH
2554 count = DO_UTF8(*beglist) ?
2555 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2556 }
246f24af
TH
2557 else count = 0;
2558 if (lookahead.code == 'Z') count++;
2559 }
2560 } else {
2561 if (lookahead.howlen == e_number && lookahead.length < items)
2562 count = lookahead.length;
2563 else count = items;
2564 }
2565 lookahead.howlen = e_number;
2566 lookahead.length = count;
2567 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2568 }
49704364 2569
fc241834
RGS
2570 /* Code inside the switch must take care to properly update
2571 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2572 doesn't simply leave using break */
1109a392 2573 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2574 default:
f337b084
TH
2575 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2576 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2577 case '%':
49704364 2578 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2579 {
2580 char *from;
2581#ifdef PERL_PACK_CAN_SHRIEKSIGN
2582 case '.' | TYPE_IS_SHRIEKING:
2583#endif
2584 case '.':
2585 if (howlen == e_star) from = start;
2586 else if (len == 0) from = cur;
2587 else {
2588 tempsym_t *group = symptr;
2589
2590 while (--len && group) group = group->previous;
2591 from = group ? start + group->strbeg : start;
2592 }
2593 fromstr = NEXTFROM;
2594 len = SvIV(fromstr);
2595 goto resize;
2596#ifdef PERL_PACK_CAN_SHRIEKSIGN
2597 case '@' | TYPE_IS_SHRIEKING:
2598#endif
a6ec74c1 2599 case '@':
28be1210
TH
2600 from = start + symptr->strbeg;
2601 resize:
2602#ifdef PERL_PACK_CAN_SHRIEKSIGN
2603 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2604#else /* PERL_PACK_CAN_SHRIEKSIGN */
2605 if (utf8)
2606#endif
2607 if (len >= 0) {
2608 while (len && from < cur) {
2609 from += UTF8SKIP(from);
2610 len--;
2611 }
2612 if (from > cur)
2613 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2614 if (len) {
2615 /* Here we know from == cur */
2616 grow:
2617 GROWING(0, cat, start, cur, len);
2618 Zero(cur, len, char);
2619 cur += len;
2620 } else if (from < cur) {
2621 len = cur - from;
2622 goto shrink;
2623 } else goto no_change;
2624 } else {
2625 cur = from;
2626 len = -len;
2627 goto utf8_shrink;
f337b084 2628 }
28be1210
TH
2629 else {
2630 len -= cur - from;
f337b084 2631 if (len > 0) goto grow;
28be1210 2632 if (len == 0) goto no_change;
fc241834 2633 len = -len;
28be1210 2634 goto shrink;
f337b084 2635 }
a6ec74c1 2636 break;
28be1210 2637 }
fc241834 2638 case '(': {
49704364 2639 tempsym_t savsym = *symptr;
66c611c5
MHM
2640 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2641 symptr->flags |= group_modifiers;
49704364
WL
2642 symptr->patend = savsym.grpend;
2643 symptr->level++;
f337b084 2644 symptr->previous = &lookahead;
18529408 2645 while (len--) {
f337b084
TH
2646 U32 was_utf8;
2647 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2648 else symptr->flags &= ~FLAG_PARSE_UTF8;
2649 was_utf8 = SvUTF8(cat);
49704364 2650 symptr->patptr = savsym.grpbeg;
f337b084
TH
2651 beglist = pack_rec(cat, symptr, beglist, endlist);
2652 if (SvUTF8(cat) != was_utf8)
2653 /* This had better be an upgrade while in utf8==0 mode */
2654 utf8 = 1;
2655
49704364 2656 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2657 break; /* No way to continue */
2658 }
ee790063 2659 items = endlist - beglist;
f337b084
TH
2660 lookahead.flags = symptr->flags & ~group_modifiers;
2661 goto no_change;
18529408 2662 }
62f95557
IZ
2663 case 'X' | TYPE_IS_SHRIEKING:
2664 if (!len) /* Avoid division by 0 */
2665 len = 1;
f337b084
TH
2666 if (utf8) {
2667 char *hop, *last;
2668 I32 l = len;
2669 hop = last = start;
2670 while (hop < cur) {
2671 hop += UTF8SKIP(hop);
2672 if (--l == 0) {
2673 last = hop;
2674 l = len;
2675 }
2676 }
2677 if (last > cur)
2678 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2679 cur = last;
2680 break;
2681 }
2682 len = (cur-start) % len;
62f95557 2683 /* FALL THROUGH */
a6ec74c1 2684 case 'X':
f337b084
TH
2685 if (utf8) {
2686 if (len < 1) goto no_change;
28be1210 2687 utf8_shrink:
f337b084
TH
2688 while (len > 0) {
2689 if (cur <= start)
28be1210
TH
2690 Perl_croak(aTHX_ "'%c' outside of string in pack",
2691 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2692 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2693 if (cur <= start)
28be1210
TH
2694 Perl_croak(aTHX_ "'%c' outside of string in pack",
2695 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2696 }
2697 len--;
2698 }
2699 } else {
fc241834 2700 shrink:
f337b084 2701 if (cur - start < len)
28be1210
TH
2702 Perl_croak(aTHX_ "'%c' outside of string in pack",
2703 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2704 cur -= len;
2705 }
2706 if (cur < start+symptr->strbeg) {
2707 /* Make sure group starts don't point into the void */
2708 tempsym_t *group;
9e27e96a 2709 const STRLEN length = cur-start;
f337b084
TH
2710 for (group = symptr;
2711 group && length < group->strbeg;
2712 group = group->previous) group->strbeg = length;
2713 lookahead.strbeg = length;
2714 }
a6ec74c1 2715 break;
fc241834
RGS
2716 case 'x' | TYPE_IS_SHRIEKING: {
2717 I32 ai32;
62f95557
IZ
2718 if (!len) /* Avoid division by 0 */
2719 len = 1;
230e1fce 2720 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2721 else ai32 = (cur - start) % len;
2722 if (ai32 == 0) goto no_change;
2723 len -= ai32;
2724 }
2725 /* FALL THROUGH */
a6ec74c1 2726 case 'x':
f337b084 2727 goto grow;
a6ec74c1
JH
2728 case 'A':
2729 case 'Z':
f337b084 2730 case 'a': {
f7fe979e 2731 const char *aptr;
f337b084 2732
a6ec74c1 2733 fromstr = NEXTFROM;
e62f0680 2734 aptr = SvPV_const(fromstr, fromlen);
f337b084 2735 if (DO_UTF8(fromstr)) {
f7fe979e 2736 const char *end, *s;
f337b084
TH
2737
2738 if (!utf8 && !SvUTF8(cat)) {
2739 marked_upgrade(aTHX_ cat, symptr);
2740 lookahead.flags |= FLAG_DO_UTF8;
2741 lookahead.strbeg = symptr->strbeg;
2742 utf8 = 1;
2743 start = SvPVX(cat);
2744 cur = start + SvCUR(cat);
2745 }
fc241834 2746 if (howlen == e_star) {
f337b084
TH
2747 if (utf8) goto string_copy;
2748 len = fromlen+1;
2749 }
2750 s = aptr;
2751 end = aptr + fromlen;
2752 fromlen = datumtype == 'Z' ? len-1 : len;
2753 while ((I32) fromlen > 0 && s < end) {
2754 s += UTF8SKIP(s);
2755 fromlen--;
2756 }
2757 if (s > end)
2758 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2759 if (utf8) {
fc241834 2760 len = fromlen;
f337b084
TH
2761 if (datumtype == 'Z') len++;
2762 fromlen = s-aptr;
2763 len += fromlen;
fc241834 2764
f337b084 2765 goto string_copy;
fc241834 2766 }
f337b084
TH
2767 fromlen = len - fromlen;
2768 if (datumtype == 'Z') fromlen--;
2769 if (howlen == e_star) {
2770 len = fromlen;
2771 if (datumtype == 'Z') len++;
fc241834 2772 }
f337b084 2773 GROWING(0, cat, start, cur, len);
fc241834 2774 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2775 datumtype | TYPE_IS_PACK))
2776 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2777 cur += fromlen;
a6ec74c1 2778 len -= fromlen;
f337b084
TH
2779 } else if (utf8) {
2780 if (howlen == e_star) {
2781 len = fromlen;
2782 if (datumtype == 'Z') len++;
a6ec74c1 2783 }
f337b084
TH
2784 if (len <= (I32) fromlen) {
2785 fromlen = len;
2786 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2787 }
fc241834 2788 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2789 upgrade, so:
2790 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2791 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2792 len -= fromlen;
2793 while (fromlen > 0) {
230e1fce 2794 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2795 aptr++;
2796 fromlen--;
fc241834 2797 }
f337b084
TH
2798 } else {
2799 string_copy:
2800 if (howlen == e_star) {
2801 len = fromlen;
2802 if (datumtype == 'Z') len++;
2803 }
2804 if (len <= (I32) fromlen) {
2805 fromlen = len;
2806 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2807 }
f337b084
TH
2808 GROWING(0, cat, start, cur, len);
2809 Copy(aptr, cur, fromlen, char);
2810 cur += fromlen;
2811 len -= fromlen;
a6ec74c1 2812 }
f337b084
TH
2813 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2814 cur += len;
a6ec74c1 2815 break;
f337b084 2816 }
a6ec74c1 2817 case 'B':
f337b084 2818 case 'b': {
b83604b4 2819 const char *str, *end;
f337b084
TH
2820 I32 l, field_len;
2821 U8 bits;
2822 bool utf8_source;
2823 U32 utf8_flags;
a6ec74c1 2824
fc241834 2825 fromstr = NEXTFROM;
b83604b4 2826 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2827 end = str + fromlen;
2828 if (DO_UTF8(fromstr)) {
2829 utf8_source = TRUE;
041457d9 2830 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2831 } else {
2832 utf8_source = FALSE;
2833 utf8_flags = 0; /* Unused, but keep compilers happy */
2834 }
2835 if (howlen == e_star) len = fromlen;
2836 field_len = (len+7)/8;
2837 GROWING(utf8, cat, start, cur, field_len);
2838 if (len > (I32)fromlen) len = fromlen;
2839 bits = 0;
2840 l = 0;
2841 if (datumtype == 'B')
2842 while (l++ < len) {
2843 if (utf8_source) {
95b63a38 2844 UV val = 0;
f337b084
TH
2845 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2846 bits |= val & 1;
2847 } else bits |= *str++ & 1;
2848 if (l & 7) bits <<= 1;
fc241834 2849 else {
f337b084
TH
2850 PUSH_BYTE(utf8, cur, bits);
2851 bits = 0;
a6ec74c1
JH
2852 }
2853 }
f337b084
TH
2854 else
2855 /* datumtype == 'b' */
2856 while (l++ < len) {
2857 if (utf8_source) {
95b63a38 2858 UV val = 0;
f337b084
TH
2859 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2860 if (val & 1) bits |= 0x80;
2861 } else if (*str++ & 1)
2862 bits |= 0x80;
2863 if (l & 7) bits >>= 1;
fc241834 2864 else {
f337b084
TH
2865 PUSH_BYTE(utf8, cur, bits);
2866 bits = 0;
a6ec74c1
JH
2867 }
2868 }
f337b084
TH
2869 l--;
2870 if (l & 7) {
fc241834 2871 if (datumtype == 'B')
f337b084 2872 bits <<= 7 - (l & 7);
fc241834 2873 else
f337b084
TH
2874 bits >>= 7 - (l & 7);
2875 PUSH_BYTE(utf8, cur, bits);
2876 l += 7;
a6ec74c1 2877 }
f337b084
TH
2878 /* Determine how many chars are left in the requested field */
2879 l /= 8;
2880 if (howlen == e_star) field_len = 0;
2881 else field_len -= l;
2882 Zero(cur, field_len, char);
2883 cur += field_len;
a6ec74c1 2884 break;
f337b084 2885 }
a6ec74c1 2886 case 'H':
f337b084 2887 case 'h': {
10516c54 2888 const char *str, *end;
f337b084
TH
2889 I32 l, field_len;
2890 U8 bits;
2891 bool utf8_source;
2892 U32 utf8_flags;
a6ec74c1 2893
fc241834 2894 fromstr = NEXTFROM;
10516c54 2895 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2896 end = str + fromlen;
2897 if (DO_UTF8(fromstr)) {
2898 utf8_source = TRUE;
041457d9 2899 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2900 } else {
2901 utf8_source = FALSE;
2902 utf8_flags = 0; /* Unused, but keep compilers happy */
2903 }
2904 if (howlen == e_star) len = fromlen;
2905 field_len = (len+1)/2;
2906 GROWING(utf8, cat, start, cur, field_len);
2907 if (!utf8 && len > (I32)fromlen) len = fromlen;
2908 bits = 0;
2909 l = 0;
2910 if (datumtype == 'H')
2911 while (l++ < len) {
2912 if (utf8_source) {
95b63a38 2913 UV val = 0;
f337b084
TH
2914 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2915 if (val < 256 && isALPHA(val))
2916 bits |= (val + 9) & 0xf;
a6ec74c1 2917 else
f337b084
TH
2918 bits |= val & 0xf;
2919 } else if (isALPHA(*str))
2920 bits |= (*str++ + 9) & 0xf;
2921 else
2922 bits |= *str++ & 0xf;
2923 if (l & 1) bits <<= 4;
fc241834 2924 else {
f337b084
TH
2925 PUSH_BYTE(utf8, cur, bits);
2926 bits = 0;
a6ec74c1
JH
2927 }
2928 }
f337b084
TH
2929 else
2930 while (l++ < len) {
2931 if (utf8_source) {
95b63a38 2932 UV val = 0;
f337b084
TH
2933 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2934 if (val < 256 && isALPHA(val))
2935 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2936 else
f337b084
TH
2937 bits |= (val & 0xf) << 4;
2938 } else if (isALPHA(*str))
2939 bits |= ((*str++ + 9) & 0xf) << 4;
2940 else
2941 bits |= (*str++ & 0xf) << 4;
2942 if (l & 1) bits >>= 4;
fc241834 2943 else {
f337b084
TH
2944 PUSH_BYTE(utf8, cur, bits);
2945 bits = 0;
a6ec74c1 2946 }
fc241834 2947 }
f337b084
TH
2948 l--;
2949 if (l & 1) {
2950 PUSH_BYTE(utf8, cur, bits);
2951 l++;
2952 }
2953 /* Determine how many chars are left in the requested field */
2954 l /= 2;
2955 if (howlen == e_star) field_len = 0;
2956 else field_len -= l;
2957 Zero(cur, field_len, char);
2958 cur += field_len;
2959 break;
fc241834
RGS
2960 }
2961 case 'c':
f337b084
TH
2962 while (len-- > 0) {
2963 IV aiv;
2964 fromstr = NEXTFROM;
2965 aiv = SvIV(fromstr);
2966 if ((-128 > aiv || aiv > 127) &&
2967 ckWARN(WARN_PACK))
2968 Perl_warner(aTHX_ packWARN(WARN_PACK),
2969 "Character in 'c' format wrapped in pack");
585ec06d 2970 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2971 }
2972 break;
2973 case 'C':
f337b084
TH
2974 if (len == 0) {
2975 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2976 break;
2977 }
a6ec74c1 2978 while (len-- > 0) {
f337b084 2979 IV aiv;
a6ec74c1 2980 fromstr = NEXTFROM;
f337b084
TH
2981 aiv = SvIV(fromstr);
2982 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2983 ckWARN(WARN_PACK))
2984 Perl_warner(aTHX_ packWARN(WARN_PACK),
2985 "Character in 'C' format wrapped in pack");
1651fc44 2986 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2987 }
fc241834
RGS
2988 break;
2989 case 'W': {
2990 char *end;
670f1322 2991 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2992
2993 end = start+SvLEN(cat)-1;
2994 if (utf8) end -= UTF8_MAXLEN-1;
2995 while (len-- > 0) {
2996 UV auv;
2997 fromstr = NEXTFROM;
2998 auv = SvUV(fromstr);
2999 if (in_bytes) auv = auv % 0x100;
3000 if (utf8) {
3001 W_utf8:
3002 if (cur > end) {
3003 *cur = '\0';
b162af07 3004 SvCUR_set(cat, cur - start);
fc241834
RGS
3005
3006 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3007 end = start+SvLEN(cat)-UTF8_MAXLEN;
3008 }
230e1fce
NC
3009 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3010 NATIVE_TO_UNI(auv),
041457d9 3011 warn_utf8 ?
230e1fce 3012 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3013 } else {
3014 if (auv >= 0x100) {
3015 if (!SvUTF8(cat)) {
3016 *cur = '\0';
b162af07 3017 SvCUR_set(cat, cur - start);
fc241834
RGS
3018 marked_upgrade(aTHX_ cat, symptr);
3019 lookahead.flags |= FLAG_DO_UTF8;
3020 lookahead.strbeg = symptr->strbeg;
3021 utf8 = 1;
3022 start = SvPVX(cat);
3023 cur = start + SvCUR(cat);
3024 end = start+SvLEN(cat)-UTF8_MAXLEN;
3025 goto W_utf8;
3026 }
3027 if (ckWARN(WARN_PACK))
3028 Perl_warner(aTHX_ packWARN(WARN_PACK),
3029 "Character in 'W' format wrapped in pack");
3030 auv &= 0xff;
3031 }
3032 if (cur >= end) {
3033 *cur = '\0';
b162af07 3034 SvCUR_set(cat, cur - start);
fc241834
RGS
3035 GROWING(0, cat, start, cur, len+1);
3036 end = start+SvLEN(cat)-1;
3037 }
fe2774ed 3038 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3039 }
3040 }
3041 break;
fc241834
RGS
3042 }
3043 case 'U': {
3044 char *end;
3045
3046 if (len == 0) {
3047 if (!(symptr->flags & FLAG_DO_UTF8)) {
3048 marked_upgrade(aTHX_ cat, symptr);
3049 lookahead.flags |= FLAG_DO_UTF8;
3050 lookahead.strbeg = symptr->strbeg;
3051 }
3052 utf8 = 0;
3053 goto no_change;
3054 }
3055
3056 end = start+SvLEN(cat);
3057 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3058 while (len-- > 0) {
fc241834 3059 UV auv;
a6ec74c1 3060 fromstr = NEXTFROM;
fc241834
RGS
3061 auv = SvUV(fromstr);
3062 if (utf8) {
230e1fce 3063 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3064 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3065 warn_utf8 ?
fc241834
RGS
3066 0 : UNICODE_ALLOW_ANY);
3067 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3068 *cur = '\0';
b162af07 3069 SvCUR_set(cat, cur - start);
fc241834
RGS
3070 GROWING(0, cat, start, cur,
3071 len+(endb-buffer)*UTF8_EXPAND);
3072 end = start+SvLEN(cat);
3073 }
64844641 3074 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3075 } else {
3076 if (cur >= end) {
3077 *cur = '\0';
b162af07 3078 SvCUR_set(cat, cur - start);
fc241834
RGS
3079 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3080 end = start+SvLEN(cat)-UTF8_MAXLEN;
3081 }
230e1fce 3082 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3083 warn_utf8 ?
230e1fce 3084 0 : UNICODE_ALLOW_ANY);
fc241834 3085 }
a6ec74c1 3086 }
a6ec74c1 3087 break;
fc241834 3088 }
a6ec74c1
JH
3089 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3090 case 'f':
a6ec74c1 3091 while (len-- > 0) {
f337b084
TH
3092 float afloat;
3093 NV anv;
a6ec74c1 3094 fromstr = NEXTFROM;
f337b084 3095 anv = SvNV(fromstr);
5cdb9e01 3096#ifdef __VOS__
f337b084 3097 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3098 during conversion from double to float into infinity, so we
3099 do it by hand. This code should either be generalized for
3100 any OS that needs it, or removed if and when VOS implements
3101 posix-976 (suggestion to support mapping to infinity).
3102 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3103{
3104extern const float _float_constants[];
f337b084 3105 if (anv > FLT_MAX)
fc241834 3106 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3107 else if (anv < -FLT_MAX)
fc241834 3108 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3109 else afloat = (float) anv;
3722f0dc 3110}
f337b084 3111#else /* __VOS__ */
baf3cf9c 3112# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3113 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3114 * on Alpha; fake it if we don't have them.
3115 */
f337b084 3116 if (anv > FLT_MAX)
fc241834 3117 afloat = FLT_MAX;
f337b084 3118 else if (anv < -FLT_MAX)
fc241834 3119 afloat = -FLT_MAX;
f337b084 3120 else afloat = (float)anv;
baf3cf9c 3121# else
f337b084 3122 afloat = (float)anv;
baf3cf9c 3123# endif
f337b084 3124#endif /* __VOS__ */
1109a392 3125 DO_BO_PACK_N(afloat, float);
f337b084 3126 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3127 }
3128 break;
3129 case 'd':
a6ec74c1 3130 while (len-- > 0) {
f337b084
TH
3131 double adouble;
3132 NV anv;
a6ec74c1 3133 fromstr = NEXTFROM;
f337b084 3134 anv = SvNV(fromstr);
5cdb9e01 3135#ifdef __VOS__
f337b084 3136 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3137 during conversion from long double to double into infinity,
3138 so we do it by hand. This code should either be generalized
3139 for any OS that needs it, or removed if and when VOS
3140 implements posix-976 (suggestion to support mapping to
3141 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3142{
3143extern const double _double_constants[];
f337b084 3144 if (anv > DBL_MAX)
fc241834 3145 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3146 else if (anv < -DBL_MAX)
fc241834 3147 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3148 else adouble = (double) anv;
3722f0dc 3149}
f337b084 3150#else /* __VOS__ */
baf3cf9c 3151# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3152 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3153 * on Alpha; fake it if we don't have them.
3154 */
f337b084 3155 if (anv > DBL_MAX)
fc241834 3156 adouble = DBL_MAX;
f337b084 3157 else if (anv < -DBL_MAX)
fc241834 3158 adouble = -DBL_MAX;
f337b084 3159 else adouble = (double)anv;
baf3cf9c 3160# else
f337b084 3161 adouble = (double)anv;
baf3cf9c 3162# endif
f337b084 3163#endif /* __VOS__ */
1109a392 3164 DO_BO_PACK_N(adouble, double);
f337b084 3165 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3166 }
3167 break;
fc241834
RGS
3168 case 'F': {
3169 NV anv;
1109a392 3170 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3171 while (len-- > 0) {
3172 fromstr = NEXTFROM;
3173 anv = SvNV(fromstr);
1109a392 3174 DO_BO_PACK_N(anv, NV);
fc241834 3175 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3176 }
3177 break;
fc241834 3178 }
92d41999 3179#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3180 case 'D': {
3181 long double aldouble;
1109a392
MHM
3182 /* long doubles can have unused bits, which may be nonzero */
3183 Zero(&aldouble, 1, long double);
92d41999
JH
3184 while (len-- > 0) {
3185 fromstr = NEXTFROM;
3186 aldouble = (long double)SvNV(fromstr);
1109a392 3187 DO_BO_PACK_N(aldouble, long double);
fc241834 3188 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3189 }
3190 break;
fc241834 3191 }
92d41999 3192#endif
7212898e 3193#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3194 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3195#endif
a6ec74c1
JH
3196 case 'n':
3197 while (len-- > 0) {
f337b084 3198 I16 ai16;
a6ec74c1 3199 fromstr = NEXTFROM;
ef108786 3200 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3201#ifdef HAS_HTONS
ef108786 3202 ai16 = PerlSock_htons(ai16);
a6ec74c1 3203#endif
f337b084 3204 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3205 }
3206 break;
7212898e 3207#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3208 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3209#endif
a6ec74c1
JH
3210 case 'v':
3211 while (len-- > 0) {
f337b084 3212 I16 ai16;
a6ec74c1 3213 fromstr = NEXTFROM;
ef108786 3214 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3215#ifdef HAS_HTOVS
ef108786 3216 ai16 = htovs(ai16);
a6ec74c1 3217#endif
f337b084 3218 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3219 }
3220 break;
49704364 3221 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3222#if SHORTSIZE != SIZE16
fc241834 3223 while (len-- > 0) {
f337b084 3224 unsigned short aushort;
fc241834
RGS
3225 fromstr = NEXTFROM;
3226 aushort = SvUV(fromstr);
3227 DO_BO_PACK(aushort, s);
f337b084 3228 PUSH_VAR(utf8, cur, aushort);
fc241834 3229 }
49704364
WL
3230 break;
3231#else
3232 /* Fall through! */
a6ec74c1 3233#endif
49704364 3234 case 'S':
fc241834 3235 while (len-- > 0) {
f337b084 3236 U16 au16;
fc241834
RGS
3237 fromstr = NEXTFROM;
3238 au16 = (U16)SvUV(fromstr);
3239 DO_BO_PACK(au16, 16);
f337b084 3240 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3241 }
3242 break;
49704364 3243 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3244#if SHORTSIZE != SIZE16
fc241834 3245 while (len-- > 0) {
f337b084 3246 short ashort;
fc241834
RGS
3247 fromstr = NEXTFROM;
3248 ashort = SvIV(fromstr);
3249 DO_BO_PACK(ashort, s);
f337b084 3250 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3251 }
49704364
WL
3252 break;
3253#else
3254 /* Fall through! */
a6ec74c1 3255#endif
49704364
WL
3256 case 's':
3257 while (len-- > 0) {
f337b084 3258 I16 ai16;
49704364 3259 fromstr = NEXTFROM;
ef108786
MHM
3260 ai16 = (I16)SvIV(fromstr);
3261 DO_BO_PACK(ai16, 16);
f337b084 3262 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3263 }
3264 break;
3265 case 'I':
49704364 3266 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3267 while (len-- > 0) {
f337b084 3268 unsigned int auint;
a6ec74c1
JH
3269 fromstr = NEXTFROM;
3270 auint = SvUV(fromstr);
1109a392 3271 DO_BO_PACK(auint, i);
f337b084 3272 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3273 }
3274 break;
92d41999
JH
3275 case 'j':
3276 while (len-- > 0) {
f337b084 3277 IV aiv;
92d41999
JH
3278 fromstr = NEXTFROM;
3279 aiv = SvIV(fromstr);
1109a392
MHM
3280#if IVSIZE == INTSIZE
3281 DO_BO_PACK(aiv, i);
3282#elif IVSIZE == LONGSIZE
3283 DO_BO_PACK(aiv, l);
3284#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3285 DO_BO_PACK(aiv, 64);
f337b084
TH
3286#else
3287 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3288#endif
f337b084 3289 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3290 }
3291 break;
3292 case 'J':
3293 while (len-- > 0) {
f337b084 3294 UV auv;
92d41999
JH
3295 fromstr = NEXTFROM;
3296 auv = SvUV(fromstr);
1109a392
MHM
3297#if UVSIZE == INTSIZE
3298 DO_BO_PACK(auv, i);
3299#elif UVSIZE == LONGSIZE
3300 DO_BO_PACK(auv, l);
3301#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3302 DO_BO_PACK(auv, 64);
f337b084
TH
3303#else
3304 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3305#endif
f337b084 3306 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3307 }
3308 break;
a6ec74c1
JH
3309 case 'w':
3310 while (len-- > 0) {
f337b084 3311 NV anv;
a6ec74c1 3312 fromstr = NEXTFROM;
15e9f109 3313 anv = SvNV(fromstr);
a6ec74c1 3314
f337b084
TH
3315 if (anv < 0) {
3316 *cur = '\0';
b162af07 3317 SvCUR_set(cat, cur - start);
49704364 3318 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3319 }
a6ec74c1 3320
196b62db
NC
3321 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3322 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3323 any negative IVs will have already been got by the croak()
3324 above. IOK is untrue for fractions, so we test them
3325 against UV_MAX_P1. */
f337b084
TH
3326 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3327 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3328 char *in = buf + sizeof(buf);
196b62db 3329 UV auv = SvUV(fromstr);
a6ec74c1
JH
3330
3331 do {
eb160463 3332 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3333 auv >>= 7;
3334 } while (auv);
3335 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3336 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3337 in, (buf + sizeof(buf)) - in);
3338 } else if (SvPOKp(fromstr))
3339 goto w_string;
a6ec74c1 3340 else if (SvNOKp(fromstr)) {
0258719b
NC
3341 /* 10**NV_MAX_10_EXP is the largest power of 10
3342 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3343 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3344 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3345 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3346 Some C compilers are strict about integral constant
3347 expressions so we conservatively divide by a slightly
3348 smaller integer instead of multiplying by the exact
3349 floating-point value.
0258719b
NC
3350 */
3351#ifdef NV_MAX_10_EXP
f337b084 3352 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3353 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3354#else
f337b084 3355 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3356 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3357#endif
a6ec74c1
JH
3358 char *in = buf + sizeof(buf);
3359
8b6e33c7 3360 anv = Perl_floor(anv);
a6ec74c1 3361 do {
8b6e33c7 3362 const NV next = Perl_floor(anv / 128);
a6ec74c1 3363 if (in <= buf) /* this cannot happen ;-) */
49704364 3364 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3365 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3366 anv = next;
3367 } while (anv > 0);
a6ec74c1 3368 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3369 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3370 in, (buf + sizeof(buf)) - in);
3371 } else {
8b6e33c7
AL
3372 const char *from;
3373 char *result, *in;
735b914b
JH
3374 SV *norm;
3375 STRLEN len;
3376 bool done;
3377
f337b084 3378 w_string:
735b914b 3379 /* Copy string and check for compliance */
349d4f2f 3380 from = SvPV_const(fromstr, len);
735b914b 3381 if ((norm = is_an_int(from, len)) == NULL)
49704364 3382 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3383
a02a5408 3384 Newx(result, len, char);
735b914b
JH
3385 in = result + len;
3386 done = FALSE;
f337b084 3387 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3388 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3389 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3390 in, (result + len) - in);
735b914b
JH
3391 Safefree(result);
3392 SvREFCNT_dec(norm); /* free norm */
fc241834 3393 }
a6ec74c1
JH
3394 }
3395 break;
3396 case 'i':
49704364 3397 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3398 while (len-- > 0) {
f337b084 3399 int aint;
a6ec74c1
JH
3400 fromstr = NEXTFROM;
3401 aint = SvIV(fromstr);
1109a392 3402 DO_BO_PACK(aint, i);
f337b084 3403 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3404 }
3405 break;
7212898e 3406#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3407 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3408#endif
a6ec74c1
JH
3409 case 'N':
3410 while (len-- > 0) {
f337b084 3411 U32 au32;
a6ec74c1 3412 fromstr = NEXTFROM;
ef108786 3413 au32 = SvUV(fromstr);
a6ec74c1 3414#ifdef HAS_HTONL
ef108786 3415 au32 = PerlSock_htonl(au32);
a6ec74c1 3416#endif
f337b084 3417 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3418 }
3419 break;
7212898e 3420#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3421 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3422#endif
a6ec74c1
JH
3423 case 'V':
3424 while (len-- > 0) {
f337b084 3425 U32 au32;
a6ec74c1 3426 fromstr = NEXTFROM;
ef108786 3427 au32 = SvUV(fromstr);
a6ec74c1 3428#ifdef HAS_HTOVL
ef108786 3429 au32 = htovl(au32);
a6ec74c1 3430#endif
f337b084 3431 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3432 }
3433 break;
49704364 3434 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3435#if LONGSIZE != SIZE32
fc241834 3436 while (len-- > 0) {
f337b084 3437 unsigned long aulong;
fc241834
RGS
3438 fromstr = NEXTFROM;
3439 aulong = SvUV(fromstr);
3440 DO_BO_PACK(aulong, l);
f337b084 3441 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3442 }
49704364
WL
3443 break;
3444#else
3445 /* Fall though! */
a6ec74c1 3446#endif
49704364 3447 case 'L':
fc241834 3448 while (len-- > 0) {
f337b084 3449 U32 au32;
fc241834
RGS
3450 fromstr = NEXTFROM;
3451 au32 = SvUV(fromstr);
3452 DO_BO_PACK(au32, 32);
f337b084 3453 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3454 }
3455 break;
49704364 3456 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3457#if LONGSIZE != SIZE32
fc241834 3458 while (len-- > 0) {
f337b084 3459 long along;
fc241834
RGS
3460 fromstr = NEXTFROM;
3461 along = SvIV(fromstr);
3462 DO_BO_PACK(along, l);
f337b084 3463 PUSH_VAR(utf8, cur, along);
a6ec74c1 3464 }
49704364
WL
3465 break;
3466#else
3467 /* Fall though! */
a6ec74c1 3468#endif
49704364
WL
3469 case 'l':
3470 while (len-- > 0) {
f337b084 3471 I32 ai32;
49704364 3472 fromstr = NEXTFROM;
ef108786
MHM
3473 ai32 = SvIV(fromstr);
3474 DO_BO_PACK(ai32, 32);
f337b084 3475 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3476 }
3477 break;
3478#ifdef HAS_QUAD
3479 case 'Q':
3480 while (len-- > 0) {
f337b084 3481 Uquad_t auquad;
a6ec74c1 3482 fromstr = NEXTFROM;
f337b084 3483 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3484 DO_BO_PACK(auquad, 64);
f337b084 3485 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3486 }
3487 break;
3488 case 'q':
3489 while (len-- > 0) {
f337b084 3490 Quad_t aquad;
a6ec74c1
JH
3491 fromstr = NEXTFROM;
3492 aquad = (Quad_t)SvIV(fromstr);
1109a392 3493 DO_BO_PACK(aquad, 64);
f337b084 3494 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3495 }
3496 break;
f337b084 3497#endif /* HAS_QUAD */
a6ec74c1
JH
3498 case 'P':
3499 len = 1; /* assume SV is correct length */
f337b084 3500 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3501 /* Fall through! */
a6ec74c1
JH
3502 case 'p':
3503 while (len-- > 0) {
83003860 3504 const char *aptr;
f337b084 3505
a6ec74c1 3506 fromstr = NEXTFROM;
28a4f200
TH
3507 SvGETMAGIC(fromstr);
3508 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3509 else {
a6ec74c1
JH
3510 /* XXX better yet, could spirit away the string to
3511 * a safe spot and hang on to it until the result
3512 * of pack() (and all copies of the result) are
3513 * gone.
3514 */
041457d9
DM
3515 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3516 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3517 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3518 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3519 }
3520 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3521 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3522 else
2596d9fe 3523 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3524 }
07409e01 3525 DO_BO_PACK_PC(aptr);
f337b084 3526 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3527 }
3528 break;
fc241834 3529 case 'u': {
f7fe979e 3530 const char *aptr, *aend;
fc241834 3531 bool from_utf8;
f337b084 3532
a6ec74c1 3533 fromstr = NEXTFROM;
fc241834
RGS
3534 if (len <= 2) len = 45;
3535 else len = len / 3 * 3;
3536 if (len >= 64) {
0fdc08b6
RGS
3537 if (ckWARN(WARN_PACK))
3538 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3539 "Field too wide in 'u' format in pack");
3540 len = 63;
3541 }
83003860 3542 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3543 from_utf8 = DO_UTF8(fromstr);
3544 if (from_utf8) {
3545 aend = aptr + fromlen;
3546 fromlen = sv_len_utf8(fromstr);
3547 } else aend = NULL; /* Unused, but keep compilers happy */
3548 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3549 while (fromlen > 0) {
fc241834 3550 U8 *end;
a6ec74c1 3551 I32 todo;
fc241834 3552 U8 hunk[1+63/3*4+1];
a6ec74c1 3553
eb160463 3554 if ((I32)fromlen > len)
a6ec74c1
JH
3555 todo = len;
3556 else
3557 todo = fromlen;
fc241834
RGS
3558 if (from_utf8) {
3559 char buffer[64];
3560 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3561 'u' | TYPE_IS_PACK)) {
3562 *cur = '\0';
b162af07 3563 SvCUR_set(cat, cur - start);
fc241834
RGS
3564 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3565 }
3566 end = doencodes(hunk, buffer, todo);
3567 } else {
3568 end = doencodes(hunk, aptr, todo);
3569 aptr += todo;
3570 }
3571 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3572 fromlen -= todo;
3573 }
a6ec74c1
JH
3574 break;
3575 }
f337b084
TH
3576 }
3577 *cur = '\0';
b162af07 3578 SvCUR_set(cat, cur - start);
f337b084 3579 no_change:
49704364 3580 *symptr = lookahead;
a6ec74c1 3581 }
49704364 3582 return beglist;
18529408
IZ
3583}
3584#undef NEXTFROM
3585
3586
3587PP(pp_pack)
3588{
97aff369 3589 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3590 register SV *cat = TARG;
3591 STRLEN fromlen;
349d4f2f
NC
3592 SV *pat_sv = *++MARK;
3593 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3594 register const char *patend = pat + fromlen;
18529408
IZ
3595
3596 MARK++;
76f68e9b 3597 sv_setpvs(cat, "");
f337b084 3598 SvUTF8_off(cat);
18529408 3599
7accc089 3600 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3601
a6ec74c1
JH
3602 SvSETMAGIC(cat);
3603 SP = ORIGMARK;
3604 PUSHs(cat);
3605 RETURN;
3606}
a6ec74c1 3607
73cb7263
NC
3608/*
3609 * Local variables:
3610 * c-indentation-style: bsd
3611 * c-basic-offset: 4
3612 * indent-tabs-mode: t
3613 * End:
3614 *
37442d52
RGS
3615 * ex: set ts=8 sts=4 sw=4 noet:
3616 */