This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add MUTABLE_CV(), and eliminate (CV *) casts in *.c.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
a6ec74c1
JH
29#include "EXTERN.h"
30#define PERL_IN_PP_PACK_C
31#include "perl.h"
32
f7fe979e
AL
33/* Types used by pack/unpack */
34typedef enum {
35 e_no_len, /* no length */
36 e_number, /* number, [] */
37 e_star /* asterisk */
38} howlen_t;
39
40typedef struct tempsym {
41 const char* patptr; /* current template char */
42 const char* patend; /* one after last char */
43 const char* grpbeg; /* 1st char of ()-group */
44 const char* grpend; /* end of ()-group */
45 I32 code; /* template code (!<>) */
46 I32 length; /* length/repeat count */
47 howlen_t howlen; /* how length is given */
48 int level; /* () nesting level */
49 U32 flags; /* /=4, comma=2, pack=1 */
50 /* and group modifiers */
51 STRLEN strbeg; /* offset of group start */
52 struct tempsym *previous; /* previous group */
53} tempsym_t;
54
55#define TEMPSYM_INIT(symptr, p, e, f) \
56 STMT_START { \
57 (symptr)->patptr = (p); \
58 (symptr)->patend = (e); \
59 (symptr)->grpbeg = NULL; \
60 (symptr)->grpend = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->code = 0; \
63 (symptr)->length = 0; \
10edeb5d 64 (symptr)->howlen = e_no_len; \
f7fe979e
AL
65 (symptr)->level = 0; \
66 (symptr)->flags = (f); \
67 (symptr)->strbeg = 0; \
68 (symptr)->previous = NULL; \
69 } STMT_END
70
7212898e 71#if PERL_VERSION >= 9
f337b084
TH
72# define PERL_PACK_CAN_BYTEORDER
73# define PERL_PACK_CAN_SHRIEKSIGN
74#endif
75
76#ifndef CHAR_BIT
77# define CHAR_BIT 8
7212898e 78#endif
3473cf63
RGS
79/* Maximum number of bytes to which a byte can grow due to upgrade */
80#define UTF8_EXPAND 2
7212898e 81
a6ec74c1 82/*
a6ec74c1
JH
83 * Offset for integer pack/unpack.
84 *
85 * On architectures where I16 and I32 aren't really 16 and 32 bits,
86 * which for now are all Crays, pack and unpack have to play games.
87 */
88
89/*
90 * These values are required for portability of pack() output.
91 * If they're not right on your machine, then pack() and unpack()
92 * wouldn't work right anyway; you'll need to apply the Cray hack.
93 * (I'd like to check them with #if, but you can't use sizeof() in
94 * the preprocessor.) --???
95 */
96/*
97 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
98 defines are now in config.h. --Andy Dougherty April 1998
99 */
100#define SIZE16 2
101#define SIZE32 4
102
103/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
104 --jhi Feb 1999 */
105
1109a392
MHM
106#if U16SIZE > SIZE16 || U32SIZE > SIZE32
107# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
108# define OFF16(p) ((char*)(p))
109# define OFF32(p) ((char*)(p))
a6ec74c1 110# else
1109a392 111# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
112# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
113# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
114# else
08ca2aa3 115 ++++ bad cray byte order
a6ec74c1
JH
116# endif
117# endif
a6ec74c1 118#else
08ca2aa3
TH
119# define OFF16(p) ((char *) (p))
120# define OFF32(p) ((char *) (p))
a6ec74c1
JH
121#endif
122
f337b084
TH
123/* Only to be used inside a loop (see the break) */
124#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
125 if (utf8) { \
126 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
127 } else { \
128 Copy(s, OFF16(p), SIZE16, char); \
129 (s) += SIZE16; \
130 } \
131} STMT_END
132
133/* Only to be used inside a loop (see the break) */
134#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
135 if (utf8) { \
136 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
137 } else { \
138 Copy(s, OFF32(p), SIZE32, char); \
139 (s) += SIZE32; \
140 } \
141} STMT_END
142
143#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
144#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
145
146/* Only to be used inside a loop (see the break) */
f337b084 147#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
148STMT_START { \
149 if (utf8) { \
f337b084
TH
150 if (!uni_to_bytes(aTHX_ &s, strend, \
151 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
152 } else { \
153 Copy(s, (char *) &var, sizeof(var), char); \
154 s += sizeof(var); \
155 } \
08ca2aa3
TH
156} STMT_END
157
f337b084 158#define PUSH_VAR(utf8, aptr, var) \
230e1fce 159 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 160
49704364
WL
161/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
162#define MAX_SUB_TEMPLATE_LEVEL 100
163
66c611c5 164/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
165#define FLAG_WAS_UTF8 0x40
166#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 167#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 168#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
169#define FLAG_SLASH 0x04
170#define FLAG_COMMA 0x02
171#define FLAG_PACK 0x01
172
a6ec74c1
JH
173STATIC SV *
174S_mul128(pTHX_ SV *sv, U8 m)
175{
176 STRLEN len;
177 char *s = SvPV(sv, len);
178 char *t;
a6ec74c1 179
7918f24d
NC
180 PERL_ARGS_ASSERT_MUL128;
181
a6ec74c1 182 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 183 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
184
185 sv_catsv(tmpNew, sv);
186 SvREFCNT_dec(sv); /* free old sv */
187 sv = tmpNew;
188 s = SvPV(sv, len);
189 }
190 t = s + len - 1;
191 while (!*t) /* trailing '\0'? */
192 t--;
193 while (t > s) {
f7fe979e 194 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
195 *(t--) = '0' + (char)(i % 10);
196 m = (char)(i / 10);
a6ec74c1
JH
197 }
198 return (sv);
199}
200
201/* Explosives and implosives. */
202
203#if 'I' == 73 && 'J' == 74
204/* On an ASCII/ISO kind of system */
205#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
206#else
207/*
208 Some other sort of character set - use memchr() so we don't match
209 the null byte.
210 */
211#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
212#endif
213
66c611c5 214/* type modifiers */
62f95557 215#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
216#define TYPE_IS_BIG_ENDIAN 0x200
217#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 218#define TYPE_IS_PACK 0x800
1109a392 219#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 220#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
221#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222
7212898e 223#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 224# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 225#else
28be1210 226# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
227#endif
228
229#ifndef PERL_PACK_CAN_BYTEORDER
230/* Put "can't" first because it is shorter */
231# define TYPE_ENDIANNESS(t) 0
232# define TYPE_NO_ENDIANNESS(t) (t)
233
234# define ENDIANNESS_ALLOWED_TYPES ""
235
236# define DO_BO_UNPACK(var, type)
237# define DO_BO_PACK(var, type)
07409e01
NC
238# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
239# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
240# define DO_BO_UNPACK_N(var, type)
241# define DO_BO_PACK_N(var, type)
242# define DO_BO_UNPACK_P(var)
243# define DO_BO_PACK_P(var)
45f723e1
NC
244# define DO_BO_UNPACK_PC(var)
245# define DO_BO_PACK_PC(var)
66c611c5 246
f337b084 247#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
248
249# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
250# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
251
252# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
253
254# define DO_BO_UNPACK(var, type) \
1109a392 255 STMT_START { \
66c611c5 256 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
257 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
258 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
259 default: break; \
260 } \
261 } STMT_END
262
7212898e 263# define DO_BO_PACK(var, type) \
1109a392 264 STMT_START { \
66c611c5 265 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
266 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
267 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
268 default: break; \
269 } \
270 } STMT_END
271
07409e01 272# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 273 STMT_START { \
66c611c5 274 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 275 case TYPE_IS_BIG_ENDIAN: \
07409e01 276 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
277 break; \
278 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 279 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
280 break; \
281 default: \
282 break; \
283 } \
284 } STMT_END
285
07409e01 286# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 287 STMT_START { \
66c611c5 288 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 289 case TYPE_IS_BIG_ENDIAN: \
07409e01 290 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
291 break; \
292 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 293 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
294 break; \
295 default: \
296 break; \
297 } \
298 } STMT_END
299
7212898e 300# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
301 STMT_START { \
302 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
303 case TYPE_IS_BIG_ENDIAN: \
304 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
305 "platform", #action, #type); \
306 break; \
307 case TYPE_IS_LITTLE_ENDIAN: \
308 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
309 "platform", #action, #type); \
310 break; \
311 default: \
312 break; \
313 } \
314 } STMT_END
315
7212898e 316# if PTRSIZE == INTSIZE
07409e01
NC
317# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
318# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
319# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
320# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 321# elif PTRSIZE == LONGSIZE
07409e01
NC
322# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
323# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
324# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
325# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
326# elif PTRSIZE == IVSIZE
327# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
328# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
329# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
330# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
331# else
332# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
333# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
334# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
335# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 336# endif
1109a392 337
7212898e 338# if defined(my_htolen) && defined(my_letohn) && \
1109a392 339 defined(my_htoben) && defined(my_betohn)
7212898e 340# define DO_BO_UNPACK_N(var, type) \
1109a392 341 STMT_START { \
66c611c5 342 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
343 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
344 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
345 default: break; \
346 } \
347 } STMT_END
348
7212898e 349# define DO_BO_PACK_N(var, type) \
1109a392 350 STMT_START { \
66c611c5 351 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
352 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
353 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
354 default: break; \
355 } \
356 } STMT_END
7212898e
NC
357# else
358# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
359# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
360# endif
361
f337b084 362#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 363
78d46eaa 364#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 365#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
366#define PACK_SIZE_MASK 0x3F
367
78d46eaa
NC
368/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
369 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
370
371#if TYPE_IS_SHRIEKING != 0x100
372 ++++shriek offset should be 256
373#endif
374
375typedef U8 packprops_t;
78d46eaa
NC
376#if 'J'-'I' == 1
377/* ASCII */
f43a9a31 378STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
379 /* normal */
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0,
1651fc44 385 /* C */ sizeof(unsigned char),
80a13697 386#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 387 /* D */ LONG_DOUBLESIZE,
80a13697 388#else
a7a3cfaa 389 0,
80a13697 390#endif
a7a3cfaa
TH
391 0,
392 /* F */ NVSIZE,
393 0, 0,
394 /* I */ sizeof(unsigned int),
395 /* J */ UVSIZE,
396 0,
397 /* L */ SIZE32,
398 0,
399 /* N */ SIZE32,
400 0, 0,
80a13697 401#if defined(HAS_QUAD)
a7a3cfaa 402 /* Q */ sizeof(Uquad_t),
80a13697 403#else
a7a3cfaa 404 0,
80a13697 405#endif
a7a3cfaa
TH
406 0,
407 /* S */ SIZE16,
408 0,
409 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
410 /* V */ SIZE32,
411 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
412 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
413 /* c */ sizeof(char),
414 /* d */ sizeof(double),
415 0,
416 /* f */ sizeof(float),
417 0, 0,
418 /* i */ sizeof(int),
419 /* j */ IVSIZE,
420 0,
421 /* l */ SIZE32,
422 0,
423 /* n */ SIZE16,
424 0,
425 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 426#if defined(HAS_QUAD)
a7a3cfaa 427 /* q */ sizeof(Quad_t),
80a13697 428#else
a7a3cfaa 429 0,
80a13697 430#endif
a7a3cfaa
TH
431 0,
432 /* s */ SIZE16,
433 0, 0,
434 /* v */ SIZE16,
435 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0,
445 /* shrieking */
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 /* I */ sizeof(unsigned int),
452 0, 0,
453 /* L */ sizeof(unsigned long),
454 0,
7212898e 455#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 456 /* N */ SIZE32,
7212898e 457#else
a7a3cfaa 458 0,
7212898e 459#endif
a7a3cfaa
TH
460 0, 0, 0, 0,
461 /* S */ sizeof(unsigned short),
462 0, 0,
7212898e 463#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 464 /* V */ SIZE32,
7212898e 465#else
a7a3cfaa 466 0,
7212898e 467#endif
a7a3cfaa
TH
468 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
469 0, 0,
470 /* i */ sizeof(int),
471 0, 0,
472 /* l */ sizeof(long),
473 0,
7212898e 474#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 475 /* n */ SIZE16,
7212898e 476#else
a7a3cfaa 477 0,
7212898e 478#endif
a7a3cfaa
TH
479 0, 0, 0, 0,
480 /* s */ sizeof(short),
481 0, 0,
7212898e 482#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 483 /* v */ SIZE16,
7212898e 484#else
a7a3cfaa 485 0,
7212898e 486#endif
a7a3cfaa
TH
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
496};
497#else
498/* EBCDIC (or bust) */
f43a9a31 499STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
500 /* normal */
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0,
510 /* c */ sizeof(char),
511 /* d */ sizeof(double),
512 0,
513 /* f */ sizeof(float),
514 0, 0,
515 /* i */ sizeof(int),
516 0, 0, 0, 0, 0, 0, 0,
517 /* j */ IVSIZE,
518 0,
519 /* l */ SIZE32,
520 0,
521 /* n */ SIZE16,
522 0,
523 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 524#if defined(HAS_QUAD)
a7a3cfaa 525 /* q */ sizeof(Quad_t),
80a13697 526#else
a7a3cfaa 527 0,
80a13697 528#endif
a7a3cfaa
TH
529 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 /* s */ SIZE16,
531 0, 0,
532 /* v */ SIZE16,
533 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 536 /* C */ sizeof(unsigned char),
80a13697 537#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 538 /* D */ LONG_DOUBLESIZE,
80a13697 539#else
a7a3cfaa 540 0,
80a13697 541#endif
a7a3cfaa
TH
542 0,
543 /* F */ NVSIZE,
544 0, 0,
545 /* I */ sizeof(unsigned int),
546 0, 0, 0, 0, 0, 0, 0,
547 /* J */ UVSIZE,
548 0,
549 /* L */ SIZE32,
550 0,
551 /* N */ SIZE32,
552 0, 0,
80a13697 553#if defined(HAS_QUAD)
a7a3cfaa 554 /* Q */ sizeof(Uquad_t),
80a13697 555#else
a7a3cfaa 556 0,
80a13697 557#endif
a7a3cfaa
TH
558 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 /* S */ SIZE16,
560 0,
561 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
562 /* V */ SIZE32,
563 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 /* shrieking */
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* i */ sizeof(int),
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 /* l */ sizeof(long),
579 0,
7212898e 580#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 581 /* n */ SIZE16,
7212898e 582#else
a7a3cfaa 583 0,
7212898e 584#endif
a7a3cfaa
TH
585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
586 /* s */ sizeof(short),
587 0, 0,
7212898e 588#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 589 /* v */ SIZE16,
7212898e 590#else
a7a3cfaa 591 0,
7212898e 592#endif
a7a3cfaa
TH
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0,
596 /* I */ sizeof(unsigned int),
597 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 /* L */ sizeof(unsigned long),
599 0,
7212898e 600#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 601 /* N */ SIZE32,
7212898e 602#else
a7a3cfaa 603 0,
7212898e 604#endif
a7a3cfaa
TH
605 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
606 /* S */ sizeof(unsigned short),
607 0, 0,
7212898e 608#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 609 /* V */ SIZE32,
7212898e 610#else
a7a3cfaa 611 0,
7212898e 612#endif
a7a3cfaa
TH
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
614 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
615};
616#endif
617
08ca2aa3 618STATIC U8
f7fe979e 619uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 620{
08ca2aa3 621 STRLEN retlen;
0bcc34c2 622 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 623 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
624 /* We try to process malformed UTF-8 as much as possible (preferrably with
625 warnings), but these two mean we make no progress in the string and
626 might enter an infinite loop */
627 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
628 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
629 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 630 if (val >= 0x100) {
f337b084 631 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
632 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
633 "Character in '%c' format wrapped in unpack",
f337b084 634 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
635 val &= 0xff;
636 }
637 *s += retlen;
fe2774ed 638 return (U8)val;
08ca2aa3
TH
639}
640
f337b084
TH
641#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
642 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
643 *(U8 *)(s)++)
644
645STATIC bool
f7fe979e 646uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
647{
648 UV val;
649 STRLEN retlen;
f7fe979e 650 const char *from = *s;
08ca2aa3 651 int bad = 0;
f7fe979e 652 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
653 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
654 for (;buf_len > 0; buf_len--) {
655 if (from >= end) return FALSE;
f337b084 656 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
657 if (retlen == (STRLEN) -1 || retlen == 0) {
658 from += UTF8SKIP(from);
659 bad |= 1;
660 } else from += retlen;
661 if (val >= 0x100) {
662 bad |= 2;
663 val &= 0xff;
664 }
fe2774ed 665 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
666 }
667 /* We have enough characters for the buffer. Did we have problems ? */
668 if (bad) {
669 if (bad & 1) {
670 /* Rewalk the string fragment while warning */
f7fe979e 671 const char *ptr;
9e27e96a 672 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
673 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
674 if (ptr >= end) break;
f337b084 675 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
676 }
677 if (from > end) from = end;
678 }
679 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 680 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
681 WARN_PACK : WARN_UNPACK),
682 "Character(s) in '%c' format wrapped in %s",
fc241834 683 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 684 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
685 }
686 *s = from;
687 return TRUE;
688}
689
690STATIC bool
f7fe979e 691next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 692{
97aff369 693 dVAR;
08ca2aa3 694 STRLEN retlen;
0bcc34c2 695 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
696 if (val >= 0x100 || !ISUUCHAR(val) ||
697 retlen == (STRLEN) -1 || retlen == 0) {
698 *out = 0;
699 return FALSE;
700 }
701 *out = PL_uudmap[val] & 077;
f337b084 702 *s += retlen;
08ca2aa3
TH
703 return TRUE;
704}
78d46eaa 705
64844641 706STATIC char *
14333449 707S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 708 const U8 * const end = start + len;
64844641 709
7918f24d
NC
710 PERL_ARGS_ASSERT_BYTES_TO_UNI;
711
f337b084 712 while (start < end) {
48fa4626
TS
713 const UV uv = NATIVE_TO_ASCII(*start);
714 if (UNI_IS_INVARIANT(uv))
715 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
716 else {
717 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
718 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
719 }
720 start++;
721 }
64844641 722 return dest;
f337b084
TH
723}
724
230e1fce
NC
725#define PUSH_BYTES(utf8, cur, buf, len) \
726STMT_START { \
64844641
AL
727 if (utf8) \
728 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
729 else { \
730 Copy(buf, cur, len, char); \
731 (cur) += (len); \
732 } \
f337b084
TH
733} STMT_END
734
735#define GROWING(utf8, cat, start, cur, in_len) \
736STMT_START { \
737 STRLEN glen = (in_len); \
3473cf63 738 if (utf8) glen *= UTF8_EXPAND; \
f337b084 739 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 740 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
741 (cur) = (start) + SvCUR(cat); \
742 } \
743} STMT_END
744
745#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
746STMT_START { \
f7fe979e 747 const STRLEN glen = (in_len); \
f337b084 748 STRLEN gl = glen; \
3473cf63 749 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
750 if ((cur) + gl >= (start) + SvLEN(cat)) { \
751 *cur = '\0'; \
b162af07 752 SvCUR_set((cat), (cur) - (start)); \
0bd48802 753 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
754 (cur) = (start) + SvCUR(cat); \
755 } \
756 PUSH_BYTES(utf8, cur, buf, glen); \
757} STMT_END
758
759#define PUSH_BYTE(utf8, s, byte) \
760STMT_START { \
761 if (utf8) { \
f7fe979e 762 const U8 au8 = (byte); \
64844641 763 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
764 } else *(U8 *)(s)++ = (byte); \
765} STMT_END
766
767/* Only to be used inside a loop (see the break) */
768#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
769STMT_START { \
770 STRLEN retlen; \
771 if (str >= end) break; \
772 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
773 if (retlen == (STRLEN) -1 || retlen == 0) { \
774 *cur = '\0'; \
775 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
776 } \
777 str += retlen; \
778} STMT_END
779
f7fe979e
AL
780static const char *_action( const tempsym_t* symptr )
781{
10edeb5d 782 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
783}
784
206947d2 785/* Returns the sizeof() struct described by pat */
028d1f6d 786STATIC I32
f337b084 787S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 788{
f337b084 789 I32 total = 0;
206947d2 790
7918f24d
NC
791 PERL_ARGS_ASSERT_MEASURE_STRUCT;
792
49704364 793 while (next_symbol(symptr)) {
f337b084 794 I32 len;
f7fe979e 795 int size;
f337b084
TH
796
797 switch (symptr->howlen) {
fc241834 798 case e_star:
49704364 799 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 800 _action( symptr ) );
49704364 801 break;
f337b084
TH
802 default:
803 /* e_no_len and e_number */
804 len = symptr->length;
805 break;
49704364
WL
806 }
807
a7a3cfaa 808 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 809 if (!size) {
f7fe979e 810 int star;
80a13697
NC
811 /* endianness doesn't influence the size of a type */
812 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
813 default:
814 Perl_croak(aTHX_ "Invalid type '%c' in %s",
815 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 816 _action( symptr ) );
28be1210
TH
817#ifdef PERL_PACK_CAN_SHRIEKSIGN
818 case '.' | TYPE_IS_SHRIEKING:
819 case '@' | TYPE_IS_SHRIEKING:
820#endif
80a13697 821 case '@':
28be1210 822 case '.':
80a13697
NC
823 case '/':
824 case 'U': /* XXXX Is it correct? */
825 case 'w':
826 case 'u':
827 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 828 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 829 _action( symptr ) );
80a13697
NC
830 case '%':
831 size = 0;
832 break;
833 case '(':
fc241834
RGS
834 {
835 tempsym_t savsym = *symptr;
836 symptr->patptr = savsym.grpbeg;
837 symptr->patend = savsym.grpend;
838 /* XXXX Theoretically, we need to measure many times at
839 different positions, since the subexpression may contain
840 alignment commands, but be not of aligned length.
841 Need to detect this and croak(). */
842 size = measure_struct(symptr);
843 *symptr = savsym;
844 break;
845 }
80a13697
NC
846 case 'X' | TYPE_IS_SHRIEKING:
847 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
848 */
849 if (!len) /* Avoid division by 0 */
850 len = 1;
851 len = total % len; /* Assumed: the start is aligned. */
852 /* FALL THROUGH */
853 case 'X':
854 size = -1;
855 if (total < len)
f7fe979e 856 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
857 break;
858 case 'x' | TYPE_IS_SHRIEKING:
859 if (!len) /* Avoid division by 0 */
860 len = 1;
861 star = total % len; /* Assumed: the start is aligned. */
862 if (star) /* Other portable ways? */
863 len = len - star;
864 else
865 len = 0;
866 /* FALL THROUGH */
867 case 'x':
868 case 'A':
869 case 'Z':
870 case 'a':
80a13697
NC
871 size = 1;
872 break;
873 case 'B':
874 case 'b':
875 len = (len + 7)/8;
876 size = 1;
877 break;
878 case 'H':
879 case 'h':
880 len = (len + 1)/2;
881 size = 1;
882 break;
78d46eaa 883
80a13697
NC
884 case 'P':
885 len = 1;
886 size = sizeof(char*);
78d46eaa
NC
887 break;
888 }
206947d2
IZ
889 }
890 total += len * size;
891 }
892 return total;
893}
894
49704364
WL
895
896/* locate matching closing parenthesis or bracket
897 * returns char pointer to char after match, or NULL
898 */
f7fe979e
AL
899STATIC const char *
900S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 901{
7918f24d
NC
902 PERL_ARGS_ASSERT_GROUP_END;
903
49704364 904 while (patptr < patend) {
f7fe979e 905 const char c = *patptr++;
49704364
WL
906
907 if (isSPACE(c))
908 continue;
909 else if (c == ender)
910 return patptr-1;
911 else if (c == '#') {
912 while (patptr < patend && *patptr != '\n')
913 patptr++;
914 continue;
915 } else if (c == '(')
916 patptr = group_end(patptr, patend, ')') + 1;
917 else if (c == '[')
918 patptr = group_end(patptr, patend, ']') + 1;
18529408 919 }
49704364
WL
920 Perl_croak(aTHX_ "No group ending character '%c' found in template",
921 ender);
922 return 0;
18529408
IZ
923}
924
49704364
WL
925
926/* Convert unsigned decimal number to binary.
927 * Expects a pointer to the first digit and address of length variable
928 * Advances char pointer to 1st non-digit char and returns number
fc241834 929 */
f7fe979e
AL
930STATIC const char *
931S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
932{
933 I32 len = *patptr++ - '0';
7918f24d
NC
934
935 PERL_ARGS_ASSERT_GET_NUM;
936
49704364
WL
937 while (isDIGIT(*patptr)) {
938 if (len >= 0x7FFFFFFF/10)
939 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
940 len = (len * 10) + (*patptr++ - '0');
941 }
942 *lenptr = len;
943 return patptr;
944}
945
946/* The marvellous template parsing routine: Using state stored in *symptr,
947 * locates next template code and count
948 */
949STATIC bool
f337b084 950S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 951{
f7fe979e 952 const char* patptr = symptr->patptr;
0bcc34c2 953 const char* const patend = symptr->patend;
49704364 954
7918f24d
NC
955 PERL_ARGS_ASSERT_NEXT_SYMBOL;
956
49704364
WL
957 symptr->flags &= ~FLAG_SLASH;
958
959 while (patptr < patend) {
960 if (isSPACE(*patptr))
961 patptr++;
962 else if (*patptr == '#') {
963 patptr++;
964 while (patptr < patend && *patptr != '\n')
965 patptr++;
966 if (patptr < patend)
967 patptr++;
968 } else {
fc241834 969 /* We should have found a template code */
49704364 970 I32 code = *patptr++ & 0xFF;
66c611c5 971 U32 inherited_modifiers = 0;
49704364
WL
972
973 if (code == ','){ /* grandfather in commas but with a warning */
974 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
975 symptr->flags |= FLAG_COMMA;
976 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 977 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
978 }
979 continue;
980 }
fc241834 981
49704364 982 /* for '(', skip to ')' */
fc241834 983 if (code == '(') {
49704364
WL
984 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
985 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 986 _action( symptr ) );
49704364
WL
987 symptr->grpbeg = patptr;
988 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
989 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
990 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 991 _action( symptr ) );
49704364
WL
992 }
993
66c611c5
MHM
994 /* look for group modifiers to inherit */
995 if (TYPE_ENDIANNESS(symptr->flags)) {
996 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
997 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
998 }
999
1109a392
MHM
1000 /* look for modifiers */
1001 while (patptr < patend) {
b7787f18
AL
1002 const char *allowed;
1003 I32 modifier;
1109a392
MHM
1004 switch (*patptr) {
1005 case '!':
1006 modifier = TYPE_IS_SHRIEKING;
7212898e 1007 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1008 break;
7212898e 1009#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1010 case '>':
1011 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1012 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1013 break;
1014 case '<':
1015 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1016 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1017 break;
f337b084 1018#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1019 default:
b7787f18
AL
1020 allowed = "";
1021 modifier = 0;
1109a392
MHM
1022 break;
1023 }
66c611c5 1024
1109a392
MHM
1025 if (modifier == 0)
1026 break;
66c611c5 1027
1109a392
MHM
1028 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1029 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1030 allowed, _action( symptr ) );
66c611c5
MHM
1031
1032 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1033 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1034 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1035 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1036 TYPE_ENDIANNESS_MASK)
1037 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1038 *patptr, _action( symptr ) );
66c611c5 1039
041457d9 1040 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1041 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1042 "Duplicate modifier '%c' after '%c' in %s",
1043 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1044 _action( symptr ) );
1109a392 1045 }
66c611c5 1046
1109a392
MHM
1047 code |= modifier;
1048 patptr++;
49704364
WL
1049 }
1050
66c611c5
MHM
1051 /* inherit modifiers */
1052 code |= inherited_modifiers;
1053
fc241834 1054 /* look for count and/or / */
49704364
WL
1055 if (patptr < patend) {
1056 if (isDIGIT(*patptr)) {
1057 patptr = get_num( patptr, &symptr->length );
1058 symptr->howlen = e_number;
1059
1060 } else if (*patptr == '*') {
1061 patptr++;
1062 symptr->howlen = e_star;
1063
1064 } else if (*patptr == '[') {
f7fe979e 1065 const char* lenptr = ++patptr;
49704364
WL
1066 symptr->howlen = e_number;
1067 patptr = group_end( patptr, patend, ']' ) + 1;
1068 /* what kind of [] is it? */
1069 if (isDIGIT(*lenptr)) {
1070 lenptr = get_num( lenptr, &symptr->length );
1071 if( *lenptr != ']' )
1072 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1073 _action( symptr ) );
49704364
WL
1074 } else {
1075 tempsym_t savsym = *symptr;
1076 symptr->patend = patptr-1;
1077 symptr->patptr = lenptr;
1078 savsym.length = measure_struct(symptr);
1079 *symptr = savsym;
1080 }
1081 } else {
1082 symptr->howlen = e_no_len;
1083 symptr->length = 1;
1084 }
1085
1086 /* try to find / */
1087 while (patptr < patend) {
1088 if (isSPACE(*patptr))
1089 patptr++;
1090 else if (*patptr == '#') {
1091 patptr++;
1092 while (patptr < patend && *patptr != '\n')
1093 patptr++;
1094 if (patptr < patend)
1095 patptr++;
1096 } else {
66c611c5 1097 if (*patptr == '/') {
49704364
WL
1098 symptr->flags |= FLAG_SLASH;
1099 patptr++;
66c611c5
MHM
1100 if (patptr < patend &&
1101 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1102 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1103 _action( symptr ) );
49704364
WL
1104 }
1105 break;
1106 }
18529408 1107 }
49704364
WL
1108 } else {
1109 /* at end - no count, no / */
1110 symptr->howlen = e_no_len;
1111 symptr->length = 1;
1112 }
1113
1114 symptr->code = code;
fc241834 1115 symptr->patptr = patptr;
49704364 1116 return TRUE;
18529408 1117 }
49704364 1118 }
fc241834 1119 symptr->patptr = patptr;
49704364 1120 return FALSE;
18529408
IZ
1121}
1122
18529408 1123/*
fc241834 1124 There is no way to cleanly handle the case where we should process the
08ca2aa3 1125 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1126 (e.g. estimates like strend-s as an upper bound for the number of
1127 characters left wouldn't work). So if we foresee the need of this
1128 (pattern starts with U or contains U0), we want to work on the encoded
1129 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1130 themselves if they need to do a lot of unpacks like this on it
1131*/
fc241834 1132STATIC bool
08ca2aa3
TH
1133need_utf8(const char *pat, const char *patend)
1134{
1135 bool first = TRUE;
7918f24d
NC
1136
1137 PERL_ARGS_ASSERT_NEED_UTF8;
1138
08ca2aa3
TH
1139 while (pat < patend) {
1140 if (pat[0] == '#') {
1141 pat++;
f7fe979e 1142 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1143 if (!pat) return FALSE;
1144 } else if (pat[0] == 'U') {
1145 if (first || pat[1] == '0') return TRUE;
1146 } else first = FALSE;
1147 pat++;
1148 }
1149 return FALSE;
1150}
1151
1152STATIC char
1153first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1154 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1155
08ca2aa3
TH
1156 while (pat < patend) {
1157 if (pat[0] != '#') return pat[0];
1158 pat++;
f7fe979e 1159 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1160 if (!pat) return 0;
1161 pat++;
1162 }
1163 return 0;
1164}
1165
1166/*
7accc089
JH
1167=for apidoc unpackstring
1168
608d3aed
WL
1169The engine implementing unpack() Perl function. C<unpackstring> puts the
1170extracted list items on the stack and returns the number of elements.
1171Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1172
1173=cut */
1174
1175I32
f7fe979e 1176Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1177{
f7fe979e 1178 tempsym_t sym;
08ca2aa3 1179
7918f24d
NC
1180 PERL_ARGS_ASSERT_UNPACKSTRING;
1181
f337b084 1182 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1183 else if (need_utf8(pat, patend)) {
1184 /* We probably should try to avoid this in case a scalar context call
1185 wouldn't get to the "U0" */
1186 STRLEN len = strend - s;
230e1fce 1187 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1188 SAVEFREEPV(s);
1189 strend = s + len;
f337b084 1190 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1191 }
1192
f337b084
TH
1193 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1194 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1195
f7fe979e 1196 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1197
1198 return unpack_rec(&sym, s, s, strend, NULL );
1199}
1200
4136a0f7 1201STATIC I32
f7fe979e 1202S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1203{
27da23d5 1204 dVAR; dSP;
18529408 1205 SV *sv;
f7fe979e 1206 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1207 howlen_t howlen;
a6ec74c1 1208 I32 checksum = 0;
92d41999 1209 UV cuv = 0;
a6ec74c1 1210 NV cdouble = 0.0;
f337b084 1211 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1212 bool beyond = FALSE;
21c16052 1213 bool explicit_length;
9e27e96a 1214 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1215 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1216
1217 PERL_ARGS_ASSERT_UNPACK_REC;
1218
28be1210 1219 symptr->strbeg = s - strbeg;
49704364 1220
49704364 1221 while (next_symbol(symptr)) {
a7a3cfaa 1222 packprops_t props;
9e27e96a 1223 I32 len;
f337b084 1224 I32 datumtype = symptr->code;
206947d2 1225 /* do first one only unless in list context
08ca2aa3 1226 / is implemented by unpacking the count, then popping it from the
206947d2 1227 stack, so must check that we're not in the middle of a / */
49704364 1228 if ( unpack_only_one
206947d2 1229 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1230 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1231 break;
49704364 1232
f337b084 1233 switch (howlen = symptr->howlen) {
fc241834
RGS
1234 case e_star:
1235 len = strend - strbeg; /* long enough */
49704364 1236 break;
f337b084
TH
1237 default:
1238 /* e_no_len and e_number */
1239 len = symptr->length;
1240 break;
49704364 1241 }
18529408 1242
21c16052 1243 explicit_length = TRUE;
a6ec74c1 1244 redo_switch:
49704364 1245 beyond = s >= strend;
a7a3cfaa
TH
1246
1247 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1248 if (props) {
1249 /* props nonzero means we can process this letter. */
9e27e96a
AL
1250 const long size = props & PACK_SIZE_MASK;
1251 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1252 if (len > howmany)
1253 len = howmany;
1254
1255 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1256 if (len && unpack_only_one) len = 1;
1257 EXTEND(SP, len);
1258 EXTEND_MORTAL(len);
78d46eaa
NC
1259 }
1260 }
a7a3cfaa 1261
1109a392 1262 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1263 default:
1109a392 1264 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1265
a6ec74c1 1266 case '%':
49704364 1267 if (howlen == e_no_len)
18529408 1268 len = 16; /* len is not specified */
a6ec74c1 1269 checksum = len;
92d41999 1270 cuv = 0;
a6ec74c1 1271 cdouble = 0;
18529408 1272 continue;
a6ec74c1 1273 break;
18529408
IZ
1274 case '(':
1275 {
49704364 1276 tempsym_t savsym = *symptr;
9e27e96a 1277 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1278 symptr->flags |= group_modifiers;
49704364 1279 symptr->patend = savsym.grpend;
28be1210 1280 symptr->previous = &savsym;
49704364 1281 symptr->level++;
18529408 1282 PUTBACK;
c6f750d1 1283 if (len && unpack_only_one) len = 1;
18529408 1284 while (len--) {
49704364 1285 symptr->patptr = savsym.grpbeg;
f337b084
TH
1286 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1287 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1288 unpack_rec(symptr, s, strbeg, strend, &s);
1289 if (s == strend && savsym.howlen == e_star)
49704364 1290 break; /* No way to continue */
18529408
IZ
1291 }
1292 SPAGAIN;
28be1210 1293 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1294 *symptr = savsym;
18529408
IZ
1295 break;
1296 }
28be1210
TH
1297#ifdef PERL_PACK_CAN_SHRIEKSIGN
1298 case '.' | TYPE_IS_SHRIEKING:
1299#endif
1300 case '.': {
9e27e96a 1301 const char *from;
28be1210
TH
1302 SV *sv;
1303#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1304 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1305#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1306 const bool u8 = utf8;
28be1210
TH
1307#endif
1308 if (howlen == e_star) from = strbeg;
1309 else if (len <= 0) from = s;
1310 else {
1311 tempsym_t *group = symptr;
1312
1313 while (--len && group) group = group->previous;
1314 from = group ? strbeg + group->strbeg : strbeg;
1315 }
1316 sv = from <= s ?
00646304
CB
1317 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1318 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1319 mXPUSHs(sv);
28be1210
TH
1320 break;
1321 }
1322#ifdef PERL_PACK_CAN_SHRIEKSIGN
1323 case '@' | TYPE_IS_SHRIEKING:
1324#endif
a6ec74c1 1325 case '@':
28be1210
TH
1326 s = strbeg + symptr->strbeg;
1327#ifdef PERL_PACK_CAN_SHRIEKSIGN
1328 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1329#else /* PERL_PACK_CAN_SHRIEKSIGN */
1330 if (utf8)
1331#endif
1332 {
08ca2aa3
TH
1333 while (len > 0) {
1334 if (s >= strend)
1335 Perl_croak(aTHX_ "'@' outside of string in unpack");
1336 s += UTF8SKIP(s);
1337 len--;
1338 }
1339 if (s > strend)
1340 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1341 } else {
28be1210 1342 if (strend-s < len)
fc241834 1343 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1344 s += len;
08ca2aa3 1345 }
a6ec74c1 1346 break;
62f95557
IZ
1347 case 'X' | TYPE_IS_SHRIEKING:
1348 if (!len) /* Avoid division by 0 */
1349 len = 1;
08ca2aa3 1350 if (utf8) {
f7fe979e 1351 const char *hop, *last;
f337b084
TH
1352 I32 l = len;
1353 hop = last = strbeg;
1354 while (hop < s) {
1355 hop += UTF8SKIP(hop);
1356 if (--l == 0) {
08ca2aa3 1357 last = hop;
f337b084
TH
1358 l = len;
1359 }
fc241834 1360 }
f337b084
TH
1361 if (last > s)
1362 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1363 s = last;
1364 break;
f337b084
TH
1365 }
1366 len = (s - strbeg) % len;
62f95557 1367 /* FALL THROUGH */
a6ec74c1 1368 case 'X':
08ca2aa3
TH
1369 if (utf8) {
1370 while (len > 0) {
1371 if (s <= strbeg)
1372 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1373 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1374 if (s <= strbeg)
1375 Perl_croak(aTHX_ "'X' outside of string in unpack");
1376 }
1377 len--;
1378 }
1379 } else {
fc241834
RGS
1380 if (len > s - strbeg)
1381 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1382 s -= len;
08ca2aa3 1383 }
a6ec74c1 1384 break;
9e27e96a
AL
1385 case 'x' | TYPE_IS_SHRIEKING: {
1386 I32 ai32;
62f95557
IZ
1387 if (!len) /* Avoid division by 0 */
1388 len = 1;
230e1fce
NC
1389 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1390 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1391 if (ai32 == 0) break;
1392 len -= ai32;
9e27e96a 1393 }
62f95557 1394 /* FALL THROUGH */
a6ec74c1 1395 case 'x':
08ca2aa3
TH
1396 if (utf8) {
1397 while (len>0) {
1398 if (s >= strend)
1399 Perl_croak(aTHX_ "'x' outside of string in unpack");
1400 s += UTF8SKIP(s);
1401 len--;
1402 }
1403 } else {
fc241834
RGS
1404 if (len > strend - s)
1405 Perl_croak(aTHX_ "'x' outside of string in unpack");
1406 s += len;
f337b084 1407 }
a6ec74c1
JH
1408 break;
1409 case '/':
49704364
WL
1410 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1411 break;
a6ec74c1
JH
1412 case 'A':
1413 case 'Z':
1414 case 'a':
08ca2aa3
TH
1415 if (checksum) {
1416 /* Preliminary length estimate is assumed done in 'W' */
1417 if (len > strend - s) len = strend - s;
1418 goto W_checksum;
1419 }
1420 if (utf8) {
1421 I32 l;
f7fe979e 1422 const char *hop;
08ca2aa3
TH
1423 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1424 if (hop >= strend) {
1425 if (hop > strend)
1426 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1427 break;
fc241834 1428 }
a6ec74c1 1429 }
08ca2aa3
TH
1430 if (hop > strend)
1431 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1432 len = hop - s;
1433 } else if (len > strend - s)
1434 len = strend - s;
1435
1436 if (datumtype == 'Z') {
1437 /* 'Z' strips stuff after first null */
f7fe979e 1438 const char *ptr, *end;
f337b084
TH
1439 end = s + len;
1440 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1441 sv = newSVpvn(s, ptr-s);
1442 if (howlen == e_star) /* exact for 'Z*' */
1443 len = ptr-s + (ptr != strend ? 1 : 0);
1444 } else if (datumtype == 'A') {
1445 /* 'A' strips both nulls and spaces */
f7fe979e 1446 const char *ptr;
18bdf90a
TH
1447 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1448 for (ptr = s+len-1; ptr >= s; ptr--)
1449 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1450 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1451 if (ptr >= s) ptr += UTF8SKIP(ptr);
1452 else ptr++;
28be1210 1453 if (ptr > s+len)
18bdf90a
TH
1454 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1455 } else {
1456 for (ptr = s+len-1; ptr >= s; ptr--)
1457 if (*ptr != 0 && !isSPACE(*ptr)) break;
1458 ptr++;
1459 }
08ca2aa3
TH
1460 sv = newSVpvn(s, ptr-s);
1461 } else sv = newSVpvn(s, len);
1462
1463 if (utf8) {
1464 SvUTF8_on(sv);
1465 /* Undo any upgrade done due to need_utf8() */
f337b084 1466 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1467 sv_utf8_downgrade(sv, 0);
a6ec74c1 1468 }
6e449a3a 1469 mXPUSHs(sv);
08ca2aa3 1470 s += len;
a6ec74c1
JH
1471 break;
1472 case 'B':
08ca2aa3
TH
1473 case 'b': {
1474 char *str;
49704364 1475 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1476 len = (strend - s) * 8;
1477 if (checksum) {
1478 if (!PL_bitcount) {
08ca2aa3 1479 int bits;
a02a5408 1480 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1481 for (bits = 1; bits < 256; bits++) {
1482 if (bits & 1) PL_bitcount[bits]++;
1483 if (bits & 2) PL_bitcount[bits]++;
1484 if (bits & 4) PL_bitcount[bits]++;
1485 if (bits & 8) PL_bitcount[bits]++;
1486 if (bits & 16) PL_bitcount[bits]++;
1487 if (bits & 32) PL_bitcount[bits]++;
1488 if (bits & 64) PL_bitcount[bits]++;
1489 if (bits & 128) PL_bitcount[bits]++;
1490 }
1491 }
f337b084 1492 if (utf8)
08ca2aa3 1493 while (len >= 8 && s < strend) {
f337b084 1494 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1495 len -= 8;
1496 }
f337b084 1497 else
fc241834 1498 while (len >= 8) {
08ca2aa3 1499 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1500 len -= 8;
1501 }
08ca2aa3
TH
1502 if (len && s < strend) {
1503 U8 bits;
f337b084
TH
1504 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1505 if (datumtype == 'b')
a6ec74c1 1506 while (len-- > 0) {
92d41999 1507 if (bits & 1) cuv++;
a6ec74c1
JH
1508 bits >>= 1;
1509 }
f337b084 1510 else
a6ec74c1 1511 while (len-- > 0) {
08ca2aa3 1512 if (bits & 0x80) cuv++;
a6ec74c1
JH
1513 bits <<= 1;
1514 }
fc241834 1515 }
a6ec74c1
JH
1516 break;
1517 }
08ca2aa3 1518
561b68a9 1519 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1520 SvPOK_on(sv);
1521 str = SvPVX(sv);
1522 if (datumtype == 'b') {
f337b084 1523 U8 bits = 0;
f7fe979e 1524 const I32 ai32 = len;
08ca2aa3
TH
1525 for (len = 0; len < ai32; len++) {
1526 if (len & 7) bits >>= 1;
1527 else if (utf8) {
1528 if (s >= strend) break;
f337b084 1529 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1530 } else bits = *(U8 *) s++;
1531 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1532 }
08ca2aa3 1533 } else {
f337b084 1534 U8 bits = 0;
f7fe979e 1535 const I32 ai32 = len;
08ca2aa3
TH
1536 for (len = 0; len < ai32; len++) {
1537 if (len & 7) bits <<= 1;
1538 else if (utf8) {
1539 if (s >= strend) break;
f337b084 1540 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1541 } else bits = *(U8 *) s++;
1542 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1543 }
1544 }
1545 *str = '\0';
aa07b2f6 1546 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1547 XPUSHs(sv);
a6ec74c1 1548 break;
08ca2aa3 1549 }
a6ec74c1 1550 case 'H':
08ca2aa3
TH
1551 case 'h': {
1552 char *str;
fc241834 1553 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1554 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1555 len = (strend - s) * 2;
561b68a9 1556 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1557 SvPOK_on(sv);
1558 str = SvPVX(sv);
1559 if (datumtype == 'h') {
f337b084 1560 U8 bits = 0;
9e27e96a 1561 I32 ai32 = len;
fc241834
RGS
1562 for (len = 0; len < ai32; len++) {
1563 if (len & 1) bits >>= 4;
1564 else if (utf8) {
1565 if (s >= strend) break;
f337b084 1566 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1567 } else bits = * (U8 *) s++;
a6ec74c1
JH
1568 *str++ = PL_hexdigit[bits & 15];
1569 }
08ca2aa3 1570 } else {
f337b084 1571 U8 bits = 0;
f7fe979e 1572 const I32 ai32 = len;
08ca2aa3
TH
1573 for (len = 0; len < ai32; len++) {
1574 if (len & 1) bits <<= 4;
1575 else if (utf8) {
1576 if (s >= strend) break;
f337b084 1577 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1578 } else bits = *(U8 *) s++;
a6ec74c1
JH
1579 *str++ = PL_hexdigit[(bits >> 4) & 15];
1580 }
1581 }
1582 *str = '\0';
aa07b2f6 1583 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1584 XPUSHs(sv);
a6ec74c1 1585 break;
08ca2aa3 1586 }
1651fc44
ML
1587 case 'C':
1588 if (len == 0) {
1589 if (explicit_length)
1590 /* Switch to "character" mode */
1591 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1592 break;
1593 }
1594 /* FALL THROUGH */
a6ec74c1 1595 case 'c':
1651fc44
ML
1596 while (len-- > 0 && s < strend) {
1597 int aint;
1598 if (utf8)
1599 {
1600 STRLEN retlen;
1601 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1602 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1603 if (retlen == (STRLEN) -1 || retlen == 0)
1604 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1605 s += retlen;
1606 }
1607 else
1608 aint = *(U8 *)(s)++;
1609 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1610 aint -= 256;
08ca2aa3 1611 if (!checksum)
6e449a3a 1612 mPUSHi(aint);
73cb7263
NC
1613 else if (checksum > bits_in_uv)
1614 cdouble += (NV)aint;
1615 else
1616 cuv += aint;
a6ec74c1
JH
1617 }
1618 break;
08ca2aa3
TH
1619 case 'W':
1620 W_checksum:
1651fc44 1621 if (utf8) {
08ca2aa3 1622 while (len-- > 0 && s < strend) {
08ca2aa3 1623 STRLEN retlen;
f7fe979e 1624 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1626 if (retlen == (STRLEN) -1 || retlen == 0)
1627 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1628 s += retlen;
1629 if (!checksum)
6e449a3a 1630 mPUSHu(val);
08ca2aa3
TH
1631 else if (checksum > bits_in_uv)
1632 cdouble += (NV) val;
d6d3e8bd 1633 else
08ca2aa3 1634 cuv += val;
fc241834 1635 }
08ca2aa3 1636 } else if (!checksum)
a6ec74c1 1637 while (len-- > 0) {
f7fe979e 1638 const U8 ch = *(U8 *) s++;
6e449a3a 1639 mPUSHu(ch);
a6ec74c1 1640 }
08ca2aa3
TH
1641 else if (checksum > bits_in_uv)
1642 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1643 else
1644 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1645 break;
1646 case 'U':
35bcd338 1647 if (len == 0) {
08ca2aa3
TH
1648 if (explicit_length) {
1649 /* Switch to "bytes in UTF-8" mode */
f337b084 1650 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1651 else
1652 /* Should be impossible due to the need_utf8() test */
1653 Perl_croak(aTHX_ "U0 mode on a byte string");
1654 }
35bcd338
JH
1655 break;
1656 }
08ca2aa3 1657 if (len > strend - s) len = strend - s;
fc241834 1658 if (!checksum) {
08ca2aa3
TH
1659 if (len && unpack_only_one) len = 1;
1660 EXTEND(SP, len);
1661 EXTEND_MORTAL(len);
fc241834 1662 }
08ca2aa3
TH
1663 while (len-- > 0 && s < strend) {
1664 STRLEN retlen;
1665 UV auv;
1666 if (utf8) {
1667 U8 result[UTF8_MAXLEN];
f7fe979e 1668 const char *ptr = s;
08ca2aa3 1669 STRLEN len;
08ca2aa3
TH
1670 /* Bug: warns about bad utf8 even if we are short on bytes
1671 and will break out of the loop */
230e1fce
NC
1672 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1673 'U'))
08ca2aa3
TH
1674 break;
1675 len = UTF8SKIP(result);
fc241834 1676 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1677 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1678 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1679 s = ptr;
1680 } else {
1681 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1682 if (retlen == (STRLEN) -1 || retlen == 0)
1683 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1684 s += retlen;
1685 }
1686 if (!checksum)
6e449a3a 1687 mPUSHu(auv);
73cb7263 1688 else if (checksum > bits_in_uv)
08ca2aa3 1689 cdouble += (NV) auv;
73cb7263 1690 else
08ca2aa3 1691 cuv += auv;
a6ec74c1
JH
1692 }
1693 break;
49704364
WL
1694 case 's' | TYPE_IS_SHRIEKING:
1695#if SHORTSIZE != SIZE16
73cb7263 1696 while (len-- > 0) {
08ca2aa3 1697 short ashort;
f337b084
TH
1698 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1699 DO_BO_UNPACK(ashort, s);
08ca2aa3 1700 if (!checksum)
6e449a3a 1701 mPUSHi(ashort);
73cb7263
NC
1702 else if (checksum > bits_in_uv)
1703 cdouble += (NV)ashort;
1704 else
1705 cuv += ashort;
49704364
WL
1706 }
1707 break;
1708#else
1709 /* Fallthrough! */
a6ec74c1 1710#endif
49704364 1711 case 's':
73cb7263 1712 while (len-- > 0) {
08ca2aa3
TH
1713 I16 ai16;
1714
1715#if U16SIZE > SIZE16
1716 ai16 = 0;
1717#endif
f337b084 1718 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1719 DO_BO_UNPACK(ai16, 16);
1109a392 1720#if U16SIZE > SIZE16
73cb7263
NC
1721 if (ai16 > 32767)
1722 ai16 -= 65536;
a6ec74c1 1723#endif
08ca2aa3 1724 if (!checksum)
6e449a3a 1725 mPUSHi(ai16);
73cb7263
NC
1726 else if (checksum > bits_in_uv)
1727 cdouble += (NV)ai16;
1728 else
1729 cuv += ai16;
a6ec74c1
JH
1730 }
1731 break;
49704364
WL
1732 case 'S' | TYPE_IS_SHRIEKING:
1733#if SHORTSIZE != SIZE16
73cb7263 1734 while (len-- > 0) {
08ca2aa3 1735 unsigned short aushort;
f337b084
TH
1736 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1737 DO_BO_UNPACK(aushort, s);
08ca2aa3 1738 if (!checksum)
6e449a3a 1739 mPUSHu(aushort);
73cb7263
NC
1740 else if (checksum > bits_in_uv)
1741 cdouble += (NV)aushort;
1742 else
1743 cuv += aushort;
49704364
WL
1744 }
1745 break;
1746#else
1747 /* Fallhrough! */
1748#endif
a6ec74c1
JH
1749 case 'v':
1750 case 'n':
1751 case 'S':
73cb7263 1752 while (len-- > 0) {
08ca2aa3
TH
1753 U16 au16;
1754#if U16SIZE > SIZE16
1755 au16 = 0;
1756#endif
f337b084 1757 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1758 DO_BO_UNPACK(au16, 16);
a6ec74c1 1759#ifdef HAS_NTOHS
73cb7263
NC
1760 if (datumtype == 'n')
1761 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1762#endif
1763#ifdef HAS_VTOHS
73cb7263
NC
1764 if (datumtype == 'v')
1765 au16 = vtohs(au16);
a6ec74c1 1766#endif
08ca2aa3 1767 if (!checksum)
6e449a3a 1768 mPUSHu(au16);
73cb7263 1769 else if (checksum > bits_in_uv)
f337b084 1770 cdouble += (NV) au16;
73cb7263
NC
1771 else
1772 cuv += au16;
a6ec74c1
JH
1773 }
1774 break;
7212898e 1775#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1776 case 'v' | TYPE_IS_SHRIEKING:
1777 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1778 while (len-- > 0) {
08ca2aa3
TH
1779 I16 ai16;
1780# if U16SIZE > SIZE16
1781 ai16 = 0;
1782# endif
f337b084 1783 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1784# ifdef HAS_NTOHS
73cb7263 1785 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1786 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1787# endif /* HAS_NTOHS */
1788# ifdef HAS_VTOHS
73cb7263 1789 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1790 ai16 = (I16) vtohs((U16) ai16);
1791# endif /* HAS_VTOHS */
1792 if (!checksum)
6e449a3a 1793 mPUSHi(ai16);
73cb7263 1794 else if (checksum > bits_in_uv)
08ca2aa3 1795 cdouble += (NV) ai16;
73cb7263
NC
1796 else
1797 cuv += ai16;
068bd2e7
MHM
1798 }
1799 break;
08ca2aa3 1800#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1801 case 'i':
49704364 1802 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1803 while (len-- > 0) {
08ca2aa3 1804 int aint;
f337b084
TH
1805 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1806 DO_BO_UNPACK(aint, i);
08ca2aa3 1807 if (!checksum)
6e449a3a 1808 mPUSHi(aint);
73cb7263
NC
1809 else if (checksum > bits_in_uv)
1810 cdouble += (NV)aint;
1811 else
1812 cuv += aint;
a6ec74c1
JH
1813 }
1814 break;
1815 case 'I':
49704364 1816 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1817 while (len-- > 0) {
08ca2aa3 1818 unsigned int auint;
f337b084
TH
1819 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1820 DO_BO_UNPACK(auint, i);
08ca2aa3 1821 if (!checksum)
6e449a3a 1822 mPUSHu(auint);
73cb7263
NC
1823 else if (checksum > bits_in_uv)
1824 cdouble += (NV)auint;
1825 else
1826 cuv += auint;
a6ec74c1
JH
1827 }
1828 break;
92d41999 1829 case 'j':
73cb7263 1830 while (len-- > 0) {
08ca2aa3 1831 IV aiv;
f337b084 1832 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1833#if IVSIZE == INTSIZE
f337b084 1834 DO_BO_UNPACK(aiv, i);
1109a392 1835#elif IVSIZE == LONGSIZE
f337b084 1836 DO_BO_UNPACK(aiv, l);
1109a392 1837#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1838 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1839#else
1840 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1841#endif
08ca2aa3 1842 if (!checksum)
6e449a3a 1843 mPUSHi(aiv);
73cb7263
NC
1844 else if (checksum > bits_in_uv)
1845 cdouble += (NV)aiv;
1846 else
1847 cuv += aiv;
92d41999
JH
1848 }
1849 break;
1850 case 'J':
73cb7263 1851 while (len-- > 0) {
08ca2aa3 1852 UV auv;
f337b084 1853 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1854#if IVSIZE == INTSIZE
f337b084 1855 DO_BO_UNPACK(auv, i);
08ca2aa3 1856#elif IVSIZE == LONGSIZE
f337b084 1857 DO_BO_UNPACK(auv, l);
08ca2aa3 1858#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1859 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1860#else
1861 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1862#endif
08ca2aa3 1863 if (!checksum)
6e449a3a 1864 mPUSHu(auv);
73cb7263
NC
1865 else if (checksum > bits_in_uv)
1866 cdouble += (NV)auv;
1867 else
1868 cuv += auv;
92d41999
JH
1869 }
1870 break;
49704364
WL
1871 case 'l' | TYPE_IS_SHRIEKING:
1872#if LONGSIZE != SIZE32
73cb7263 1873 while (len-- > 0) {
08ca2aa3 1874 long along;
f337b084
TH
1875 SHIFT_VAR(utf8, s, strend, along, datumtype);
1876 DO_BO_UNPACK(along, l);
08ca2aa3 1877 if (!checksum)
6e449a3a 1878 mPUSHi(along);
73cb7263
NC
1879 else if (checksum > bits_in_uv)
1880 cdouble += (NV)along;
1881 else
1882 cuv += along;
49704364
WL
1883 }
1884 break;
1885#else
1886 /* Fallthrough! */
a6ec74c1 1887#endif
49704364 1888 case 'l':
73cb7263 1889 while (len-- > 0) {
08ca2aa3
TH
1890 I32 ai32;
1891#if U32SIZE > SIZE32
1892 ai32 = 0;
1893#endif
f337b084 1894 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1895 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1896#if U32SIZE > SIZE32
08ca2aa3 1897 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1898#endif
08ca2aa3 1899 if (!checksum)
6e449a3a 1900 mPUSHi(ai32);
73cb7263
NC
1901 else if (checksum > bits_in_uv)
1902 cdouble += (NV)ai32;
1903 else
1904 cuv += ai32;
a6ec74c1
JH
1905 }
1906 break;
49704364
WL
1907 case 'L' | TYPE_IS_SHRIEKING:
1908#if LONGSIZE != SIZE32
73cb7263 1909 while (len-- > 0) {
08ca2aa3 1910 unsigned long aulong;
f337b084
TH
1911 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1912 DO_BO_UNPACK(aulong, l);
08ca2aa3 1913 if (!checksum)
6e449a3a 1914 mPUSHu(aulong);
73cb7263
NC
1915 else if (checksum > bits_in_uv)
1916 cdouble += (NV)aulong;
1917 else
1918 cuv += aulong;
49704364
WL
1919 }
1920 break;
1921#else
1922 /* Fall through! */
1923#endif
a6ec74c1
JH
1924 case 'V':
1925 case 'N':
1926 case 'L':
73cb7263 1927 while (len-- > 0) {
08ca2aa3
TH
1928 U32 au32;
1929#if U32SIZE > SIZE32
1930 au32 = 0;
1931#endif
f337b084 1932 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1933 DO_BO_UNPACK(au32, 32);
a6ec74c1 1934#ifdef HAS_NTOHL
73cb7263
NC
1935 if (datumtype == 'N')
1936 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1937#endif
1938#ifdef HAS_VTOHL
73cb7263
NC
1939 if (datumtype == 'V')
1940 au32 = vtohl(au32);
a6ec74c1 1941#endif
08ca2aa3 1942 if (!checksum)
6e449a3a 1943 mPUSHu(au32);
fc241834
RGS
1944 else if (checksum > bits_in_uv)
1945 cdouble += (NV)au32;
1946 else
1947 cuv += au32;
a6ec74c1
JH
1948 }
1949 break;
7212898e 1950#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1951 case 'V' | TYPE_IS_SHRIEKING:
1952 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1953 while (len-- > 0) {
08ca2aa3
TH
1954 I32 ai32;
1955# if U32SIZE > SIZE32
1956 ai32 = 0;
1957# endif
f337b084 1958 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1959# ifdef HAS_NTOHL
73cb7263
NC
1960 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1961 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1962# endif
1963# ifdef HAS_VTOHL
73cb7263
NC
1964 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1965 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1966# endif
1967 if (!checksum)
6e449a3a 1968 mPUSHi(ai32);
73cb7263
NC
1969 else if (checksum > bits_in_uv)
1970 cdouble += (NV)ai32;
1971 else
1972 cuv += ai32;
068bd2e7
MHM
1973 }
1974 break;
08ca2aa3 1975#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1976 case 'p':
a6ec74c1 1977 while (len-- > 0) {
f7fe979e 1978 const char *aptr;
f337b084 1979 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1980 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1981 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1982 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1983 }
1984 break;
1985 case 'w':
a6ec74c1
JH
1986 {
1987 UV auv = 0;
1988 U32 bytes = 0;
fc241834 1989
08ca2aa3
TH
1990 while (len > 0 && s < strend) {
1991 U8 ch;
f337b084 1992 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1993 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1994 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1995 if (ch < 0x80) {
a6ec74c1 1996 bytes = 0;
6e449a3a 1997 mPUSHu(auv);
a6ec74c1
JH
1998 len--;
1999 auv = 0;
08ca2aa3 2000 continue;
a6ec74c1 2001 }
08ca2aa3 2002 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2003 const char *t;
a6ec74c1 2004
f5992bc4 2005 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2006 while (s < strend) {
f337b084 2007 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2008 sv = mul128(sv, (U8)(ch & 0x7f));
2009 if (!(ch & 0x80)) {
a6ec74c1
JH
2010 bytes = 0;
2011 break;
2012 }
2013 }
10516c54 2014 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2015 while (*t == '0')
2016 t++;
2017 sv_chop(sv, t);
6e449a3a 2018 mPUSHs(sv);
a6ec74c1
JH
2019 len--;
2020 auv = 0;
2021 }
2022 }
2023 if ((s >= strend) && bytes)
49704364 2024 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2025 }
2026 break;
2027 case 'P':
49704364
WL
2028 if (symptr->howlen == e_star)
2029 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2030 EXTEND(SP, 1);
2d3e0934 2031 if (s + sizeof(char*) <= strend) {
08ca2aa3 2032 char *aptr;
f337b084 2033 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2034 DO_BO_UNPACK_PC(aptr);
fc241834 2035 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2036 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2037 }
a6ec74c1
JH
2038 break;
2039#ifdef HAS_QUAD
2040 case 'q':
73cb7263 2041 while (len-- > 0) {
08ca2aa3 2042 Quad_t aquad;
f337b084
TH
2043 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2044 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2045 if (!checksum)
6e449a3a
MHM
2046 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2047 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2048 else if (checksum > bits_in_uv)
2049 cdouble += (NV)aquad;
2050 else
2051 cuv += aquad;
2052 }
a6ec74c1
JH
2053 break;
2054 case 'Q':
73cb7263 2055 while (len-- > 0) {
08ca2aa3 2056 Uquad_t auquad;
f337b084
TH
2057 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2058 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2059 if (!checksum)
6e449a3a
MHM
2060 mPUSHs(auquad <= UV_MAX ?
2061 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2062 else if (checksum > bits_in_uv)
2063 cdouble += (NV)auquad;
2064 else
2065 cuv += auquad;
a6ec74c1
JH
2066 }
2067 break;
08ca2aa3 2068#endif /* HAS_QUAD */
a6ec74c1
JH
2069 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2070 case 'f':
73cb7263 2071 while (len-- > 0) {
08ca2aa3 2072 float afloat;
f337b084 2073 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2074 DO_BO_UNPACK_N(afloat, float);
2075 if (!checksum)
6e449a3a 2076 mPUSHn(afloat);
08ca2aa3 2077 else
73cb7263 2078 cdouble += afloat;
fc241834 2079 }
a6ec74c1
JH
2080 break;
2081 case 'd':
73cb7263 2082 while (len-- > 0) {
08ca2aa3 2083 double adouble;
f337b084 2084 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2085 DO_BO_UNPACK_N(adouble, double);
2086 if (!checksum)
6e449a3a 2087 mPUSHn(adouble);
08ca2aa3 2088 else
73cb7263 2089 cdouble += adouble;
fc241834 2090 }
a6ec74c1 2091 break;
92d41999 2092 case 'F':
73cb7263 2093 while (len-- > 0) {
08ca2aa3 2094 NV anv;
f337b084 2095 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2096 DO_BO_UNPACK_N(anv, NV);
2097 if (!checksum)
6e449a3a 2098 mPUSHn(anv);
08ca2aa3 2099 else
73cb7263 2100 cdouble += anv;
fc241834 2101 }
92d41999
JH
2102 break;
2103#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2104 case 'D':
73cb7263 2105 while (len-- > 0) {
08ca2aa3 2106 long double aldouble;
f337b084 2107 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2108 DO_BO_UNPACK_N(aldouble, long double);
2109 if (!checksum)
6e449a3a 2110 mPUSHn(aldouble);
08ca2aa3
TH
2111 else
2112 cdouble += aldouble;
92d41999
JH
2113 }
2114 break;
2115#endif
a6ec74c1 2116 case 'u':
08ca2aa3 2117 {
f7fe979e 2118 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2119 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2120 if (l) SvPOK_on(sv);
2121 }
2122 if (utf8) {
2123 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2124 I32 a, b, c, d;
db187877 2125 char hunk[3];
08ca2aa3 2126
08ca2aa3
TH
2127 while (len > 0) {
2128 next_uni_uu(aTHX_ &s, strend, &a);
2129 next_uni_uu(aTHX_ &s, strend, &b);
2130 next_uni_uu(aTHX_ &s, strend, &c);
2131 next_uni_uu(aTHX_ &s, strend, &d);
2132 hunk[0] = (char)((a << 2) | (b >> 4));
2133 hunk[1] = (char)((b << 4) | (c >> 2));
2134 hunk[2] = (char)((c << 6) | d);
2135 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2136 len -= 3;
2137 }
2138 if (s < strend) {
f7fe979e
AL
2139 if (*s == '\n') {
2140 s++;
2141 }
08ca2aa3
TH
2142 else {
2143 /* possible checksum byte */
f7fe979e
AL
2144 const char *skip = s+UTF8SKIP(s);
2145 if (skip < strend && *skip == '\n')
2146 s = skip+1;
08ca2aa3
TH
2147 }
2148 }
2149 }
2150 } else {
fc241834
RGS
2151 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2152 I32 a, b, c, d;
db187877 2153 char hunk[3];
a6ec74c1 2154
fc241834
RGS
2155 len = PL_uudmap[*(U8*)s++] & 077;
2156 while (len > 0) {
2157 if (s < strend && ISUUCHAR(*s))
2158 a = PL_uudmap[*(U8*)s++] & 077;
2159 else
2160 a = 0;
2161 if (s < strend && ISUUCHAR(*s))
2162 b = PL_uudmap[*(U8*)s++] & 077;
2163 else
2164 b = 0;
2165 if (s < strend && ISUUCHAR(*s))
2166 c = PL_uudmap[*(U8*)s++] & 077;
2167 else
2168 c = 0;
2169 if (s < strend && ISUUCHAR(*s))
2170 d = PL_uudmap[*(U8*)s++] & 077;
2171 else
2172 d = 0;
2173 hunk[0] = (char)((a << 2) | (b >> 4));
2174 hunk[1] = (char)((b << 4) | (c >> 2));
2175 hunk[2] = (char)((c << 6) | d);
2176 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2177 len -= 3;
2178 }
2179 if (*s == '\n')
2180 s++;
2181 else /* possible checksum byte */
2182 if (s + 1 < strend && s[1] == '\n')
2183 s += 2;
a6ec74c1 2184 }
08ca2aa3
TH
2185 }
2186 XPUSHs(sv);
a6ec74c1
JH
2187 break;
2188 }
49704364 2189
a6ec74c1 2190 if (checksum) {
1109a392 2191 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2192 (checksum > bits_in_uv &&
08ca2aa3
TH
2193 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2194 NV trouble, anv;
a6ec74c1 2195
08ca2aa3 2196 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2197 while (checksum >= 16) {
2198 checksum -= 16;
08ca2aa3 2199 anv *= 65536.0;
a6ec74c1 2200 }
a6ec74c1 2201 while (cdouble < 0.0)
08ca2aa3
TH
2202 cdouble += anv;
2203 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2204 sv = newSVnv(cdouble);
a6ec74c1
JH
2205 }
2206 else {
fa8ec7c1
NC
2207 if (checksum < bits_in_uv) {
2208 UV mask = ((UV)1 << checksum) - 1;
92d41999 2209 cuv &= mask;
a6ec74c1 2210 }
c4c5f44a 2211 sv = newSVuv(cuv);
a6ec74c1 2212 }
6e449a3a 2213 mXPUSHs(sv);
a6ec74c1
JH
2214 checksum = 0;
2215 }
fc241834 2216
49704364
WL
2217 if (symptr->flags & FLAG_SLASH){
2218 if (SP - PL_stack_base - start_sp_offset <= 0)
2219 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2220 if( next_symbol(symptr) ){
2221 if( symptr->howlen == e_number )
2222 Perl_croak(aTHX_ "Count after length/code in unpack" );
2223 if( beyond ){
2224 /* ...end of char buffer then no decent length available */
2225 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2226 } else {
2227 /* take top of stack (hope it's numeric) */
2228 len = POPi;
2229 if( len < 0 )
2230 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2231 }
2232 } else {
2233 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2234 }
2235 datumtype = symptr->code;
21c16052 2236 explicit_length = FALSE;
49704364
WL
2237 goto redo_switch;
2238 }
a6ec74c1 2239 }
49704364 2240
18529408
IZ
2241 if (new_s)
2242 *new_s = s;
2243 PUTBACK;
2244 return SP - PL_stack_base - start_sp_offset;
2245}
2246
2247PP(pp_unpack)
2248{
97aff369 2249 dVAR;
18529408 2250 dSP;
bab9c0ac 2251 dPOPPOPssrl;
18529408
IZ
2252 I32 gimme = GIMME_V;
2253 STRLEN llen;
2254 STRLEN rlen;
5c144d81
NC
2255 const char *pat = SvPV_const(left, llen);
2256 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2257 const char *strend = s + rlen;
2258 const char *patend = pat + llen;
08ca2aa3 2259 I32 cnt;
18529408
IZ
2260
2261 PUTBACK;
7accc089 2262 cnt = unpackstring(pat, patend, s, strend,
49704364 2263 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2264 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2265
18529408
IZ
2266 SPAGAIN;
2267 if ( !cnt && gimme == G_SCALAR )
2268 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2269 RETURN;
2270}
2271
f337b084 2272STATIC U8 *
f7fe979e 2273doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2274{
f337b084 2275 *h++ = PL_uuemap[len];
a6ec74c1 2276 while (len > 2) {
f337b084
TH
2277 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2278 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2279 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2280 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2281 s += 3;
2282 len -= 3;
2283 }
2284 if (len > 0) {
f7fe979e 2285 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2286 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2287 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2288 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2289 *h++ = PL_uuemap[0];
a6ec74c1 2290 }
f337b084
TH
2291 *h++ = '\n';
2292 return h;
a6ec74c1
JH
2293}
2294
2295STATIC SV *
f7fe979e 2296S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2297{
8b6e33c7
AL
2298 SV *result = newSVpvn(s, l);
2299 char *const result_c = SvPV_nolen(result); /* convenience */
2300 char *out = result_c;
2301 bool skip = 1;
2302 bool ignore = 0;
a6ec74c1 2303
7918f24d
NC
2304 PERL_ARGS_ASSERT_IS_AN_INT;
2305
a6ec74c1
JH
2306 while (*s) {
2307 switch (*s) {
2308 case ' ':
2309 break;
2310 case '+':
2311 if (!skip) {
2312 SvREFCNT_dec(result);
2313 return (NULL);
2314 }
2315 break;
2316 case '0':
2317 case '1':
2318 case '2':
2319 case '3':
2320 case '4':
2321 case '5':
2322 case '6':
2323 case '7':
2324 case '8':
2325 case '9':
2326 skip = 0;
2327 if (!ignore) {
2328 *(out++) = *s;
2329 }
2330 break;
2331 case '.':
2332 ignore = 1;
2333 break;
2334 default:
2335 SvREFCNT_dec(result);
2336 return (NULL);
2337 }
2338 s++;
2339 }
2340 *(out++) = '\0';
2341 SvCUR_set(result, out - result_c);
2342 return (result);
2343}
2344
2345/* pnum must be '\0' terminated */
2346STATIC int
2347S_div128(pTHX_ SV *pnum, bool *done)
2348{
8b6e33c7
AL
2349 STRLEN len;
2350 char * const s = SvPV(pnum, len);
2351 char *t = s;
2352 int m = 0;
2353
7918f24d
NC
2354 PERL_ARGS_ASSERT_DIV128;
2355
8b6e33c7
AL
2356 *done = 1;
2357 while (*t) {
2358 const int i = m * 10 + (*t - '0');
2359 const int r = (i >> 7); /* r < 10 */
2360 m = i & 0x7F;
2361 if (r) {
2362 *done = 0;
2363 }
2364 *(t++) = '0' + r;
a6ec74c1 2365 }
8b6e33c7
AL
2366 *(t++) = '\0';
2367 SvCUR_set(pnum, (STRLEN) (t - s));
2368 return (m);
a6ec74c1
JH
2369}
2370
18529408 2371/*
7accc089
JH
2372=for apidoc packlist
2373
2374The engine implementing pack() Perl function.
2375
bfce84ec
AL
2376=cut
2377*/
7accc089
JH
2378
2379void
f7fe979e 2380Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2381{
97aff369 2382 dVAR;
aadb217d
JH
2383 tempsym_t sym;
2384
7918f24d
NC
2385 PERL_ARGS_ASSERT_PACKLIST;
2386
f7fe979e 2387 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2388
f337b084
TH
2389 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2390 Also make sure any UTF8 flag is loaded */
56eb0262 2391 SvPV_force_nolen(cat);
bfce84ec
AL
2392 if (DO_UTF8(cat))
2393 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2394
49704364
WL
2395 (void)pack_rec( cat, &sym, beglist, endlist );
2396}
2397
f337b084
TH
2398/* like sv_utf8_upgrade, but also repoint the group start markers */
2399STATIC void
2400marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2401 STRLEN len;
2402 tempsym_t *group;
f7fe979e
AL
2403 const char *from_ptr, *from_start, *from_end, **marks, **m;
2404 char *to_start, *to_ptr;
f337b084
TH
2405
2406 if (SvUTF8(sv)) return;
2407
aa07b2f6 2408 from_start = SvPVX_const(sv);
f337b084
TH
2409 from_end = from_start + SvCUR(sv);
2410 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2411 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2412 if (from_ptr == from_end) {
2413 /* Simple case: no character needs to be changed */
2414 SvUTF8_on(sv);
2415 return;
2416 }
2417
3473cf63 2418 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2419 Newx(to_start, len, char);
f337b084
TH
2420 Copy(from_start, to_start, from_ptr-from_start, char);
2421 to_ptr = to_start + (from_ptr-from_start);
2422
a02a5408 2423 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2424 for (group=sym_ptr; group; group = group->previous)
2425 marks[group->level] = from_start + group->strbeg;
2426 marks[sym_ptr->level+1] = from_end+1;
2427 for (m = marks; *m < from_ptr; m++)
2428 *m = to_start + (*m-from_start);
2429
2430 for (;from_ptr < from_end; from_ptr++) {
2431 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2432 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2433 }
2434 *to_ptr = 0;
2435
2436 while (*m == from_ptr) *m++ = to_ptr;
2437 if (m != marks + sym_ptr->level+1) {
2438 Safefree(marks);
2439 Safefree(to_start);
2440 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2441 }
2442 for (group=sym_ptr; group; group = group->previous)
2443 group->strbeg = marks[group->level] - to_start;
2444 Safefree(marks);
2445
2446 if (SvOOK(sv)) {
2447 if (SvIVX(sv)) {
b162af07 2448 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2449 from_start -= SvIVX(sv);
2450 SvIV_set(sv, 0);
2451 }
2452 SvFLAGS(sv) &= ~SVf_OOK;
2453 }
2454 if (SvLEN(sv) != 0)
2455 Safefree(from_start);
f880fe2f 2456 SvPV_set(sv, to_start);
b162af07
SP
2457 SvCUR_set(sv, to_ptr - to_start);
2458 SvLEN_set(sv, len);
f337b084
TH
2459 SvUTF8_on(sv);
2460}
2461
2462/* Exponential string grower. Makes string extension effectively O(n)
2463 needed says how many extra bytes we need (not counting the final '\0')
2464 Only grows the string if there is an actual lack of space
2465*/
2466STATIC char *
0bd48802 2467S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2468 const STRLEN cur = SvCUR(sv);
2469 const STRLEN len = SvLEN(sv);
f337b084 2470 STRLEN extend;
7918f24d
NC
2471
2472 PERL_ARGS_ASSERT_SV_EXP_GROW;
2473
f337b084
TH
2474 if (len - cur > needed) return SvPVX(sv);
2475 extend = needed > len ? needed : len;
2476 return SvGROW(sv, len+extend+1);
2477}
49704364
WL
2478
2479STATIC
2480SV **
f337b084 2481S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2482{
97aff369 2483 dVAR;
49704364 2484 tempsym_t lookahead;
f337b084
TH
2485 I32 items = endlist - beglist;
2486 bool found = next_symbol(symptr);
2487 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2488 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2489
7918f24d
NC
2490 PERL_ARGS_ASSERT_PACK_REC;
2491
f337b084
TH
2492 if (symptr->level == 0 && found && symptr->code == 'U') {
2493 marked_upgrade(aTHX_ cat, symptr);
2494 symptr->flags |= FLAG_DO_UTF8;
2495 utf8 = 0;
49704364 2496 }
f337b084 2497 symptr->strbeg = SvCUR(cat);
49704364
WL
2498
2499 while (found) {
f337b084
TH
2500 SV *fromstr;
2501 STRLEN fromlen;
2502 I32 len;
a0714e2c 2503 SV *lengthcode = NULL;
49704364 2504 I32 datumtype = symptr->code;
f337b084
TH
2505 howlen_t howlen = symptr->howlen;
2506 char *start = SvPVX(cat);
2507 char *cur = start + SvCUR(cat);
49704364 2508
f337b084
TH
2509#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2510
2511 switch (howlen) {
fc241834 2512 case e_star:
f337b084
TH
2513 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2514 0 : items;
2515 break;
2516 default:
2517 /* e_no_len and e_number */
2518 len = symptr->length;
49704364
WL
2519 break;
2520 }
2521
f337b084 2522 if (len) {
a7a3cfaa 2523 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2524
a7a3cfaa
TH
2525 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2526 /* We can process this letter. */
2527 STRLEN size = props & PACK_SIZE_MASK;
2528 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2529 }
f337b084
TH
2530 }
2531
49704364
WL
2532 /* Look ahead for next symbol. Do we have code/code? */
2533 lookahead = *symptr;
2534 found = next_symbol(&lookahead);
246f24af
TH
2535 if (symptr->flags & FLAG_SLASH) {
2536 IV count;
f337b084 2537 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2538 if (strchr("aAZ", lookahead.code)) {
2539 if (lookahead.howlen == e_number) count = lookahead.length;
2540 else {
ce399ba6
NC
2541 if (items > 0) {
2542 if (SvGAMAGIC(*beglist)) {
2543 /* Avoid reading the active data more than once
2544 by copying it to a temporary. */
2545 STRLEN len;
2546 const char *const pv = SvPV_const(*beglist, len);
740cce10 2547 SV *const temp
59cd0e26
NC
2548 = newSVpvn_flags(pv, len,
2549 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2550 *beglist = temp;
2551 }
246f24af
TH
2552 count = DO_UTF8(*beglist) ?
2553 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2554 }
246f24af
TH
2555 else count = 0;
2556 if (lookahead.code == 'Z') count++;
2557 }
2558 } else {
2559 if (lookahead.howlen == e_number && lookahead.length < items)
2560 count = lookahead.length;
2561 else count = items;
2562 }
2563 lookahead.howlen = e_number;
2564 lookahead.length = count;
2565 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2566 }
49704364 2567
fc241834
RGS
2568 /* Code inside the switch must take care to properly update
2569 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2570 doesn't simply leave using break */
1109a392 2571 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2572 default:
f337b084
TH
2573 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2574 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2575 case '%':
49704364 2576 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2577 {
2578 char *from;
2579#ifdef PERL_PACK_CAN_SHRIEKSIGN
2580 case '.' | TYPE_IS_SHRIEKING:
2581#endif
2582 case '.':
2583 if (howlen == e_star) from = start;
2584 else if (len == 0) from = cur;
2585 else {
2586 tempsym_t *group = symptr;
2587
2588 while (--len && group) group = group->previous;
2589 from = group ? start + group->strbeg : start;
2590 }
2591 fromstr = NEXTFROM;
2592 len = SvIV(fromstr);
2593 goto resize;
2594#ifdef PERL_PACK_CAN_SHRIEKSIGN
2595 case '@' | TYPE_IS_SHRIEKING:
2596#endif
a6ec74c1 2597 case '@':
28be1210
TH
2598 from = start + symptr->strbeg;
2599 resize:
2600#ifdef PERL_PACK_CAN_SHRIEKSIGN
2601 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2602#else /* PERL_PACK_CAN_SHRIEKSIGN */
2603 if (utf8)
2604#endif
2605 if (len >= 0) {
2606 while (len && from < cur) {
2607 from += UTF8SKIP(from);
2608 len--;
2609 }
2610 if (from > cur)
2611 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2612 if (len) {
2613 /* Here we know from == cur */
2614 grow:
2615 GROWING(0, cat, start, cur, len);
2616 Zero(cur, len, char);
2617 cur += len;
2618 } else if (from < cur) {
2619 len = cur - from;
2620 goto shrink;
2621 } else goto no_change;
2622 } else {
2623 cur = from;
2624 len = -len;
2625 goto utf8_shrink;
f337b084 2626 }
28be1210
TH
2627 else {
2628 len -= cur - from;
f337b084 2629 if (len > 0) goto grow;
28be1210 2630 if (len == 0) goto no_change;
fc241834 2631 len = -len;
28be1210 2632 goto shrink;
f337b084 2633 }
a6ec74c1 2634 break;
28be1210 2635 }
fc241834 2636 case '(': {
49704364 2637 tempsym_t savsym = *symptr;
66c611c5
MHM
2638 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2639 symptr->flags |= group_modifiers;
49704364
WL
2640 symptr->patend = savsym.grpend;
2641 symptr->level++;
f337b084 2642 symptr->previous = &lookahead;
18529408 2643 while (len--) {
f337b084
TH
2644 U32 was_utf8;
2645 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2646 else symptr->flags &= ~FLAG_PARSE_UTF8;
2647 was_utf8 = SvUTF8(cat);
49704364 2648 symptr->patptr = savsym.grpbeg;
f337b084
TH
2649 beglist = pack_rec(cat, symptr, beglist, endlist);
2650 if (SvUTF8(cat) != was_utf8)
2651 /* This had better be an upgrade while in utf8==0 mode */
2652 utf8 = 1;
2653
49704364 2654 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2655 break; /* No way to continue */
2656 }
ee790063 2657 items = endlist - beglist;
f337b084
TH
2658 lookahead.flags = symptr->flags & ~group_modifiers;
2659 goto no_change;
18529408 2660 }
62f95557
IZ
2661 case 'X' | TYPE_IS_SHRIEKING:
2662 if (!len) /* Avoid division by 0 */
2663 len = 1;
f337b084
TH
2664 if (utf8) {
2665 char *hop, *last;
2666 I32 l = len;
2667 hop = last = start;
2668 while (hop < cur) {
2669 hop += UTF8SKIP(hop);
2670 if (--l == 0) {
2671 last = hop;
2672 l = len;
2673 }
2674 }
2675 if (last > cur)
2676 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2677 cur = last;
2678 break;
2679 }
2680 len = (cur-start) % len;
62f95557 2681 /* FALL THROUGH */
a6ec74c1 2682 case 'X':
f337b084
TH
2683 if (utf8) {
2684 if (len < 1) goto no_change;
28be1210 2685 utf8_shrink:
f337b084
TH
2686 while (len > 0) {
2687 if (cur <= start)
28be1210
TH
2688 Perl_croak(aTHX_ "'%c' outside of string in pack",
2689 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2690 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2691 if (cur <= start)
28be1210
TH
2692 Perl_croak(aTHX_ "'%c' outside of string in pack",
2693 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2694 }
2695 len--;
2696 }
2697 } else {
fc241834 2698 shrink:
f337b084 2699 if (cur - start < len)
28be1210
TH
2700 Perl_croak(aTHX_ "'%c' outside of string in pack",
2701 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2702 cur -= len;
2703 }
2704 if (cur < start+symptr->strbeg) {
2705 /* Make sure group starts don't point into the void */
2706 tempsym_t *group;
9e27e96a 2707 const STRLEN length = cur-start;
f337b084
TH
2708 for (group = symptr;
2709 group && length < group->strbeg;
2710 group = group->previous) group->strbeg = length;
2711 lookahead.strbeg = length;
2712 }
a6ec74c1 2713 break;
fc241834
RGS
2714 case 'x' | TYPE_IS_SHRIEKING: {
2715 I32 ai32;
62f95557
IZ
2716 if (!len) /* Avoid division by 0 */
2717 len = 1;
230e1fce 2718 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2719 else ai32 = (cur - start) % len;
2720 if (ai32 == 0) goto no_change;
2721 len -= ai32;
2722 }
2723 /* FALL THROUGH */
a6ec74c1 2724 case 'x':
f337b084 2725 goto grow;
a6ec74c1
JH
2726 case 'A':
2727 case 'Z':
f337b084 2728 case 'a': {
f7fe979e 2729 const char *aptr;
f337b084 2730
a6ec74c1 2731 fromstr = NEXTFROM;
e62f0680 2732 aptr = SvPV_const(fromstr, fromlen);
f337b084 2733 if (DO_UTF8(fromstr)) {
f7fe979e 2734 const char *end, *s;
f337b084
TH
2735
2736 if (!utf8 && !SvUTF8(cat)) {
2737 marked_upgrade(aTHX_ cat, symptr);
2738 lookahead.flags |= FLAG_DO_UTF8;
2739 lookahead.strbeg = symptr->strbeg;
2740 utf8 = 1;
2741 start = SvPVX(cat);
2742 cur = start + SvCUR(cat);
2743 }
fc241834 2744 if (howlen == e_star) {
f337b084
TH
2745 if (utf8) goto string_copy;
2746 len = fromlen+1;
2747 }
2748 s = aptr;
2749 end = aptr + fromlen;
2750 fromlen = datumtype == 'Z' ? len-1 : len;
2751 while ((I32) fromlen > 0 && s < end) {
2752 s += UTF8SKIP(s);
2753 fromlen--;
2754 }
2755 if (s > end)
2756 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2757 if (utf8) {
fc241834 2758 len = fromlen;
f337b084
TH
2759 if (datumtype == 'Z') len++;
2760 fromlen = s-aptr;
2761 len += fromlen;
fc241834 2762
f337b084 2763 goto string_copy;
fc241834 2764 }
f337b084
TH
2765 fromlen = len - fromlen;
2766 if (datumtype == 'Z') fromlen--;
2767 if (howlen == e_star) {
2768 len = fromlen;
2769 if (datumtype == 'Z') len++;
fc241834 2770 }
f337b084 2771 GROWING(0, cat, start, cur, len);
fc241834 2772 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2773 datumtype | TYPE_IS_PACK))
2774 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2775 cur += fromlen;
a6ec74c1 2776 len -= fromlen;
f337b084
TH
2777 } else if (utf8) {
2778 if (howlen == e_star) {
2779 len = fromlen;
2780 if (datumtype == 'Z') len++;
a6ec74c1 2781 }
f337b084
TH
2782 if (len <= (I32) fromlen) {
2783 fromlen = len;
2784 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2785 }
fc241834 2786 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2787 upgrade, so:
2788 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2789 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2790 len -= fromlen;
2791 while (fromlen > 0) {
230e1fce 2792 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2793 aptr++;
2794 fromlen--;
fc241834 2795 }
f337b084
TH
2796 } else {
2797 string_copy:
2798 if (howlen == e_star) {
2799 len = fromlen;
2800 if (datumtype == 'Z') len++;
2801 }
2802 if (len <= (I32) fromlen) {
2803 fromlen = len;
2804 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2805 }
f337b084
TH
2806 GROWING(0, cat, start, cur, len);
2807 Copy(aptr, cur, fromlen, char);
2808 cur += fromlen;
2809 len -= fromlen;
a6ec74c1 2810 }
f337b084
TH
2811 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2812 cur += len;
a6ec74c1 2813 break;
f337b084 2814 }
a6ec74c1 2815 case 'B':
f337b084 2816 case 'b': {
b83604b4 2817 const char *str, *end;
f337b084
TH
2818 I32 l, field_len;
2819 U8 bits;
2820 bool utf8_source;
2821 U32 utf8_flags;
a6ec74c1 2822
fc241834 2823 fromstr = NEXTFROM;
b83604b4 2824 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2825 end = str + fromlen;
2826 if (DO_UTF8(fromstr)) {
2827 utf8_source = TRUE;
041457d9 2828 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2829 } else {
2830 utf8_source = FALSE;
2831 utf8_flags = 0; /* Unused, but keep compilers happy */
2832 }
2833 if (howlen == e_star) len = fromlen;
2834 field_len = (len+7)/8;
2835 GROWING(utf8, cat, start, cur, field_len);
2836 if (len > (I32)fromlen) len = fromlen;
2837 bits = 0;
2838 l = 0;
2839 if (datumtype == 'B')
2840 while (l++ < len) {
2841 if (utf8_source) {
95b63a38 2842 UV val = 0;
f337b084
TH
2843 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2844 bits |= val & 1;
2845 } else bits |= *str++ & 1;
2846 if (l & 7) bits <<= 1;
fc241834 2847 else {
f337b084
TH
2848 PUSH_BYTE(utf8, cur, bits);
2849 bits = 0;
a6ec74c1
JH
2850 }
2851 }
f337b084
TH
2852 else
2853 /* datumtype == 'b' */
2854 while (l++ < len) {
2855 if (utf8_source) {
95b63a38 2856 UV val = 0;
f337b084
TH
2857 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2858 if (val & 1) bits |= 0x80;
2859 } else if (*str++ & 1)
2860 bits |= 0x80;
2861 if (l & 7) bits >>= 1;
fc241834 2862 else {
f337b084
TH
2863 PUSH_BYTE(utf8, cur, bits);
2864 bits = 0;
a6ec74c1
JH
2865 }
2866 }
f337b084
TH
2867 l--;
2868 if (l & 7) {
fc241834 2869 if (datumtype == 'B')
f337b084 2870 bits <<= 7 - (l & 7);
fc241834 2871 else
f337b084
TH
2872 bits >>= 7 - (l & 7);
2873 PUSH_BYTE(utf8, cur, bits);
2874 l += 7;
a6ec74c1 2875 }
f337b084
TH
2876 /* Determine how many chars are left in the requested field */
2877 l /= 8;
2878 if (howlen == e_star) field_len = 0;
2879 else field_len -= l;
2880 Zero(cur, field_len, char);
2881 cur += field_len;
a6ec74c1 2882 break;
f337b084 2883 }
a6ec74c1 2884 case 'H':
f337b084 2885 case 'h': {
10516c54 2886 const char *str, *end;
f337b084
TH
2887 I32 l, field_len;
2888 U8 bits;
2889 bool utf8_source;
2890 U32 utf8_flags;
a6ec74c1 2891
fc241834 2892 fromstr = NEXTFROM;
10516c54 2893 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2894 end = str + fromlen;
2895 if (DO_UTF8(fromstr)) {
2896 utf8_source = TRUE;
041457d9 2897 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2898 } else {
2899 utf8_source = FALSE;
2900 utf8_flags = 0; /* Unused, but keep compilers happy */
2901 }
2902 if (howlen == e_star) len = fromlen;
2903 field_len = (len+1)/2;
2904 GROWING(utf8, cat, start, cur, field_len);
2905 if (!utf8 && len > (I32)fromlen) len = fromlen;
2906 bits = 0;
2907 l = 0;
2908 if (datumtype == 'H')
2909 while (l++ < len) {
2910 if (utf8_source) {
95b63a38 2911 UV val = 0;
f337b084
TH
2912 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2913 if (val < 256 && isALPHA(val))
2914 bits |= (val + 9) & 0xf;
a6ec74c1 2915 else
f337b084
TH
2916 bits |= val & 0xf;
2917 } else if (isALPHA(*str))
2918 bits |= (*str++ + 9) & 0xf;
2919 else
2920 bits |= *str++ & 0xf;
2921 if (l & 1) bits <<= 4;
fc241834 2922 else {
f337b084
TH
2923 PUSH_BYTE(utf8, cur, bits);
2924 bits = 0;
a6ec74c1
JH
2925 }
2926 }
f337b084
TH
2927 else
2928 while (l++ < len) {
2929 if (utf8_source) {
95b63a38 2930 UV val = 0;
f337b084
TH
2931 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2932 if (val < 256 && isALPHA(val))
2933 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2934 else
f337b084
TH
2935 bits |= (val & 0xf) << 4;
2936 } else if (isALPHA(*str))
2937 bits |= ((*str++ + 9) & 0xf) << 4;
2938 else
2939 bits |= (*str++ & 0xf) << 4;
2940 if (l & 1) bits >>= 4;
fc241834 2941 else {
f337b084
TH
2942 PUSH_BYTE(utf8, cur, bits);
2943 bits = 0;
a6ec74c1 2944 }
fc241834 2945 }
f337b084
TH
2946 l--;
2947 if (l & 1) {
2948 PUSH_BYTE(utf8, cur, bits);
2949 l++;
2950 }
2951 /* Determine how many chars are left in the requested field */
2952 l /= 2;
2953 if (howlen == e_star) field_len = 0;
2954 else field_len -= l;
2955 Zero(cur, field_len, char);
2956 cur += field_len;
2957 break;
fc241834
RGS
2958 }
2959 case 'c':
f337b084
TH
2960 while (len-- > 0) {
2961 IV aiv;
2962 fromstr = NEXTFROM;
2963 aiv = SvIV(fromstr);
2964 if ((-128 > aiv || aiv > 127) &&
2965 ckWARN(WARN_PACK))
2966 Perl_warner(aTHX_ packWARN(WARN_PACK),
2967 "Character in 'c' format wrapped in pack");
585ec06d 2968 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2969 }
2970 break;
2971 case 'C':
f337b084
TH
2972 if (len == 0) {
2973 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2974 break;
2975 }
a6ec74c1 2976 while (len-- > 0) {
f337b084 2977 IV aiv;
a6ec74c1 2978 fromstr = NEXTFROM;
f337b084
TH
2979 aiv = SvIV(fromstr);
2980 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2981 ckWARN(WARN_PACK))
2982 Perl_warner(aTHX_ packWARN(WARN_PACK),
2983 "Character in 'C' format wrapped in pack");
1651fc44 2984 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2985 }
fc241834
RGS
2986 break;
2987 case 'W': {
2988 char *end;
670f1322 2989 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2990
2991 end = start+SvLEN(cat)-1;
2992 if (utf8) end -= UTF8_MAXLEN-1;
2993 while (len-- > 0) {
2994 UV auv;
2995 fromstr = NEXTFROM;
2996 auv = SvUV(fromstr);
2997 if (in_bytes) auv = auv % 0x100;
2998 if (utf8) {
2999 W_utf8:
3000 if (cur > end) {
3001 *cur = '\0';
b162af07 3002 SvCUR_set(cat, cur - start);
fc241834
RGS
3003
3004 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3005 end = start+SvLEN(cat)-UTF8_MAXLEN;
3006 }
230e1fce
NC
3007 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3008 NATIVE_TO_UNI(auv),
041457d9 3009 warn_utf8 ?
230e1fce 3010 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3011 } else {
3012 if (auv >= 0x100) {
3013 if (!SvUTF8(cat)) {
3014 *cur = '\0';
b162af07 3015 SvCUR_set(cat, cur - start);
fc241834
RGS
3016 marked_upgrade(aTHX_ cat, symptr);
3017 lookahead.flags |= FLAG_DO_UTF8;
3018 lookahead.strbeg = symptr->strbeg;
3019 utf8 = 1;
3020 start = SvPVX(cat);
3021 cur = start + SvCUR(cat);
3022 end = start+SvLEN(cat)-UTF8_MAXLEN;
3023 goto W_utf8;
3024 }
3025 if (ckWARN(WARN_PACK))
3026 Perl_warner(aTHX_ packWARN(WARN_PACK),
3027 "Character in 'W' format wrapped in pack");
3028 auv &= 0xff;
3029 }
3030 if (cur >= end) {
3031 *cur = '\0';
b162af07 3032 SvCUR_set(cat, cur - start);
fc241834
RGS
3033 GROWING(0, cat, start, cur, len+1);
3034 end = start+SvLEN(cat)-1;
3035 }
fe2774ed 3036 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3037 }
3038 }
3039 break;
fc241834
RGS
3040 }
3041 case 'U': {
3042 char *end;
3043
3044 if (len == 0) {
3045 if (!(symptr->flags & FLAG_DO_UTF8)) {
3046 marked_upgrade(aTHX_ cat, symptr);
3047 lookahead.flags |= FLAG_DO_UTF8;
3048 lookahead.strbeg = symptr->strbeg;
3049 }
3050 utf8 = 0;
3051 goto no_change;
3052 }
3053
3054 end = start+SvLEN(cat);
3055 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3056 while (len-- > 0) {
fc241834 3057 UV auv;
a6ec74c1 3058 fromstr = NEXTFROM;
fc241834
RGS
3059 auv = SvUV(fromstr);
3060 if (utf8) {
230e1fce 3061 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3062 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3063 warn_utf8 ?
fc241834
RGS
3064 0 : UNICODE_ALLOW_ANY);
3065 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3066 *cur = '\0';
b162af07 3067 SvCUR_set(cat, cur - start);
fc241834
RGS
3068 GROWING(0, cat, start, cur,
3069 len+(endb-buffer)*UTF8_EXPAND);
3070 end = start+SvLEN(cat);
3071 }
64844641 3072 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3073 } else {
3074 if (cur >= end) {
3075 *cur = '\0';
b162af07 3076 SvCUR_set(cat, cur - start);
fc241834
RGS
3077 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3078 end = start+SvLEN(cat)-UTF8_MAXLEN;
3079 }
230e1fce 3080 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3081 warn_utf8 ?
230e1fce 3082 0 : UNICODE_ALLOW_ANY);
fc241834 3083 }
a6ec74c1 3084 }
a6ec74c1 3085 break;
fc241834 3086 }
a6ec74c1
JH
3087 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3088 case 'f':
a6ec74c1 3089 while (len-- > 0) {
f337b084
TH
3090 float afloat;
3091 NV anv;
a6ec74c1 3092 fromstr = NEXTFROM;
f337b084 3093 anv = SvNV(fromstr);
5cdb9e01 3094#ifdef __VOS__
f337b084 3095 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3096 during conversion from double to float into infinity, so we
3097 do it by hand. This code should either be generalized for
3098 any OS that needs it, or removed if and when VOS implements
3099 posix-976 (suggestion to support mapping to infinity).
3100 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3101{
3102extern const float _float_constants[];
f337b084 3103 if (anv > FLT_MAX)
fc241834 3104 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3105 else if (anv < -FLT_MAX)
fc241834 3106 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3107 else afloat = (float) anv;
3722f0dc 3108}
f337b084 3109#else /* __VOS__ */
baf3cf9c 3110# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3111 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3112 * on Alpha; fake it if we don't have them.
3113 */
f337b084 3114 if (anv > FLT_MAX)
fc241834 3115 afloat = FLT_MAX;
f337b084 3116 else if (anv < -FLT_MAX)
fc241834 3117 afloat = -FLT_MAX;
f337b084 3118 else afloat = (float)anv;
baf3cf9c 3119# else
f337b084 3120 afloat = (float)anv;
baf3cf9c 3121# endif
f337b084 3122#endif /* __VOS__ */
1109a392 3123 DO_BO_PACK_N(afloat, float);
f337b084 3124 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3125 }
3126 break;
3127 case 'd':
a6ec74c1 3128 while (len-- > 0) {
f337b084
TH
3129 double adouble;
3130 NV anv;
a6ec74c1 3131 fromstr = NEXTFROM;
f337b084 3132 anv = SvNV(fromstr);
5cdb9e01 3133#ifdef __VOS__
f337b084 3134 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3135 during conversion from long double to double into infinity,
3136 so we do it by hand. This code should either be generalized
3137 for any OS that needs it, or removed if and when VOS
3138 implements posix-976 (suggestion to support mapping to
3139 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3140{
3141extern const double _double_constants[];
f337b084 3142 if (anv > DBL_MAX)
fc241834 3143 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3144 else if (anv < -DBL_MAX)
fc241834 3145 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3146 else adouble = (double) anv;
3722f0dc 3147}
f337b084 3148#else /* __VOS__ */
baf3cf9c 3149# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3150 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3151 * on Alpha; fake it if we don't have them.
3152 */
f337b084 3153 if (anv > DBL_MAX)
fc241834 3154 adouble = DBL_MAX;
f337b084 3155 else if (anv < -DBL_MAX)
fc241834 3156 adouble = -DBL_MAX;
f337b084 3157 else adouble = (double)anv;
baf3cf9c 3158# else
f337b084 3159 adouble = (double)anv;
baf3cf9c 3160# endif
f337b084 3161#endif /* __VOS__ */
1109a392 3162 DO_BO_PACK_N(adouble, double);
f337b084 3163 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3164 }
3165 break;
fc241834
RGS
3166 case 'F': {
3167 NV anv;
1109a392 3168 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3169 while (len-- > 0) {
3170 fromstr = NEXTFROM;
3171 anv = SvNV(fromstr);
1109a392 3172 DO_BO_PACK_N(anv, NV);
fc241834 3173 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3174 }
3175 break;
fc241834 3176 }
92d41999 3177#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3178 case 'D': {
3179 long double aldouble;
1109a392
MHM
3180 /* long doubles can have unused bits, which may be nonzero */
3181 Zero(&aldouble, 1, long double);
92d41999
JH
3182 while (len-- > 0) {
3183 fromstr = NEXTFROM;
3184 aldouble = (long double)SvNV(fromstr);
1109a392 3185 DO_BO_PACK_N(aldouble, long double);
fc241834 3186 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3187 }
3188 break;
fc241834 3189 }
92d41999 3190#endif
7212898e 3191#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3192 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3193#endif
a6ec74c1
JH
3194 case 'n':
3195 while (len-- > 0) {
f337b084 3196 I16 ai16;
a6ec74c1 3197 fromstr = NEXTFROM;
ef108786 3198 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3199#ifdef HAS_HTONS
ef108786 3200 ai16 = PerlSock_htons(ai16);
a6ec74c1 3201#endif
f337b084 3202 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3203 }
3204 break;
7212898e 3205#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3206 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3207#endif
a6ec74c1
JH
3208 case 'v':
3209 while (len-- > 0) {
f337b084 3210 I16 ai16;
a6ec74c1 3211 fromstr = NEXTFROM;
ef108786 3212 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3213#ifdef HAS_HTOVS
ef108786 3214 ai16 = htovs(ai16);
a6ec74c1 3215#endif
f337b084 3216 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3217 }
3218 break;
49704364 3219 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3220#if SHORTSIZE != SIZE16
fc241834 3221 while (len-- > 0) {
f337b084 3222 unsigned short aushort;
fc241834
RGS
3223 fromstr = NEXTFROM;
3224 aushort = SvUV(fromstr);
3225 DO_BO_PACK(aushort, s);
f337b084 3226 PUSH_VAR(utf8, cur, aushort);
fc241834 3227 }
49704364
WL
3228 break;
3229#else
3230 /* Fall through! */
a6ec74c1 3231#endif
49704364 3232 case 'S':
fc241834 3233 while (len-- > 0) {
f337b084 3234 U16 au16;
fc241834
RGS
3235 fromstr = NEXTFROM;
3236 au16 = (U16)SvUV(fromstr);
3237 DO_BO_PACK(au16, 16);
f337b084 3238 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3239 }
3240 break;
49704364 3241 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3242#if SHORTSIZE != SIZE16
fc241834 3243 while (len-- > 0) {
f337b084 3244 short ashort;
fc241834
RGS
3245 fromstr = NEXTFROM;
3246 ashort = SvIV(fromstr);
3247 DO_BO_PACK(ashort, s);
f337b084 3248 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3249 }
49704364
WL
3250 break;
3251#else
3252 /* Fall through! */
a6ec74c1 3253#endif
49704364
WL
3254 case 's':
3255 while (len-- > 0) {
f337b084 3256 I16 ai16;
49704364 3257 fromstr = NEXTFROM;
ef108786
MHM
3258 ai16 = (I16)SvIV(fromstr);
3259 DO_BO_PACK(ai16, 16);
f337b084 3260 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3261 }
3262 break;
3263 case 'I':
49704364 3264 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3265 while (len-- > 0) {
f337b084 3266 unsigned int auint;
a6ec74c1
JH
3267 fromstr = NEXTFROM;
3268 auint = SvUV(fromstr);
1109a392 3269 DO_BO_PACK(auint, i);
f337b084 3270 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3271 }
3272 break;
92d41999
JH
3273 case 'j':
3274 while (len-- > 0) {
f337b084 3275 IV aiv;
92d41999
JH
3276 fromstr = NEXTFROM;
3277 aiv = SvIV(fromstr);
1109a392
MHM
3278#if IVSIZE == INTSIZE
3279 DO_BO_PACK(aiv, i);
3280#elif IVSIZE == LONGSIZE
3281 DO_BO_PACK(aiv, l);
3282#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3283 DO_BO_PACK(aiv, 64);
f337b084
TH
3284#else
3285 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3286#endif
f337b084 3287 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3288 }
3289 break;
3290 case 'J':
3291 while (len-- > 0) {
f337b084 3292 UV auv;
92d41999
JH
3293 fromstr = NEXTFROM;
3294 auv = SvUV(fromstr);
1109a392
MHM
3295#if UVSIZE == INTSIZE
3296 DO_BO_PACK(auv, i);
3297#elif UVSIZE == LONGSIZE
3298 DO_BO_PACK(auv, l);
3299#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3300 DO_BO_PACK(auv, 64);
f337b084
TH
3301#else
3302 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3303#endif
f337b084 3304 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3305 }
3306 break;
a6ec74c1
JH
3307 case 'w':
3308 while (len-- > 0) {
f337b084 3309 NV anv;
a6ec74c1 3310 fromstr = NEXTFROM;
15e9f109 3311 anv = SvNV(fromstr);
a6ec74c1 3312
f337b084
TH
3313 if (anv < 0) {
3314 *cur = '\0';
b162af07 3315 SvCUR_set(cat, cur - start);
49704364 3316 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3317 }
a6ec74c1 3318
196b62db
NC
3319 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3320 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3321 any negative IVs will have already been got by the croak()
3322 above. IOK is untrue for fractions, so we test them
3323 against UV_MAX_P1. */
f337b084
TH
3324 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3325 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3326 char *in = buf + sizeof(buf);
196b62db 3327 UV auv = SvUV(fromstr);
a6ec74c1
JH
3328
3329 do {
eb160463 3330 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3331 auv >>= 7;
3332 } while (auv);
3333 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3334 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3335 in, (buf + sizeof(buf)) - in);
3336 } else if (SvPOKp(fromstr))
3337 goto w_string;
a6ec74c1 3338 else if (SvNOKp(fromstr)) {
0258719b
NC
3339 /* 10**NV_MAX_10_EXP is the largest power of 10
3340 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3341 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3342 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3343 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3344 Some C compilers are strict about integral constant
3345 expressions so we conservatively divide by a slightly
3346 smaller integer instead of multiplying by the exact
3347 floating-point value.
0258719b
NC
3348 */
3349#ifdef NV_MAX_10_EXP
f337b084 3350 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3351 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3352#else
f337b084 3353 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3354 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3355#endif
a6ec74c1
JH
3356 char *in = buf + sizeof(buf);
3357
8b6e33c7 3358 anv = Perl_floor(anv);
a6ec74c1 3359 do {
8b6e33c7 3360 const NV next = Perl_floor(anv / 128);
a6ec74c1 3361 if (in <= buf) /* this cannot happen ;-) */
49704364 3362 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3363 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3364 anv = next;
3365 } while (anv > 0);
a6ec74c1 3366 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3367 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3368 in, (buf + sizeof(buf)) - in);
3369 } else {
8b6e33c7
AL
3370 const char *from;
3371 char *result, *in;
735b914b
JH
3372 SV *norm;
3373 STRLEN len;
3374 bool done;
3375
f337b084 3376 w_string:
735b914b 3377 /* Copy string and check for compliance */
349d4f2f 3378 from = SvPV_const(fromstr, len);
735b914b 3379 if ((norm = is_an_int(from, len)) == NULL)
49704364 3380 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3381
a02a5408 3382 Newx(result, len, char);
735b914b
JH
3383 in = result + len;
3384 done = FALSE;
f337b084 3385 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3386 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3387 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3388 in, (result + len) - in);
735b914b
JH
3389 Safefree(result);
3390 SvREFCNT_dec(norm); /* free norm */
fc241834 3391 }
a6ec74c1
JH
3392 }
3393 break;
3394 case 'i':
49704364 3395 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3396 while (len-- > 0) {
f337b084 3397 int aint;
a6ec74c1
JH
3398 fromstr = NEXTFROM;
3399 aint = SvIV(fromstr);
1109a392 3400 DO_BO_PACK(aint, i);
f337b084 3401 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3402 }
3403 break;
7212898e 3404#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3405 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3406#endif
a6ec74c1
JH
3407 case 'N':
3408 while (len-- > 0) {
f337b084 3409 U32 au32;
a6ec74c1 3410 fromstr = NEXTFROM;
ef108786 3411 au32 = SvUV(fromstr);
a6ec74c1 3412#ifdef HAS_HTONL
ef108786 3413 au32 = PerlSock_htonl(au32);
a6ec74c1 3414#endif
f337b084 3415 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3416 }
3417 break;
7212898e 3418#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3419 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3420#endif
a6ec74c1
JH
3421 case 'V':
3422 while (len-- > 0) {
f337b084 3423 U32 au32;
a6ec74c1 3424 fromstr = NEXTFROM;
ef108786 3425 au32 = SvUV(fromstr);
a6ec74c1 3426#ifdef HAS_HTOVL
ef108786 3427 au32 = htovl(au32);
a6ec74c1 3428#endif
f337b084 3429 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3430 }
3431 break;
49704364 3432 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3433#if LONGSIZE != SIZE32
fc241834 3434 while (len-- > 0) {
f337b084 3435 unsigned long aulong;
fc241834
RGS
3436 fromstr = NEXTFROM;
3437 aulong = SvUV(fromstr);
3438 DO_BO_PACK(aulong, l);
f337b084 3439 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3440 }
49704364
WL
3441 break;
3442#else
3443 /* Fall though! */
a6ec74c1 3444#endif
49704364 3445 case 'L':
fc241834 3446 while (len-- > 0) {
f337b084 3447 U32 au32;
fc241834
RGS
3448 fromstr = NEXTFROM;
3449 au32 = SvUV(fromstr);
3450 DO_BO_PACK(au32, 32);
f337b084 3451 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3452 }
3453 break;
49704364 3454 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3455#if LONGSIZE != SIZE32
fc241834 3456 while (len-- > 0) {
f337b084 3457 long along;
fc241834
RGS
3458 fromstr = NEXTFROM;
3459 along = SvIV(fromstr);
3460 DO_BO_PACK(along, l);
f337b084 3461 PUSH_VAR(utf8, cur, along);
a6ec74c1 3462 }
49704364
WL
3463 break;
3464#else
3465 /* Fall though! */
a6ec74c1 3466#endif
49704364
WL
3467 case 'l':
3468 while (len-- > 0) {
f337b084 3469 I32 ai32;
49704364 3470 fromstr = NEXTFROM;
ef108786
MHM
3471 ai32 = SvIV(fromstr);
3472 DO_BO_PACK(ai32, 32);
f337b084 3473 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3474 }
3475 break;
3476#ifdef HAS_QUAD
3477 case 'Q':
3478 while (len-- > 0) {
f337b084 3479 Uquad_t auquad;
a6ec74c1 3480 fromstr = NEXTFROM;
f337b084 3481 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3482 DO_BO_PACK(auquad, 64);
f337b084 3483 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3484 }
3485 break;
3486 case 'q':
3487 while (len-- > 0) {
f337b084 3488 Quad_t aquad;
a6ec74c1
JH
3489 fromstr = NEXTFROM;
3490 aquad = (Quad_t)SvIV(fromstr);
1109a392 3491 DO_BO_PACK(aquad, 64);
f337b084 3492 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3493 }
3494 break;
f337b084 3495#endif /* HAS_QUAD */
a6ec74c1
JH
3496 case 'P':
3497 len = 1; /* assume SV is correct length */
f337b084 3498 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3499 /* Fall through! */
a6ec74c1
JH
3500 case 'p':
3501 while (len-- > 0) {
83003860 3502 const char *aptr;
f337b084 3503
a6ec74c1 3504 fromstr = NEXTFROM;
28a4f200
TH
3505 SvGETMAGIC(fromstr);
3506 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3507 else {
a6ec74c1
JH
3508 /* XXX better yet, could spirit away the string to
3509 * a safe spot and hang on to it until the result
3510 * of pack() (and all copies of the result) are
3511 * gone.
3512 */
041457d9
DM
3513 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3514 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3515 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3516 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3517 }
3518 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3519 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3520 else
2596d9fe 3521 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3522 }
07409e01 3523 DO_BO_PACK_PC(aptr);
f337b084 3524 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3525 }
3526 break;
fc241834 3527 case 'u': {
f7fe979e 3528 const char *aptr, *aend;
fc241834 3529 bool from_utf8;
f337b084 3530
a6ec74c1 3531 fromstr = NEXTFROM;
fc241834
RGS
3532 if (len <= 2) len = 45;
3533 else len = len / 3 * 3;
3534 if (len >= 64) {
0fdc08b6
RGS
3535 if (ckWARN(WARN_PACK))
3536 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3537 "Field too wide in 'u' format in pack");
3538 len = 63;
3539 }
83003860 3540 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3541 from_utf8 = DO_UTF8(fromstr);
3542 if (from_utf8) {
3543 aend = aptr + fromlen;
3544 fromlen = sv_len_utf8(fromstr);
3545 } else aend = NULL; /* Unused, but keep compilers happy */
3546 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3547 while (fromlen > 0) {
fc241834 3548 U8 *end;
a6ec74c1 3549 I32 todo;
fc241834 3550 U8 hunk[1+63/3*4+1];
a6ec74c1 3551
eb160463 3552 if ((I32)fromlen > len)
a6ec74c1
JH
3553 todo = len;
3554 else
3555 todo = fromlen;
fc241834
RGS
3556 if (from_utf8) {
3557 char buffer[64];
3558 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3559 'u' | TYPE_IS_PACK)) {
3560 *cur = '\0';
b162af07 3561 SvCUR_set(cat, cur - start);
fc241834
RGS
3562 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3563 }
3564 end = doencodes(hunk, buffer, todo);
3565 } else {
3566 end = doencodes(hunk, aptr, todo);
3567 aptr += todo;
3568 }
3569 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3570 fromlen -= todo;
3571 }
a6ec74c1
JH
3572 break;
3573 }
f337b084
TH
3574 }
3575 *cur = '\0';
b162af07 3576 SvCUR_set(cat, cur - start);
f337b084 3577 no_change:
49704364 3578 *symptr = lookahead;
a6ec74c1 3579 }
49704364 3580 return beglist;
18529408
IZ
3581}
3582#undef NEXTFROM
3583
3584
3585PP(pp_pack)
3586{
97aff369 3587 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3588 register SV *cat = TARG;
3589 STRLEN fromlen;
349d4f2f
NC
3590 SV *pat_sv = *++MARK;
3591 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3592 register const char *patend = pat + fromlen;
18529408
IZ
3593
3594 MARK++;
3595 sv_setpvn(cat, "", 0);
f337b084 3596 SvUTF8_off(cat);
18529408 3597
7accc089 3598 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3599
a6ec74c1
JH
3600 SvSETMAGIC(cat);
3601 SP = ORIGMARK;
3602 PUSHs(cat);
3603 RETURN;
3604}
a6ec74c1 3605
73cb7263
NC
3606/*
3607 * Local variables:
3608 * c-indentation-style: bsd
3609 * c-basic-offset: 4
3610 * indent-tabs-mode: t
3611 * End:
3612 *
37442d52
RGS
3613 * ex: set ts=8 sts=4 sw=4 noet:
3614 */