This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: blead trie problems in tru64 with -DDEBUGGING
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
acde74e1 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
WL
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 182 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
325# elif PTRSIZE == IVSIZE
326# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
330# else
331# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
333# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 335# endif
1109a392 336
7212898e 337# if defined(my_htolen) && defined(my_letohn) && \
1109a392 338 defined(my_htoben) && defined(my_betohn)
7212898e 339# define DO_BO_UNPACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347
7212898e 348# define DO_BO_PACK_N(var, type) \
1109a392 349 STMT_START { \
66c611c5 350 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
353 default: break; \
354 } \
355 } STMT_END
7212898e
NC
356# else
357# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
359# endif
360
f337b084 361#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 362
78d46eaa 363#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 364#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
365#define PACK_SIZE_MASK 0x3F
366
78d46eaa
NC
367/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
369
370#if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
372#endif
373
374typedef U8 packprops_t;
78d46eaa
NC
375#if 'J'-'I' == 1
376/* ASCII */
f43a9a31 377STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
378 /* normal */
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 385#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 386 /* D */ LONG_DOUBLESIZE,
80a13697 387#else
a7a3cfaa 388 0,
80a13697 389#endif
a7a3cfaa
TH
390 0,
391 /* F */ NVSIZE,
392 0, 0,
393 /* I */ sizeof(unsigned int),
394 /* J */ UVSIZE,
395 0,
396 /* L */ SIZE32,
397 0,
398 /* N */ SIZE32,
399 0, 0,
80a13697 400#if defined(HAS_QUAD)
a7a3cfaa 401 /* Q */ sizeof(Uquad_t),
80a13697 402#else
a7a3cfaa 403 0,
80a13697 404#endif
a7a3cfaa
TH
405 0,
406 /* S */ SIZE16,
407 0,
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* V */ SIZE32,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
414 0,
415 /* f */ sizeof(float),
416 0, 0,
417 /* i */ sizeof(int),
418 /* j */ IVSIZE,
419 0,
420 /* l */ SIZE32,
421 0,
422 /* n */ SIZE16,
423 0,
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* q */ sizeof(Quad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* s */ SIZE16,
432 0, 0,
433 /* v */ SIZE16,
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
444 /* shrieking */
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
451 0, 0,
452 /* L */ sizeof(unsigned long),
453 0,
7212898e 454#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 455 /* N */ SIZE32,
7212898e 456#else
a7a3cfaa 457 0,
7212898e 458#endif
a7a3cfaa
TH
459 0, 0, 0, 0,
460 /* S */ sizeof(unsigned short),
461 0, 0,
7212898e 462#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 463 /* V */ SIZE32,
7212898e 464#else
a7a3cfaa 465 0,
7212898e 466#endif
a7a3cfaa
TH
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0,
469 /* i */ sizeof(int),
470 0, 0,
471 /* l */ sizeof(long),
472 0,
7212898e 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 474 /* n */ SIZE16,
7212898e 475#else
a7a3cfaa 476 0,
7212898e 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0,
479 /* s */ sizeof(short),
480 0, 0,
7212898e 481#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 482 /* v */ SIZE16,
7212898e 483#else
a7a3cfaa 484 0,
7212898e 485#endif
a7a3cfaa
TH
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
495};
496#else
497/* EBCDIC (or bust) */
f43a9a31 498STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
499 /* normal */
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
511 0,
512 /* f */ sizeof(float),
513 0, 0,
514 /* i */ sizeof(int),
515 0, 0, 0, 0, 0, 0, 0,
516 /* j */ IVSIZE,
517 0,
518 /* l */ SIZE32,
519 0,
520 /* n */ SIZE16,
521 0,
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 523#if defined(HAS_QUAD)
a7a3cfaa 524 /* q */ sizeof(Quad_t),
80a13697 525#else
a7a3cfaa 526 0,
80a13697 527#endif
a7a3cfaa
TH
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* s */ SIZE16,
530 0, 0,
531 /* v */ SIZE16,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 536#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 537 /* D */ LONG_DOUBLESIZE,
80a13697 538#else
a7a3cfaa 539 0,
80a13697 540#endif
a7a3cfaa
TH
541 0,
542 /* F */ NVSIZE,
543 0, 0,
544 /* I */ sizeof(unsigned int),
545 0, 0, 0, 0, 0, 0, 0,
546 /* J */ UVSIZE,
547 0,
548 /* L */ SIZE32,
549 0,
550 /* N */ SIZE32,
551 0, 0,
80a13697 552#if defined(HAS_QUAD)
a7a3cfaa 553 /* Q */ sizeof(Uquad_t),
80a13697 554#else
a7a3cfaa 555 0,
80a13697 556#endif
a7a3cfaa
TH
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 /* S */ SIZE16,
559 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* V */ SIZE32,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 /* shrieking */
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* i */ sizeof(int),
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
578 0,
7212898e 579#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 580 /* n */ SIZE16,
7212898e 581#else
a7a3cfaa 582 0,
7212898e 583#endif
a7a3cfaa
TH
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
586 0, 0,
7212898e 587#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 588 /* v */ SIZE16,
7212898e 589#else
a7a3cfaa 590 0,
7212898e 591#endif
a7a3cfaa
TH
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
598 0,
7212898e 599#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 600 /* N */ SIZE32,
7212898e 601#else
a7a3cfaa 602 0,
7212898e 603#endif
a7a3cfaa
TH
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
606 0, 0,
7212898e 607#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 608 /* V */ SIZE32,
7212898e 609#else
a7a3cfaa 610 0,
7212898e 611#endif
a7a3cfaa
TH
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
614};
615#endif
616
08ca2aa3 617STATIC U8
f7fe979e 618uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 619{
08ca2aa3 620 STRLEN retlen;
0bcc34c2 621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 629 if (val >= 0x100) {
f337b084 630 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
f337b084 633 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
634 val &= 0xff;
635 }
636 *s += retlen;
fe2774ed 637 return (U8)val;
08ca2aa3
TH
638}
639
f337b084
TH
640#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
642 *(U8 *)(s)++)
643
644STATIC bool
f7fe979e 645uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
646{
647 UV val;
648 STRLEN retlen;
f7fe979e 649 const char *from = *s;
08ca2aa3 650 int bad = 0;
f7fe979e 651 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
f337b084 655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
658 bad |= 1;
659 } else from += retlen;
660 if (val >= 0x100) {
661 bad |= 2;
662 val &= 0xff;
663 }
fe2774ed 664 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
665 }
666 /* We have enough characters for the buffer. Did we have problems ? */
667 if (bad) {
668 if (bad & 1) {
669 /* Rewalk the string fragment while warning */
f7fe979e 670 const char *ptr;
9e27e96a 671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
f337b084 674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
675 }
676 if (from > end) from = end;
677 }
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
fc241834 682 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
684 }
685 *s = from;
686 return TRUE;
687}
688
689STATIC bool
f7fe979e 690next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 691{
97aff369 692 dVAR;
08ca2aa3 693 STRLEN retlen;
0bcc34c2 694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
697 *out = 0;
698 return FALSE;
699 }
700 *out = PL_uudmap[val] & 077;
f337b084 701 *s += retlen;
08ca2aa3
TH
702 return TRUE;
703}
78d46eaa 704
64844641 705STATIC char *
14333449 706S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 707 const U8 * const end = start + len;
64844641 708
f337b084 709 while (start < end) {
48fa4626
TS
710 const UV uv = NATIVE_TO_ASCII(*start);
711 if (UNI_IS_INVARIANT(uv))
712 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
713 else {
714 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
715 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
716 }
717 start++;
718 }
64844641 719 return dest;
f337b084
TH
720}
721
230e1fce
NC
722#define PUSH_BYTES(utf8, cur, buf, len) \
723STMT_START { \
64844641
AL
724 if (utf8) \
725 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
726 else { \
727 Copy(buf, cur, len, char); \
728 (cur) += (len); \
729 } \
f337b084
TH
730} STMT_END
731
732#define GROWING(utf8, cat, start, cur, in_len) \
733STMT_START { \
734 STRLEN glen = (in_len); \
3473cf63 735 if (utf8) glen *= UTF8_EXPAND; \
f337b084 736 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 737 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
738 (cur) = (start) + SvCUR(cat); \
739 } \
740} STMT_END
741
742#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
743STMT_START { \
f7fe979e 744 const STRLEN glen = (in_len); \
f337b084 745 STRLEN gl = glen; \
3473cf63 746 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
747 if ((cur) + gl >= (start) + SvLEN(cat)) { \
748 *cur = '\0'; \
b162af07 749 SvCUR_set((cat), (cur) - (start)); \
0bd48802 750 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
751 (cur) = (start) + SvCUR(cat); \
752 } \
753 PUSH_BYTES(utf8, cur, buf, glen); \
754} STMT_END
755
756#define PUSH_BYTE(utf8, s, byte) \
757STMT_START { \
758 if (utf8) { \
f7fe979e 759 const U8 au8 = (byte); \
64844641 760 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
761 } else *(U8 *)(s)++ = (byte); \
762} STMT_END
763
764/* Only to be used inside a loop (see the break) */
765#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
766STMT_START { \
767 STRLEN retlen; \
768 if (str >= end) break; \
769 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
770 if (retlen == (STRLEN) -1 || retlen == 0) { \
771 *cur = '\0'; \
772 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
773 } \
774 str += retlen; \
775} STMT_END
776
f7fe979e
AL
777static const char *_action( const tempsym_t* symptr )
778{
779 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
780}
781
206947d2 782/* Returns the sizeof() struct described by pat */
028d1f6d 783STATIC I32
f337b084 784S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 785{
f337b084 786 I32 total = 0;
206947d2 787
49704364 788 while (next_symbol(symptr)) {
f337b084 789 I32 len;
f7fe979e 790 int size;
f337b084
TH
791
792 switch (symptr->howlen) {
fc241834 793 case e_star:
49704364 794 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 795 _action( symptr ) );
49704364 796 break;
f337b084
TH
797 default:
798 /* e_no_len and e_number */
799 len = symptr->length;
800 break;
49704364
WL
801 }
802
a7a3cfaa 803 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 804 if (!size) {
f7fe979e 805 int star;
80a13697
NC
806 /* endianness doesn't influence the size of a type */
807 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
808 default:
809 Perl_croak(aTHX_ "Invalid type '%c' in %s",
810 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 811 _action( symptr ) );
28be1210
TH
812#ifdef PERL_PACK_CAN_SHRIEKSIGN
813 case '.' | TYPE_IS_SHRIEKING:
814 case '@' | TYPE_IS_SHRIEKING:
815#endif
80a13697 816 case '@':
28be1210 817 case '.':
80a13697
NC
818 case '/':
819 case 'U': /* XXXX Is it correct? */
820 case 'w':
821 case 'u':
822 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 823 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 824 _action( symptr ) );
80a13697
NC
825 case '%':
826 size = 0;
827 break;
828 case '(':
fc241834
RGS
829 {
830 tempsym_t savsym = *symptr;
831 symptr->patptr = savsym.grpbeg;
832 symptr->patend = savsym.grpend;
833 /* XXXX Theoretically, we need to measure many times at
834 different positions, since the subexpression may contain
835 alignment commands, but be not of aligned length.
836 Need to detect this and croak(). */
837 size = measure_struct(symptr);
838 *symptr = savsym;
839 break;
840 }
80a13697
NC
841 case 'X' | TYPE_IS_SHRIEKING:
842 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
843 */
844 if (!len) /* Avoid division by 0 */
845 len = 1;
846 len = total % len; /* Assumed: the start is aligned. */
847 /* FALL THROUGH */
848 case 'X':
849 size = -1;
850 if (total < len)
f7fe979e 851 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
852 break;
853 case 'x' | TYPE_IS_SHRIEKING:
854 if (!len) /* Avoid division by 0 */
855 len = 1;
856 star = total % len; /* Assumed: the start is aligned. */
857 if (star) /* Other portable ways? */
858 len = len - star;
859 else
860 len = 0;
861 /* FALL THROUGH */
862 case 'x':
863 case 'A':
864 case 'Z':
865 case 'a':
80a13697
NC
866 size = 1;
867 break;
868 case 'B':
869 case 'b':
870 len = (len + 7)/8;
871 size = 1;
872 break;
873 case 'H':
874 case 'h':
875 len = (len + 1)/2;
876 size = 1;
877 break;
78d46eaa 878
80a13697
NC
879 case 'P':
880 len = 1;
881 size = sizeof(char*);
78d46eaa
NC
882 break;
883 }
206947d2
IZ
884 }
885 total += len * size;
886 }
887 return total;
888}
889
49704364
WL
890
891/* locate matching closing parenthesis or bracket
892 * returns char pointer to char after match, or NULL
893 */
f7fe979e
AL
894STATIC const char *
895S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 896{
49704364 897 while (patptr < patend) {
f7fe979e 898 const char c = *patptr++;
49704364
WL
899
900 if (isSPACE(c))
901 continue;
902 else if (c == ender)
903 return patptr-1;
904 else if (c == '#') {
905 while (patptr < patend && *patptr != '\n')
906 patptr++;
907 continue;
908 } else if (c == '(')
909 patptr = group_end(patptr, patend, ')') + 1;
910 else if (c == '[')
911 patptr = group_end(patptr, patend, ']') + 1;
18529408 912 }
49704364
WL
913 Perl_croak(aTHX_ "No group ending character '%c' found in template",
914 ender);
915 return 0;
18529408
IZ
916}
917
49704364
WL
918
919/* Convert unsigned decimal number to binary.
920 * Expects a pointer to the first digit and address of length variable
921 * Advances char pointer to 1st non-digit char and returns number
fc241834 922 */
f7fe979e
AL
923STATIC const char *
924S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
925{
926 I32 len = *patptr++ - '0';
927 while (isDIGIT(*patptr)) {
928 if (len >= 0x7FFFFFFF/10)
929 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
930 len = (len * 10) + (*patptr++ - '0');
931 }
932 *lenptr = len;
933 return patptr;
934}
935
936/* The marvellous template parsing routine: Using state stored in *symptr,
937 * locates next template code and count
938 */
939STATIC bool
f337b084 940S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 941{
f7fe979e 942 const char* patptr = symptr->patptr;
0bcc34c2 943 const char* const patend = symptr->patend;
49704364
WL
944
945 symptr->flags &= ~FLAG_SLASH;
946
947 while (patptr < patend) {
948 if (isSPACE(*patptr))
949 patptr++;
950 else if (*patptr == '#') {
951 patptr++;
952 while (patptr < patend && *patptr != '\n')
953 patptr++;
954 if (patptr < patend)
955 patptr++;
956 } else {
fc241834 957 /* We should have found a template code */
49704364 958 I32 code = *patptr++ & 0xFF;
66c611c5 959 U32 inherited_modifiers = 0;
49704364
WL
960
961 if (code == ','){ /* grandfather in commas but with a warning */
962 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
963 symptr->flags |= FLAG_COMMA;
964 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 965 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
966 }
967 continue;
968 }
fc241834 969
49704364 970 /* for '(', skip to ')' */
fc241834 971 if (code == '(') {
49704364
WL
972 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
973 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 974 _action( symptr ) );
49704364
WL
975 symptr->grpbeg = patptr;
976 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
977 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
978 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 979 _action( symptr ) );
49704364
WL
980 }
981
66c611c5
MHM
982 /* look for group modifiers to inherit */
983 if (TYPE_ENDIANNESS(symptr->flags)) {
984 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
985 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
986 }
987
1109a392
MHM
988 /* look for modifiers */
989 while (patptr < patend) {
b7787f18
AL
990 const char *allowed;
991 I32 modifier;
1109a392
MHM
992 switch (*patptr) {
993 case '!':
994 modifier = TYPE_IS_SHRIEKING;
7212898e 995 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 996 break;
7212898e 997#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
998 case '>':
999 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1000 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1001 break;
1002 case '<':
1003 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1004 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1005 break;
f337b084 1006#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1007 default:
b7787f18
AL
1008 allowed = "";
1009 modifier = 0;
1109a392
MHM
1010 break;
1011 }
66c611c5 1012
1109a392
MHM
1013 if (modifier == 0)
1014 break;
66c611c5 1015
1109a392
MHM
1016 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1017 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1018 allowed, _action( symptr ) );
66c611c5
MHM
1019
1020 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1021 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1022 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1023 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1024 TYPE_ENDIANNESS_MASK)
1025 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1026 *patptr, _action( symptr ) );
66c611c5 1027
041457d9 1028 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1029 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1030 "Duplicate modifier '%c' after '%c' in %s",
1031 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1032 _action( symptr ) );
1109a392 1033 }
66c611c5 1034
1109a392
MHM
1035 code |= modifier;
1036 patptr++;
49704364
WL
1037 }
1038
66c611c5
MHM
1039 /* inherit modifiers */
1040 code |= inherited_modifiers;
1041
fc241834 1042 /* look for count and/or / */
49704364
WL
1043 if (patptr < patend) {
1044 if (isDIGIT(*patptr)) {
1045 patptr = get_num( patptr, &symptr->length );
1046 symptr->howlen = e_number;
1047
1048 } else if (*patptr == '*') {
1049 patptr++;
1050 symptr->howlen = e_star;
1051
1052 } else if (*patptr == '[') {
f7fe979e 1053 const char* lenptr = ++patptr;
49704364
WL
1054 symptr->howlen = e_number;
1055 patptr = group_end( patptr, patend, ']' ) + 1;
1056 /* what kind of [] is it? */
1057 if (isDIGIT(*lenptr)) {
1058 lenptr = get_num( lenptr, &symptr->length );
1059 if( *lenptr != ']' )
1060 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1061 _action( symptr ) );
49704364
WL
1062 } else {
1063 tempsym_t savsym = *symptr;
1064 symptr->patend = patptr-1;
1065 symptr->patptr = lenptr;
1066 savsym.length = measure_struct(symptr);
1067 *symptr = savsym;
1068 }
1069 } else {
1070 symptr->howlen = e_no_len;
1071 symptr->length = 1;
1072 }
1073
1074 /* try to find / */
1075 while (patptr < patend) {
1076 if (isSPACE(*patptr))
1077 patptr++;
1078 else if (*patptr == '#') {
1079 patptr++;
1080 while (patptr < patend && *patptr != '\n')
1081 patptr++;
1082 if (patptr < patend)
1083 patptr++;
1084 } else {
66c611c5 1085 if (*patptr == '/') {
49704364
WL
1086 symptr->flags |= FLAG_SLASH;
1087 patptr++;
66c611c5
MHM
1088 if (patptr < patend &&
1089 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1090 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1091 _action( symptr ) );
49704364
WL
1092 }
1093 break;
1094 }
18529408 1095 }
49704364
WL
1096 } else {
1097 /* at end - no count, no / */
1098 symptr->howlen = e_no_len;
1099 symptr->length = 1;
1100 }
1101
1102 symptr->code = code;
fc241834 1103 symptr->patptr = patptr;
49704364 1104 return TRUE;
18529408 1105 }
49704364 1106 }
fc241834 1107 symptr->patptr = patptr;
49704364 1108 return FALSE;
18529408
IZ
1109}
1110
18529408 1111/*
fc241834 1112 There is no way to cleanly handle the case where we should process the
08ca2aa3 1113 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1114 (e.g. estimates like strend-s as an upper bound for the number of
1115 characters left wouldn't work). So if we foresee the need of this
1116 (pattern starts with U or contains U0), we want to work on the encoded
1117 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1118 themselves if they need to do a lot of unpacks like this on it
1119*/
fc241834 1120STATIC bool
08ca2aa3
TH
1121need_utf8(const char *pat, const char *patend)
1122{
1123 bool first = TRUE;
1124 while (pat < patend) {
1125 if (pat[0] == '#') {
1126 pat++;
f7fe979e 1127 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1128 if (!pat) return FALSE;
1129 } else if (pat[0] == 'U') {
1130 if (first || pat[1] == '0') return TRUE;
1131 } else first = FALSE;
1132 pat++;
1133 }
1134 return FALSE;
1135}
1136
1137STATIC char
1138first_symbol(const char *pat, const char *patend) {
1139 while (pat < patend) {
1140 if (pat[0] != '#') return pat[0];
1141 pat++;
f7fe979e 1142 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1143 if (!pat) return 0;
1144 pat++;
1145 }
1146 return 0;
1147}
1148
1149/*
7accc089
JH
1150=for apidoc unpackstring
1151
608d3aed
WL
1152The engine implementing unpack() Perl function. C<unpackstring> puts the
1153extracted list items on the stack and returns the number of elements.
1154Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1155
1156=cut */
1157
1158I32
f7fe979e 1159Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1160{
f7fe979e 1161 tempsym_t sym;
08ca2aa3 1162
f337b084 1163 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1164 else if (need_utf8(pat, patend)) {
1165 /* We probably should try to avoid this in case a scalar context call
1166 wouldn't get to the "U0" */
1167 STRLEN len = strend - s;
230e1fce 1168 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1169 SAVEFREEPV(s);
1170 strend = s + len;
f337b084 1171 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1172 }
1173
f337b084
TH
1174 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1175 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1176
f7fe979e 1177 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1178
1179 return unpack_rec(&sym, s, s, strend, NULL );
1180}
1181
49704364
WL
1182STATIC
1183I32
f7fe979e 1184S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1185{
27da23d5 1186 dVAR; dSP;
18529408 1187 SV *sv;
f7fe979e 1188 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1189 howlen_t howlen;
a6ec74c1 1190
a6ec74c1 1191 I32 checksum = 0;
92d41999 1192 UV cuv = 0;
a6ec74c1 1193 NV cdouble = 0.0;
f337b084 1194 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1195 bool beyond = FALSE;
21c16052 1196 bool explicit_length;
9e27e96a 1197 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1198 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1199 symptr->strbeg = s - strbeg;
49704364 1200
49704364 1201 while (next_symbol(symptr)) {
a7a3cfaa 1202 packprops_t props;
9e27e96a 1203 I32 len;
f337b084 1204 I32 datumtype = symptr->code;
206947d2 1205 /* do first one only unless in list context
08ca2aa3 1206 / is implemented by unpacking the count, then popping it from the
206947d2 1207 stack, so must check that we're not in the middle of a / */
49704364 1208 if ( unpack_only_one
206947d2 1209 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1210 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1211 break;
49704364 1212
f337b084 1213 switch (howlen = symptr->howlen) {
fc241834
RGS
1214 case e_star:
1215 len = strend - strbeg; /* long enough */
49704364 1216 break;
f337b084
TH
1217 default:
1218 /* e_no_len and e_number */
1219 len = symptr->length;
1220 break;
49704364 1221 }
18529408 1222
21c16052 1223 explicit_length = TRUE;
a6ec74c1 1224 redo_switch:
49704364 1225 beyond = s >= strend;
a7a3cfaa
TH
1226
1227 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1228 if (props) {
1229 /* props nonzero means we can process this letter. */
9e27e96a
AL
1230 const long size = props & PACK_SIZE_MASK;
1231 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1232 if (len > howmany)
1233 len = howmany;
1234
1235 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1236 if (len && unpack_only_one) len = 1;
1237 EXTEND(SP, len);
1238 EXTEND_MORTAL(len);
78d46eaa
NC
1239 }
1240 }
a7a3cfaa 1241
1109a392 1242 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1243 default:
1109a392 1244 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1245
a6ec74c1 1246 case '%':
49704364 1247 if (howlen == e_no_len)
18529408 1248 len = 16; /* len is not specified */
a6ec74c1 1249 checksum = len;
92d41999 1250 cuv = 0;
a6ec74c1 1251 cdouble = 0;
18529408 1252 continue;
a6ec74c1 1253 break;
18529408
IZ
1254 case '(':
1255 {
49704364 1256 tempsym_t savsym = *symptr;
9e27e96a 1257 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1258 symptr->flags |= group_modifiers;
49704364 1259 symptr->patend = savsym.grpend;
28be1210 1260 symptr->previous = &savsym;
49704364 1261 symptr->level++;
18529408
IZ
1262 PUTBACK;
1263 while (len--) {
49704364 1264 symptr->patptr = savsym.grpbeg;
f337b084
TH
1265 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1266 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1267 unpack_rec(symptr, s, strbeg, strend, &s);
1268 if (s == strend && savsym.howlen == e_star)
49704364 1269 break; /* No way to continue */
18529408
IZ
1270 }
1271 SPAGAIN;
28be1210 1272 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1273 *symptr = savsym;
18529408
IZ
1274 break;
1275 }
28be1210
TH
1276#ifdef PERL_PACK_CAN_SHRIEKSIGN
1277 case '.' | TYPE_IS_SHRIEKING:
1278#endif
1279 case '.': {
9e27e96a 1280 const char *from;
28be1210
TH
1281 SV *sv;
1282#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1283 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1284#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1285 const bool u8 = utf8;
28be1210
TH
1286#endif
1287 if (howlen == e_star) from = strbeg;
1288 else if (len <= 0) from = s;
1289 else {
1290 tempsym_t *group = symptr;
1291
1292 while (--len && group) group = group->previous;
1293 from = group ? strbeg + group->strbeg : strbeg;
1294 }
1295 sv = from <= s ?
00646304
CB
1296 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1297 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1298 XPUSHs(sv_2mortal(sv));
1299 break;
1300 }
1301#ifdef PERL_PACK_CAN_SHRIEKSIGN
1302 case '@' | TYPE_IS_SHRIEKING:
1303#endif
a6ec74c1 1304 case '@':
28be1210
TH
1305 s = strbeg + symptr->strbeg;
1306#ifdef PERL_PACK_CAN_SHRIEKSIGN
1307 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1308#else /* PERL_PACK_CAN_SHRIEKSIGN */
1309 if (utf8)
1310#endif
1311 {
08ca2aa3
TH
1312 while (len > 0) {
1313 if (s >= strend)
1314 Perl_croak(aTHX_ "'@' outside of string in unpack");
1315 s += UTF8SKIP(s);
1316 len--;
1317 }
1318 if (s > strend)
1319 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1320 } else {
28be1210 1321 if (strend-s < len)
fc241834 1322 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1323 s += len;
08ca2aa3 1324 }
a6ec74c1 1325 break;
62f95557
IZ
1326 case 'X' | TYPE_IS_SHRIEKING:
1327 if (!len) /* Avoid division by 0 */
1328 len = 1;
08ca2aa3 1329 if (utf8) {
f7fe979e 1330 const char *hop, *last;
f337b084
TH
1331 I32 l = len;
1332 hop = last = strbeg;
1333 while (hop < s) {
1334 hop += UTF8SKIP(hop);
1335 if (--l == 0) {
08ca2aa3 1336 last = hop;
f337b084
TH
1337 l = len;
1338 }
fc241834 1339 }
f337b084
TH
1340 if (last > s)
1341 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1342 s = last;
1343 break;
f337b084
TH
1344 }
1345 len = (s - strbeg) % len;
62f95557 1346 /* FALL THROUGH */
a6ec74c1 1347 case 'X':
08ca2aa3
TH
1348 if (utf8) {
1349 while (len > 0) {
1350 if (s <= strbeg)
1351 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1352 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1353 if (s <= strbeg)
1354 Perl_croak(aTHX_ "'X' outside of string in unpack");
1355 }
1356 len--;
1357 }
1358 } else {
fc241834
RGS
1359 if (len > s - strbeg)
1360 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1361 s -= len;
08ca2aa3 1362 }
a6ec74c1 1363 break;
9e27e96a
AL
1364 case 'x' | TYPE_IS_SHRIEKING: {
1365 I32 ai32;
62f95557
IZ
1366 if (!len) /* Avoid division by 0 */
1367 len = 1;
230e1fce
NC
1368 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1369 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1370 if (ai32 == 0) break;
1371 len -= ai32;
9e27e96a 1372 }
62f95557 1373 /* FALL THROUGH */
a6ec74c1 1374 case 'x':
08ca2aa3
TH
1375 if (utf8) {
1376 while (len>0) {
1377 if (s >= strend)
1378 Perl_croak(aTHX_ "'x' outside of string in unpack");
1379 s += UTF8SKIP(s);
1380 len--;
1381 }
1382 } else {
fc241834
RGS
1383 if (len > strend - s)
1384 Perl_croak(aTHX_ "'x' outside of string in unpack");
1385 s += len;
f337b084 1386 }
a6ec74c1
JH
1387 break;
1388 case '/':
49704364
WL
1389 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1390 break;
a6ec74c1
JH
1391 case 'A':
1392 case 'Z':
1393 case 'a':
08ca2aa3
TH
1394 if (checksum) {
1395 /* Preliminary length estimate is assumed done in 'W' */
1396 if (len > strend - s) len = strend - s;
1397 goto W_checksum;
1398 }
1399 if (utf8) {
1400 I32 l;
f7fe979e 1401 const char *hop;
08ca2aa3
TH
1402 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1403 if (hop >= strend) {
1404 if (hop > strend)
1405 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1406 break;
fc241834 1407 }
a6ec74c1 1408 }
08ca2aa3
TH
1409 if (hop > strend)
1410 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1411 len = hop - s;
1412 } else if (len > strend - s)
1413 len = strend - s;
1414
1415 if (datumtype == 'Z') {
1416 /* 'Z' strips stuff after first null */
f7fe979e 1417 const char *ptr, *end;
f337b084
TH
1418 end = s + len;
1419 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1420 sv = newSVpvn(s, ptr-s);
1421 if (howlen == e_star) /* exact for 'Z*' */
1422 len = ptr-s + (ptr != strend ? 1 : 0);
1423 } else if (datumtype == 'A') {
1424 /* 'A' strips both nulls and spaces */
f7fe979e 1425 const char *ptr;
18bdf90a
TH
1426 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1427 for (ptr = s+len-1; ptr >= s; ptr--)
1428 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1429 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1430 if (ptr >= s) ptr += UTF8SKIP(ptr);
1431 else ptr++;
28be1210 1432 if (ptr > s+len)
18bdf90a
TH
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434 } else {
1435 for (ptr = s+len-1; ptr >= s; ptr--)
1436 if (*ptr != 0 && !isSPACE(*ptr)) break;
1437 ptr++;
1438 }
08ca2aa3
TH
1439 sv = newSVpvn(s, ptr-s);
1440 } else sv = newSVpvn(s, len);
1441
1442 if (utf8) {
1443 SvUTF8_on(sv);
1444 /* Undo any upgrade done due to need_utf8() */
f337b084 1445 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1446 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1447 }
1448 XPUSHs(sv_2mortal(sv));
08ca2aa3 1449 s += len;
a6ec74c1
JH
1450 break;
1451 case 'B':
08ca2aa3
TH
1452 case 'b': {
1453 char *str;
49704364 1454 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1455 len = (strend - s) * 8;
1456 if (checksum) {
1457 if (!PL_bitcount) {
08ca2aa3 1458 int bits;
a02a5408 1459 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1460 for (bits = 1; bits < 256; bits++) {
1461 if (bits & 1) PL_bitcount[bits]++;
1462 if (bits & 2) PL_bitcount[bits]++;
1463 if (bits & 4) PL_bitcount[bits]++;
1464 if (bits & 8) PL_bitcount[bits]++;
1465 if (bits & 16) PL_bitcount[bits]++;
1466 if (bits & 32) PL_bitcount[bits]++;
1467 if (bits & 64) PL_bitcount[bits]++;
1468 if (bits & 128) PL_bitcount[bits]++;
1469 }
1470 }
f337b084 1471 if (utf8)
08ca2aa3 1472 while (len >= 8 && s < strend) {
f337b084 1473 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1474 len -= 8;
1475 }
f337b084 1476 else
fc241834 1477 while (len >= 8) {
08ca2aa3 1478 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1479 len -= 8;
1480 }
08ca2aa3
TH
1481 if (len && s < strend) {
1482 U8 bits;
f337b084
TH
1483 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1484 if (datumtype == 'b')
a6ec74c1 1485 while (len-- > 0) {
92d41999 1486 if (bits & 1) cuv++;
a6ec74c1
JH
1487 bits >>= 1;
1488 }
f337b084 1489 else
a6ec74c1 1490 while (len-- > 0) {
08ca2aa3 1491 if (bits & 0x80) cuv++;
a6ec74c1
JH
1492 bits <<= 1;
1493 }
fc241834 1494 }
a6ec74c1
JH
1495 break;
1496 }
08ca2aa3 1497
561b68a9 1498 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1499 SvPOK_on(sv);
1500 str = SvPVX(sv);
1501 if (datumtype == 'b') {
f337b084 1502 U8 bits = 0;
f7fe979e 1503 const I32 ai32 = len;
08ca2aa3
TH
1504 for (len = 0; len < ai32; len++) {
1505 if (len & 7) bits >>= 1;
1506 else if (utf8) {
1507 if (s >= strend) break;
f337b084 1508 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1509 } else bits = *(U8 *) s++;
1510 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1511 }
08ca2aa3 1512 } else {
f337b084 1513 U8 bits = 0;
f7fe979e 1514 const I32 ai32 = len;
08ca2aa3
TH
1515 for (len = 0; len < ai32; len++) {
1516 if (len & 7) bits <<= 1;
1517 else if (utf8) {
1518 if (s >= strend) break;
f337b084 1519 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1520 } else bits = *(U8 *) s++;
1521 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1522 }
1523 }
1524 *str = '\0';
aa07b2f6 1525 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1526 XPUSHs(sv);
a6ec74c1 1527 break;
08ca2aa3 1528 }
a6ec74c1 1529 case 'H':
08ca2aa3
TH
1530 case 'h': {
1531 char *str;
fc241834 1532 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1533 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1534 len = (strend - s) * 2;
561b68a9 1535 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1536 SvPOK_on(sv);
1537 str = SvPVX(sv);
1538 if (datumtype == 'h') {
f337b084 1539 U8 bits = 0;
9e27e96a 1540 I32 ai32 = len;
fc241834
RGS
1541 for (len = 0; len < ai32; len++) {
1542 if (len & 1) bits >>= 4;
1543 else if (utf8) {
1544 if (s >= strend) break;
f337b084 1545 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1546 } else bits = * (U8 *) s++;
a6ec74c1
JH
1547 *str++ = PL_hexdigit[bits & 15];
1548 }
08ca2aa3 1549 } else {
f337b084 1550 U8 bits = 0;
f7fe979e 1551 const I32 ai32 = len;
08ca2aa3
TH
1552 for (len = 0; len < ai32; len++) {
1553 if (len & 1) bits <<= 4;
1554 else if (utf8) {
1555 if (s >= strend) break;
f337b084 1556 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1557 } else bits = *(U8 *) s++;
a6ec74c1
JH
1558 *str++ = PL_hexdigit[(bits >> 4) & 15];
1559 }
1560 }
1561 *str = '\0';
aa07b2f6 1562 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1563 XPUSHs(sv);
a6ec74c1 1564 break;
08ca2aa3 1565 }
a6ec74c1 1566 case 'c':
73cb7263 1567 while (len-- > 0) {
f337b084 1568 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1569 if (aint >= 128) /* fake up signed chars */
1570 aint -= 256;
08ca2aa3 1571 if (!checksum)
ac7f3b1b 1572 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1573 else if (checksum > bits_in_uv)
1574 cdouble += (NV)aint;
1575 else
1576 cuv += aint;
a6ec74c1
JH
1577 }
1578 break;
1579 case 'C':
08ca2aa3
TH
1580 case 'W':
1581 W_checksum:
35bcd338 1582 if (len == 0) {
fc241834 1583 if (explicit_length && datumtype == 'C')
08ca2aa3 1584 /* Switch to "character" mode */
f337b084 1585 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1586 break;
1587 }
fc241834 1588 if (datumtype == 'C' ?
f337b084
TH
1589 (symptr->flags & FLAG_DO_UTF8) &&
1590 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1591 while (len-- > 0 && s < strend) {
08ca2aa3 1592 STRLEN retlen;
f7fe979e 1593 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1594 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1595 if (retlen == (STRLEN) -1 || retlen == 0)
1596 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1597 s += retlen;
1598 if (!checksum)
1599 PUSHs(sv_2mortal(newSVuv((UV) val)));
1600 else if (checksum > bits_in_uv)
1601 cdouble += (NV) val;
d6d3e8bd 1602 else
08ca2aa3 1603 cuv += val;
fc241834 1604 }
08ca2aa3 1605 } else if (!checksum)
a6ec74c1 1606 while (len-- > 0) {
f7fe979e 1607 const U8 ch = *(U8 *) s++;
08ca2aa3 1608 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1609 }
08ca2aa3
TH
1610 else if (checksum > bits_in_uv)
1611 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1612 else
1613 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1614 break;
1615 case 'U':
35bcd338 1616 if (len == 0) {
08ca2aa3
TH
1617 if (explicit_length) {
1618 /* Switch to "bytes in UTF-8" mode */
f337b084 1619 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1620 else
1621 /* Should be impossible due to the need_utf8() test */
1622 Perl_croak(aTHX_ "U0 mode on a byte string");
1623 }
35bcd338
JH
1624 break;
1625 }
08ca2aa3 1626 if (len > strend - s) len = strend - s;
fc241834 1627 if (!checksum) {
08ca2aa3
TH
1628 if (len && unpack_only_one) len = 1;
1629 EXTEND(SP, len);
1630 EXTEND_MORTAL(len);
fc241834 1631 }
08ca2aa3
TH
1632 while (len-- > 0 && s < strend) {
1633 STRLEN retlen;
1634 UV auv;
1635 if (utf8) {
1636 U8 result[UTF8_MAXLEN];
f7fe979e 1637 const char *ptr = s;
08ca2aa3 1638 STRLEN len;
08ca2aa3
TH
1639 /* Bug: warns about bad utf8 even if we are short on bytes
1640 and will break out of the loop */
230e1fce
NC
1641 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1642 'U'))
08ca2aa3
TH
1643 break;
1644 len = UTF8SKIP(result);
fc241834 1645 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1646 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1647 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1648 s = ptr;
1649 } else {
1650 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1651 if (retlen == (STRLEN) -1 || retlen == 0)
1652 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1653 s += retlen;
1654 }
1655 if (!checksum)
1656 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1657 else if (checksum > bits_in_uv)
08ca2aa3 1658 cdouble += (NV) auv;
73cb7263 1659 else
08ca2aa3 1660 cuv += auv;
a6ec74c1
JH
1661 }
1662 break;
49704364
WL
1663 case 's' | TYPE_IS_SHRIEKING:
1664#if SHORTSIZE != SIZE16
73cb7263 1665 while (len-- > 0) {
08ca2aa3 1666 short ashort;
f337b084
TH
1667 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1668 DO_BO_UNPACK(ashort, s);
08ca2aa3 1669 if (!checksum)
ac7f3b1b 1670 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1671 else if (checksum > bits_in_uv)
1672 cdouble += (NV)ashort;
1673 else
1674 cuv += ashort;
49704364
WL
1675 }
1676 break;
1677#else
1678 /* Fallthrough! */
a6ec74c1 1679#endif
49704364 1680 case 's':
73cb7263 1681 while (len-- > 0) {
08ca2aa3
TH
1682 I16 ai16;
1683
1684#if U16SIZE > SIZE16
1685 ai16 = 0;
1686#endif
f337b084 1687 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1688 DO_BO_UNPACK(ai16, 16);
1109a392 1689#if U16SIZE > SIZE16
73cb7263
NC
1690 if (ai16 > 32767)
1691 ai16 -= 65536;
a6ec74c1 1692#endif
08ca2aa3 1693 if (!checksum)
ac7f3b1b 1694 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1695 else if (checksum > bits_in_uv)
1696 cdouble += (NV)ai16;
1697 else
1698 cuv += ai16;
a6ec74c1
JH
1699 }
1700 break;
49704364
WL
1701 case 'S' | TYPE_IS_SHRIEKING:
1702#if SHORTSIZE != SIZE16
73cb7263 1703 while (len-- > 0) {
08ca2aa3 1704 unsigned short aushort;
f337b084
TH
1705 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1706 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1707 if (!checksum)
1708 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1709 else if (checksum > bits_in_uv)
1710 cdouble += (NV)aushort;
1711 else
1712 cuv += aushort;
49704364
WL
1713 }
1714 break;
1715#else
1716 /* Fallhrough! */
1717#endif
a6ec74c1
JH
1718 case 'v':
1719 case 'n':
1720 case 'S':
73cb7263 1721 while (len-- > 0) {
08ca2aa3
TH
1722 U16 au16;
1723#if U16SIZE > SIZE16
1724 au16 = 0;
1725#endif
f337b084 1726 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1727 DO_BO_UNPACK(au16, 16);
a6ec74c1 1728#ifdef HAS_NTOHS
73cb7263
NC
1729 if (datumtype == 'n')
1730 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1731#endif
1732#ifdef HAS_VTOHS
73cb7263
NC
1733 if (datumtype == 'v')
1734 au16 = vtohs(au16);
a6ec74c1 1735#endif
08ca2aa3
TH
1736 if (!checksum)
1737 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1738 else if (checksum > bits_in_uv)
f337b084 1739 cdouble += (NV) au16;
73cb7263
NC
1740 else
1741 cuv += au16;
a6ec74c1
JH
1742 }
1743 break;
7212898e 1744#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1745 case 'v' | TYPE_IS_SHRIEKING:
1746 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1747 while (len-- > 0) {
08ca2aa3
TH
1748 I16 ai16;
1749# if U16SIZE > SIZE16
1750 ai16 = 0;
1751# endif
f337b084 1752 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1753# ifdef HAS_NTOHS
73cb7263 1754 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1755 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1756# endif /* HAS_NTOHS */
1757# ifdef HAS_VTOHS
73cb7263 1758 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1759 ai16 = (I16) vtohs((U16) ai16);
1760# endif /* HAS_VTOHS */
1761 if (!checksum)
ac7f3b1b 1762 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1763 else if (checksum > bits_in_uv)
08ca2aa3 1764 cdouble += (NV) ai16;
73cb7263
NC
1765 else
1766 cuv += ai16;
068bd2e7
MHM
1767 }
1768 break;
08ca2aa3 1769#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1770 case 'i':
49704364 1771 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1772 while (len-- > 0) {
08ca2aa3 1773 int aint;
f337b084
TH
1774 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1775 DO_BO_UNPACK(aint, i);
08ca2aa3 1776 if (!checksum)
ac7f3b1b 1777 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1778 else if (checksum > bits_in_uv)
1779 cdouble += (NV)aint;
1780 else
1781 cuv += aint;
a6ec74c1
JH
1782 }
1783 break;
1784 case 'I':
49704364 1785 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1786 while (len-- > 0) {
08ca2aa3 1787 unsigned int auint;
f337b084
TH
1788 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1789 DO_BO_UNPACK(auint, i);
08ca2aa3 1790 if (!checksum)
ac7f3b1b 1791 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1792 else if (checksum > bits_in_uv)
1793 cdouble += (NV)auint;
1794 else
1795 cuv += auint;
a6ec74c1
JH
1796 }
1797 break;
92d41999 1798 case 'j':
73cb7263 1799 while (len-- > 0) {
08ca2aa3 1800 IV aiv;
f337b084 1801 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1802#if IVSIZE == INTSIZE
f337b084 1803 DO_BO_UNPACK(aiv, i);
1109a392 1804#elif IVSIZE == LONGSIZE
f337b084 1805 DO_BO_UNPACK(aiv, l);
1109a392 1806#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1807 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1808#else
1809 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1810#endif
08ca2aa3 1811 if (!checksum)
ac7f3b1b 1812 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1813 else if (checksum > bits_in_uv)
1814 cdouble += (NV)aiv;
1815 else
1816 cuv += aiv;
92d41999
JH
1817 }
1818 break;
1819 case 'J':
73cb7263 1820 while (len-- > 0) {
08ca2aa3 1821 UV auv;
f337b084 1822 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1823#if IVSIZE == INTSIZE
f337b084 1824 DO_BO_UNPACK(auv, i);
08ca2aa3 1825#elif IVSIZE == LONGSIZE
f337b084 1826 DO_BO_UNPACK(auv, l);
08ca2aa3 1827#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1828 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1829#else
1830 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1831#endif
08ca2aa3 1832 if (!checksum)
ac7f3b1b 1833 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1834 else if (checksum > bits_in_uv)
1835 cdouble += (NV)auv;
1836 else
1837 cuv += auv;
92d41999
JH
1838 }
1839 break;
49704364
WL
1840 case 'l' | TYPE_IS_SHRIEKING:
1841#if LONGSIZE != SIZE32
73cb7263 1842 while (len-- > 0) {
08ca2aa3 1843 long along;
f337b084
TH
1844 SHIFT_VAR(utf8, s, strend, along, datumtype);
1845 DO_BO_UNPACK(along, l);
08ca2aa3 1846 if (!checksum)
ac7f3b1b 1847 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1848 else if (checksum > bits_in_uv)
1849 cdouble += (NV)along;
1850 else
1851 cuv += along;
49704364
WL
1852 }
1853 break;
1854#else
1855 /* Fallthrough! */
a6ec74c1 1856#endif
49704364 1857 case 'l':
73cb7263 1858 while (len-- > 0) {
08ca2aa3
TH
1859 I32 ai32;
1860#if U32SIZE > SIZE32
1861 ai32 = 0;
1862#endif
f337b084 1863 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1864 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1865#if U32SIZE > SIZE32
08ca2aa3 1866 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1867#endif
08ca2aa3 1868 if (!checksum)
ac7f3b1b 1869 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1870 else if (checksum > bits_in_uv)
1871 cdouble += (NV)ai32;
1872 else
1873 cuv += ai32;
a6ec74c1
JH
1874 }
1875 break;
49704364
WL
1876 case 'L' | TYPE_IS_SHRIEKING:
1877#if LONGSIZE != SIZE32
73cb7263 1878 while (len-- > 0) {
08ca2aa3 1879 unsigned long aulong;
f337b084
TH
1880 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1881 DO_BO_UNPACK(aulong, l);
08ca2aa3 1882 if (!checksum)
ac7f3b1b 1883 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1884 else if (checksum > bits_in_uv)
1885 cdouble += (NV)aulong;
1886 else
1887 cuv += aulong;
49704364
WL
1888 }
1889 break;
1890#else
1891 /* Fall through! */
1892#endif
a6ec74c1
JH
1893 case 'V':
1894 case 'N':
1895 case 'L':
73cb7263 1896 while (len-- > 0) {
08ca2aa3
TH
1897 U32 au32;
1898#if U32SIZE > SIZE32
1899 au32 = 0;
1900#endif
f337b084 1901 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1902 DO_BO_UNPACK(au32, 32);
a6ec74c1 1903#ifdef HAS_NTOHL
73cb7263
NC
1904 if (datumtype == 'N')
1905 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1906#endif
1907#ifdef HAS_VTOHL
73cb7263
NC
1908 if (datumtype == 'V')
1909 au32 = vtohl(au32);
a6ec74c1 1910#endif
08ca2aa3 1911 if (!checksum)
fc241834
RGS
1912 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1913 else if (checksum > bits_in_uv)
1914 cdouble += (NV)au32;
1915 else
1916 cuv += au32;
a6ec74c1
JH
1917 }
1918 break;
7212898e 1919#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1920 case 'V' | TYPE_IS_SHRIEKING:
1921 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1922 while (len-- > 0) {
08ca2aa3
TH
1923 I32 ai32;
1924# if U32SIZE > SIZE32
1925 ai32 = 0;
1926# endif
f337b084 1927 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1928# ifdef HAS_NTOHL
73cb7263
NC
1929 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1930 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1931# endif
1932# ifdef HAS_VTOHL
73cb7263
NC
1933 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1934 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1935# endif
1936 if (!checksum)
ac7f3b1b 1937 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1938 else if (checksum > bits_in_uv)
1939 cdouble += (NV)ai32;
1940 else
1941 cuv += ai32;
068bd2e7
MHM
1942 }
1943 break;
08ca2aa3 1944#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1945 case 'p':
a6ec74c1 1946 while (len-- > 0) {
f7fe979e 1947 const char *aptr;
f337b084 1948 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1949 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1950 /* newSVpv generates undef if aptr is NULL */
1951 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1952 }
1953 break;
1954 case 'w':
a6ec74c1
JH
1955 {
1956 UV auv = 0;
1957 U32 bytes = 0;
fc241834 1958
08ca2aa3
TH
1959 while (len > 0 && s < strend) {
1960 U8 ch;
f337b084 1961 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1962 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1963 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1964 if (ch < 0x80) {
a6ec74c1 1965 bytes = 0;
ac7f3b1b 1966 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1967 len--;
1968 auv = 0;
08ca2aa3 1969 continue;
a6ec74c1 1970 }
08ca2aa3 1971 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1972 const char *t;
a6ec74c1
JH
1973
1974 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1975 while (s < strend) {
f337b084 1976 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1977 sv = mul128(sv, (U8)(ch & 0x7f));
1978 if (!(ch & 0x80)) {
a6ec74c1
JH
1979 bytes = 0;
1980 break;
1981 }
1982 }
10516c54 1983 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1984 while (*t == '0')
1985 t++;
1986 sv_chop(sv, t);
1987 PUSHs(sv_2mortal(sv));
1988 len--;
1989 auv = 0;
1990 }
1991 }
1992 if ((s >= strend) && bytes)
49704364 1993 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1994 }
1995 break;
1996 case 'P':
49704364
WL
1997 if (symptr->howlen == e_star)
1998 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1999 EXTEND(SP, 1);
2d3e0934 2000 if (s + sizeof(char*) <= strend) {
08ca2aa3 2001 char *aptr;
f337b084 2002 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2003 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2004 /* newSVpvn generates undef if aptr is NULL */
2005 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2006 }
a6ec74c1
JH
2007 break;
2008#ifdef HAS_QUAD
2009 case 'q':
73cb7263 2010 while (len-- > 0) {
08ca2aa3 2011 Quad_t aquad;
f337b084
TH
2012 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2013 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2014 if (!checksum)
2015 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2016 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2017 else if (checksum > bits_in_uv)
2018 cdouble += (NV)aquad;
2019 else
2020 cuv += aquad;
2021 }
a6ec74c1
JH
2022 break;
2023 case 'Q':
73cb7263 2024 while (len-- > 0) {
08ca2aa3 2025 Uquad_t auquad;
f337b084
TH
2026 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2027 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2028 if (!checksum)
2029 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2030 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2031 else if (checksum > bits_in_uv)
2032 cdouble += (NV)auquad;
2033 else
2034 cuv += auquad;
a6ec74c1
JH
2035 }
2036 break;
08ca2aa3 2037#endif /* HAS_QUAD */
a6ec74c1
JH
2038 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2039 case 'f':
73cb7263 2040 while (len-- > 0) {
08ca2aa3 2041 float afloat;
f337b084 2042 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2043 DO_BO_UNPACK_N(afloat, float);
2044 if (!checksum)
2045 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2046 else
73cb7263 2047 cdouble += afloat;
fc241834 2048 }
a6ec74c1
JH
2049 break;
2050 case 'd':
73cb7263 2051 while (len-- > 0) {
08ca2aa3 2052 double adouble;
f337b084 2053 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2054 DO_BO_UNPACK_N(adouble, double);
2055 if (!checksum)
2056 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2057 else
73cb7263 2058 cdouble += adouble;
fc241834 2059 }
a6ec74c1 2060 break;
92d41999 2061 case 'F':
73cb7263 2062 while (len-- > 0) {
08ca2aa3 2063 NV anv;
f337b084 2064 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2065 DO_BO_UNPACK_N(anv, NV);
2066 if (!checksum)
2067 PUSHs(sv_2mortal(newSVnv(anv)));
2068 else
73cb7263 2069 cdouble += anv;
fc241834 2070 }
92d41999
JH
2071 break;
2072#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2073 case 'D':
73cb7263 2074 while (len-- > 0) {
08ca2aa3 2075 long double aldouble;
f337b084 2076 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2077 DO_BO_UNPACK_N(aldouble, long double);
2078 if (!checksum)
2079 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2080 else
2081 cdouble += aldouble;
92d41999
JH
2082 }
2083 break;
2084#endif
a6ec74c1
JH
2085 case 'u':
2086 /* MKS:
2087 * Initialise the decode mapping. By using a table driven
2088 * algorithm, the code will be character-set independent
2089 * (and just as fast as doing character arithmetic)
2090 */
2091 if (PL_uudmap['M'] == 0) {
2d3e0934 2092 size_t i;
a6ec74c1 2093
2d3e0934 2094 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2095 PL_uudmap[(U8)PL_uuemap[i]] = i;
2096 /*
2097 * Because ' ' and '`' map to the same value,
2098 * we need to decode them both the same.
2099 */
2100 PL_uudmap[' '] = 0;
2101 }
08ca2aa3 2102 {
f7fe979e 2103 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2104 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2105 if (l) SvPOK_on(sv);
2106 }
2107 if (utf8) {
2108 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2109 I32 a, b, c, d;
2110 char hunk[4];
2111
2112 hunk[3] = '\0';
2113 while (len > 0) {
2114 next_uni_uu(aTHX_ &s, strend, &a);
2115 next_uni_uu(aTHX_ &s, strend, &b);
2116 next_uni_uu(aTHX_ &s, strend, &c);
2117 next_uni_uu(aTHX_ &s, strend, &d);
2118 hunk[0] = (char)((a << 2) | (b >> 4));
2119 hunk[1] = (char)((b << 4) | (c >> 2));
2120 hunk[2] = (char)((c << 6) | d);
2121 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2122 len -= 3;
2123 }
2124 if (s < strend) {
f7fe979e
AL
2125 if (*s == '\n') {
2126 s++;
2127 }
08ca2aa3
TH
2128 else {
2129 /* possible checksum byte */
f7fe979e
AL
2130 const char *skip = s+UTF8SKIP(s);
2131 if (skip < strend && *skip == '\n')
2132 s = skip+1;
08ca2aa3
TH
2133 }
2134 }
2135 }
2136 } else {
fc241834
RGS
2137 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2138 I32 a, b, c, d;
2139 char hunk[4];
a6ec74c1 2140
fc241834
RGS
2141 hunk[3] = '\0';
2142 len = PL_uudmap[*(U8*)s++] & 077;
2143 while (len > 0) {
2144 if (s < strend && ISUUCHAR(*s))
2145 a = PL_uudmap[*(U8*)s++] & 077;
2146 else
2147 a = 0;
2148 if (s < strend && ISUUCHAR(*s))
2149 b = PL_uudmap[*(U8*)s++] & 077;
2150 else
2151 b = 0;
2152 if (s < strend && ISUUCHAR(*s))
2153 c = PL_uudmap[*(U8*)s++] & 077;
2154 else
2155 c = 0;
2156 if (s < strend && ISUUCHAR(*s))
2157 d = PL_uudmap[*(U8*)s++] & 077;
2158 else
2159 d = 0;
2160 hunk[0] = (char)((a << 2) | (b >> 4));
2161 hunk[1] = (char)((b << 4) | (c >> 2));
2162 hunk[2] = (char)((c << 6) | d);
2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2164 len -= 3;
2165 }
2166 if (*s == '\n')
2167 s++;
2168 else /* possible checksum byte */
2169 if (s + 1 < strend && s[1] == '\n')
2170 s += 2;
a6ec74c1 2171 }
08ca2aa3
TH
2172 }
2173 XPUSHs(sv);
a6ec74c1
JH
2174 break;
2175 }
49704364 2176
a6ec74c1 2177 if (checksum) {
1109a392 2178 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2179 (checksum > bits_in_uv &&
08ca2aa3
TH
2180 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2181 NV trouble, anv;
a6ec74c1 2182
08ca2aa3 2183 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2184 while (checksum >= 16) {
2185 checksum -= 16;
08ca2aa3 2186 anv *= 65536.0;
a6ec74c1 2187 }
a6ec74c1 2188 while (cdouble < 0.0)
08ca2aa3
TH
2189 cdouble += anv;
2190 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2191 sv = newSVnv(cdouble);
a6ec74c1
JH
2192 }
2193 else {
fa8ec7c1
NC
2194 if (checksum < bits_in_uv) {
2195 UV mask = ((UV)1 << checksum) - 1;
92d41999 2196 cuv &= mask;
a6ec74c1 2197 }
c4c5f44a 2198 sv = newSVuv(cuv);
a6ec74c1
JH
2199 }
2200 XPUSHs(sv_2mortal(sv));
2201 checksum = 0;
2202 }
fc241834 2203
49704364
WL
2204 if (symptr->flags & FLAG_SLASH){
2205 if (SP - PL_stack_base - start_sp_offset <= 0)
2206 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2207 if( next_symbol(symptr) ){
2208 if( symptr->howlen == e_number )
2209 Perl_croak(aTHX_ "Count after length/code in unpack" );
2210 if( beyond ){
2211 /* ...end of char buffer then no decent length available */
2212 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2213 } else {
2214 /* take top of stack (hope it's numeric) */
2215 len = POPi;
2216 if( len < 0 )
2217 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2218 }
2219 } else {
2220 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2221 }
2222 datumtype = symptr->code;
21c16052 2223 explicit_length = FALSE;
49704364
WL
2224 goto redo_switch;
2225 }
a6ec74c1 2226 }
49704364 2227
18529408
IZ
2228 if (new_s)
2229 *new_s = s;
2230 PUTBACK;
2231 return SP - PL_stack_base - start_sp_offset;
2232}
2233
2234PP(pp_unpack)
2235{
97aff369 2236 dVAR;
18529408 2237 dSP;
bab9c0ac 2238 dPOPPOPssrl;
18529408
IZ
2239 I32 gimme = GIMME_V;
2240 STRLEN llen;
2241 STRLEN rlen;
5c144d81
NC
2242 const char *pat = SvPV_const(left, llen);
2243 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2244 const char *strend = s + rlen;
2245 const char *patend = pat + llen;
08ca2aa3 2246 I32 cnt;
18529408
IZ
2247
2248 PUTBACK;
7accc089 2249 cnt = unpackstring(pat, patend, s, strend,
49704364 2250 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2251 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2252
18529408
IZ
2253 SPAGAIN;
2254 if ( !cnt && gimme == G_SCALAR )
2255 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2256 RETURN;
2257}
2258
f337b084 2259STATIC U8 *
f7fe979e 2260doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2261{
f337b084 2262 *h++ = PL_uuemap[len];
a6ec74c1 2263 while (len > 2) {
f337b084
TH
2264 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2265 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2266 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2267 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2268 s += 3;
2269 len -= 3;
2270 }
2271 if (len > 0) {
f7fe979e 2272 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2273 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2274 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2275 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2276 *h++ = PL_uuemap[0];
a6ec74c1 2277 }
f337b084
TH
2278 *h++ = '\n';
2279 return h;
a6ec74c1
JH
2280}
2281
2282STATIC SV *
f7fe979e 2283S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2284{
8b6e33c7
AL
2285 SV *result = newSVpvn(s, l);
2286 char *const result_c = SvPV_nolen(result); /* convenience */
2287 char *out = result_c;
2288 bool skip = 1;
2289 bool ignore = 0;
a6ec74c1
JH
2290
2291 while (*s) {
2292 switch (*s) {
2293 case ' ':
2294 break;
2295 case '+':
2296 if (!skip) {
2297 SvREFCNT_dec(result);
2298 return (NULL);
2299 }
2300 break;
2301 case '0':
2302 case '1':
2303 case '2':
2304 case '3':
2305 case '4':
2306 case '5':
2307 case '6':
2308 case '7':
2309 case '8':
2310 case '9':
2311 skip = 0;
2312 if (!ignore) {
2313 *(out++) = *s;
2314 }
2315 break;
2316 case '.':
2317 ignore = 1;
2318 break;
2319 default:
2320 SvREFCNT_dec(result);
2321 return (NULL);
2322 }
2323 s++;
2324 }
2325 *(out++) = '\0';
2326 SvCUR_set(result, out - result_c);
2327 return (result);
2328}
2329
2330/* pnum must be '\0' terminated */
2331STATIC int
2332S_div128(pTHX_ SV *pnum, bool *done)
2333{
8b6e33c7
AL
2334 STRLEN len;
2335 char * const s = SvPV(pnum, len);
2336 char *t = s;
2337 int m = 0;
2338
2339 *done = 1;
2340 while (*t) {
2341 const int i = m * 10 + (*t - '0');
2342 const int r = (i >> 7); /* r < 10 */
2343 m = i & 0x7F;
2344 if (r) {
2345 *done = 0;
2346 }
2347 *(t++) = '0' + r;
a6ec74c1 2348 }
8b6e33c7
AL
2349 *(t++) = '\0';
2350 SvCUR_set(pnum, (STRLEN) (t - s));
2351 return (m);
a6ec74c1
JH
2352}
2353
18529408 2354/*
7accc089
JH
2355=for apidoc packlist
2356
2357The engine implementing pack() Perl function.
2358
bfce84ec
AL
2359=cut
2360*/
7accc089
JH
2361
2362void
f7fe979e 2363Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2364{
97aff369 2365 dVAR;
aadb217d
JH
2366 tempsym_t sym;
2367
f7fe979e 2368 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2369
f337b084
TH
2370 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2371 Also make sure any UTF8 flag is loaded */
56eb0262 2372 SvPV_force_nolen(cat);
bfce84ec
AL
2373 if (DO_UTF8(cat))
2374 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2375
49704364
WL
2376 (void)pack_rec( cat, &sym, beglist, endlist );
2377}
2378
f337b084
TH
2379/* like sv_utf8_upgrade, but also repoint the group start markers */
2380STATIC void
2381marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2382 STRLEN len;
2383 tempsym_t *group;
f7fe979e
AL
2384 const char *from_ptr, *from_start, *from_end, **marks, **m;
2385 char *to_start, *to_ptr;
f337b084
TH
2386
2387 if (SvUTF8(sv)) return;
2388
aa07b2f6 2389 from_start = SvPVX_const(sv);
f337b084
TH
2390 from_end = from_start + SvCUR(sv);
2391 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2392 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2393 if (from_ptr == from_end) {
2394 /* Simple case: no character needs to be changed */
2395 SvUTF8_on(sv);
2396 return;
2397 }
2398
3473cf63 2399 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2400 Newx(to_start, len, char);
f337b084
TH
2401 Copy(from_start, to_start, from_ptr-from_start, char);
2402 to_ptr = to_start + (from_ptr-from_start);
2403
a02a5408 2404 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2405 for (group=sym_ptr; group; group = group->previous)
2406 marks[group->level] = from_start + group->strbeg;
2407 marks[sym_ptr->level+1] = from_end+1;
2408 for (m = marks; *m < from_ptr; m++)
2409 *m = to_start + (*m-from_start);
2410
2411 for (;from_ptr < from_end; from_ptr++) {
2412 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2413 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2414 }
2415 *to_ptr = 0;
2416
2417 while (*m == from_ptr) *m++ = to_ptr;
2418 if (m != marks + sym_ptr->level+1) {
2419 Safefree(marks);
2420 Safefree(to_start);
2421 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2422 }
2423 for (group=sym_ptr; group; group = group->previous)
2424 group->strbeg = marks[group->level] - to_start;
2425 Safefree(marks);
2426
2427 if (SvOOK(sv)) {
2428 if (SvIVX(sv)) {
b162af07 2429 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2430 from_start -= SvIVX(sv);
2431 SvIV_set(sv, 0);
2432 }
2433 SvFLAGS(sv) &= ~SVf_OOK;
2434 }
2435 if (SvLEN(sv) != 0)
2436 Safefree(from_start);
f880fe2f 2437 SvPV_set(sv, to_start);
b162af07
SP
2438 SvCUR_set(sv, to_ptr - to_start);
2439 SvLEN_set(sv, len);
f337b084
TH
2440 SvUTF8_on(sv);
2441}
2442
2443/* Exponential string grower. Makes string extension effectively O(n)
2444 needed says how many extra bytes we need (not counting the final '\0')
2445 Only grows the string if there is an actual lack of space
2446*/
2447STATIC char *
0bd48802 2448S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2449 const STRLEN cur = SvCUR(sv);
2450 const STRLEN len = SvLEN(sv);
f337b084
TH
2451 STRLEN extend;
2452 if (len - cur > needed) return SvPVX(sv);
2453 extend = needed > len ? needed : len;
2454 return SvGROW(sv, len+extend+1);
2455}
49704364
WL
2456
2457STATIC
2458SV **
f337b084 2459S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2460{
97aff369 2461 dVAR;
49704364 2462 tempsym_t lookahead;
f337b084
TH
2463 I32 items = endlist - beglist;
2464 bool found = next_symbol(symptr);
2465 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2466 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2467
2468 if (symptr->level == 0 && found && symptr->code == 'U') {
2469 marked_upgrade(aTHX_ cat, symptr);
2470 symptr->flags |= FLAG_DO_UTF8;
2471 utf8 = 0;
49704364 2472 }
f337b084 2473 symptr->strbeg = SvCUR(cat);
49704364
WL
2474
2475 while (found) {
f337b084
TH
2476 SV *fromstr;
2477 STRLEN fromlen;
2478 I32 len;
a0714e2c 2479 SV *lengthcode = NULL;
49704364 2480 I32 datumtype = symptr->code;
f337b084
TH
2481 howlen_t howlen = symptr->howlen;
2482 char *start = SvPVX(cat);
2483 char *cur = start + SvCUR(cat);
49704364 2484
f337b084
TH
2485#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2486
2487 switch (howlen) {
fc241834 2488 case e_star:
f337b084
TH
2489 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2490 0 : items;
2491 break;
2492 default:
2493 /* e_no_len and e_number */
2494 len = symptr->length;
49704364
WL
2495 break;
2496 }
2497
f337b084 2498 if (len) {
a7a3cfaa 2499 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2500
a7a3cfaa
TH
2501 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2502 /* We can process this letter. */
2503 STRLEN size = props & PACK_SIZE_MASK;
2504 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2505 }
f337b084
TH
2506 }
2507
49704364
WL
2508 /* Look ahead for next symbol. Do we have code/code? */
2509 lookahead = *symptr;
2510 found = next_symbol(&lookahead);
246f24af
TH
2511 if (symptr->flags & FLAG_SLASH) {
2512 IV count;
f337b084 2513 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2514 if (strchr("aAZ", lookahead.code)) {
2515 if (lookahead.howlen == e_number) count = lookahead.length;
2516 else {
ce399ba6
NC
2517 if (items > 0) {
2518 if (SvGAMAGIC(*beglist)) {
2519 /* Avoid reading the active data more than once
2520 by copying it to a temporary. */
2521 STRLEN len;
2522 const char *const pv = SvPV_const(*beglist, len);
2523 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2524 if (SvUTF8(*beglist))
2525 SvUTF8_on(temp);
2526 *beglist = temp;
2527 }
246f24af
TH
2528 count = DO_UTF8(*beglist) ?
2529 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2530 }
246f24af
TH
2531 else count = 0;
2532 if (lookahead.code == 'Z') count++;
2533 }
2534 } else {
2535 if (lookahead.howlen == e_number && lookahead.length < items)
2536 count = lookahead.length;
2537 else count = items;
2538 }
2539 lookahead.howlen = e_number;
2540 lookahead.length = count;
2541 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2542 }
49704364 2543
fc241834
RGS
2544 /* Code inside the switch must take care to properly update
2545 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2546 doesn't simply leave using break */
1109a392 2547 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2548 default:
f337b084
TH
2549 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2550 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2551 case '%':
49704364 2552 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2553 {
2554 char *from;
2555#ifdef PERL_PACK_CAN_SHRIEKSIGN
2556 case '.' | TYPE_IS_SHRIEKING:
2557#endif
2558 case '.':
2559 if (howlen == e_star) from = start;
2560 else if (len == 0) from = cur;
2561 else {
2562 tempsym_t *group = symptr;
2563
2564 while (--len && group) group = group->previous;
2565 from = group ? start + group->strbeg : start;
2566 }
2567 fromstr = NEXTFROM;
2568 len = SvIV(fromstr);
2569 goto resize;
2570#ifdef PERL_PACK_CAN_SHRIEKSIGN
2571 case '@' | TYPE_IS_SHRIEKING:
2572#endif
a6ec74c1 2573 case '@':
28be1210
TH
2574 from = start + symptr->strbeg;
2575 resize:
2576#ifdef PERL_PACK_CAN_SHRIEKSIGN
2577 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2578#else /* PERL_PACK_CAN_SHRIEKSIGN */
2579 if (utf8)
2580#endif
2581 if (len >= 0) {
2582 while (len && from < cur) {
2583 from += UTF8SKIP(from);
2584 len--;
2585 }
2586 if (from > cur)
2587 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2588 if (len) {
2589 /* Here we know from == cur */
2590 grow:
2591 GROWING(0, cat, start, cur, len);
2592 Zero(cur, len, char);
2593 cur += len;
2594 } else if (from < cur) {
2595 len = cur - from;
2596 goto shrink;
2597 } else goto no_change;
2598 } else {
2599 cur = from;
2600 len = -len;
2601 goto utf8_shrink;
f337b084 2602 }
28be1210
TH
2603 else {
2604 len -= cur - from;
f337b084 2605 if (len > 0) goto grow;
28be1210 2606 if (len == 0) goto no_change;
fc241834 2607 len = -len;
28be1210 2608 goto shrink;
f337b084 2609 }
a6ec74c1 2610 break;
28be1210 2611 }
fc241834 2612 case '(': {
49704364 2613 tempsym_t savsym = *symptr;
66c611c5
MHM
2614 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2615 symptr->flags |= group_modifiers;
49704364
WL
2616 symptr->patend = savsym.grpend;
2617 symptr->level++;
f337b084 2618 symptr->previous = &lookahead;
18529408 2619 while (len--) {
f337b084
TH
2620 U32 was_utf8;
2621 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2622 else symptr->flags &= ~FLAG_PARSE_UTF8;
2623 was_utf8 = SvUTF8(cat);
49704364 2624 symptr->patptr = savsym.grpbeg;
f337b084
TH
2625 beglist = pack_rec(cat, symptr, beglist, endlist);
2626 if (SvUTF8(cat) != was_utf8)
2627 /* This had better be an upgrade while in utf8==0 mode */
2628 utf8 = 1;
2629
49704364 2630 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2631 break; /* No way to continue */
2632 }
f337b084
TH
2633 lookahead.flags = symptr->flags & ~group_modifiers;
2634 goto no_change;
18529408 2635 }
62f95557
IZ
2636 case 'X' | TYPE_IS_SHRIEKING:
2637 if (!len) /* Avoid division by 0 */
2638 len = 1;
f337b084
TH
2639 if (utf8) {
2640 char *hop, *last;
2641 I32 l = len;
2642 hop = last = start;
2643 while (hop < cur) {
2644 hop += UTF8SKIP(hop);
2645 if (--l == 0) {
2646 last = hop;
2647 l = len;
2648 }
2649 }
2650 if (last > cur)
2651 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2652 cur = last;
2653 break;
2654 }
2655 len = (cur-start) % len;
62f95557 2656 /* FALL THROUGH */
a6ec74c1 2657 case 'X':
f337b084
TH
2658 if (utf8) {
2659 if (len < 1) goto no_change;
28be1210 2660 utf8_shrink:
f337b084
TH
2661 while (len > 0) {
2662 if (cur <= start)
28be1210
TH
2663 Perl_croak(aTHX_ "'%c' outside of string in pack",
2664 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2665 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2666 if (cur <= start)
28be1210
TH
2667 Perl_croak(aTHX_ "'%c' outside of string in pack",
2668 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2669 }
2670 len--;
2671 }
2672 } else {
fc241834 2673 shrink:
f337b084 2674 if (cur - start < len)
28be1210
TH
2675 Perl_croak(aTHX_ "'%c' outside of string in pack",
2676 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2677 cur -= len;
2678 }
2679 if (cur < start+symptr->strbeg) {
2680 /* Make sure group starts don't point into the void */
2681 tempsym_t *group;
9e27e96a 2682 const STRLEN length = cur-start;
f337b084
TH
2683 for (group = symptr;
2684 group && length < group->strbeg;
2685 group = group->previous) group->strbeg = length;
2686 lookahead.strbeg = length;
2687 }
a6ec74c1 2688 break;
fc241834
RGS
2689 case 'x' | TYPE_IS_SHRIEKING: {
2690 I32 ai32;
62f95557
IZ
2691 if (!len) /* Avoid division by 0 */
2692 len = 1;
230e1fce 2693 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2694 else ai32 = (cur - start) % len;
2695 if (ai32 == 0) goto no_change;
2696 len -= ai32;
2697 }
2698 /* FALL THROUGH */
a6ec74c1 2699 case 'x':
f337b084 2700 goto grow;
a6ec74c1
JH
2701 case 'A':
2702 case 'Z':
f337b084 2703 case 'a': {
f7fe979e 2704 const char *aptr;
f337b084 2705
a6ec74c1 2706 fromstr = NEXTFROM;
e62f0680 2707 aptr = SvPV_const(fromstr, fromlen);
f337b084 2708 if (DO_UTF8(fromstr)) {
f7fe979e 2709 const char *end, *s;
f337b084
TH
2710
2711 if (!utf8 && !SvUTF8(cat)) {
2712 marked_upgrade(aTHX_ cat, symptr);
2713 lookahead.flags |= FLAG_DO_UTF8;
2714 lookahead.strbeg = symptr->strbeg;
2715 utf8 = 1;
2716 start = SvPVX(cat);
2717 cur = start + SvCUR(cat);
2718 }
fc241834 2719 if (howlen == e_star) {
f337b084
TH
2720 if (utf8) goto string_copy;
2721 len = fromlen+1;
2722 }
2723 s = aptr;
2724 end = aptr + fromlen;
2725 fromlen = datumtype == 'Z' ? len-1 : len;
2726 while ((I32) fromlen > 0 && s < end) {
2727 s += UTF8SKIP(s);
2728 fromlen--;
2729 }
2730 if (s > end)
2731 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2732 if (utf8) {
fc241834 2733 len = fromlen;
f337b084
TH
2734 if (datumtype == 'Z') len++;
2735 fromlen = s-aptr;
2736 len += fromlen;
fc241834 2737
f337b084 2738 goto string_copy;
fc241834 2739 }
f337b084
TH
2740 fromlen = len - fromlen;
2741 if (datumtype == 'Z') fromlen--;
2742 if (howlen == e_star) {
2743 len = fromlen;
2744 if (datumtype == 'Z') len++;
fc241834 2745 }
f337b084 2746 GROWING(0, cat, start, cur, len);
fc241834 2747 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2748 datumtype | TYPE_IS_PACK))
2749 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2750 cur += fromlen;
a6ec74c1 2751 len -= fromlen;
f337b084
TH
2752 } else if (utf8) {
2753 if (howlen == e_star) {
2754 len = fromlen;
2755 if (datumtype == 'Z') len++;
a6ec74c1 2756 }
f337b084
TH
2757 if (len <= (I32) fromlen) {
2758 fromlen = len;
2759 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2760 }
fc241834 2761 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2762 upgrade, so:
2763 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2764 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2765 len -= fromlen;
2766 while (fromlen > 0) {
230e1fce 2767 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2768 aptr++;
2769 fromlen--;
fc241834 2770 }
f337b084
TH
2771 } else {
2772 string_copy:
2773 if (howlen == e_star) {
2774 len = fromlen;
2775 if (datumtype == 'Z') len++;
2776 }
2777 if (len <= (I32) fromlen) {
2778 fromlen = len;
2779 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2780 }
f337b084
TH
2781 GROWING(0, cat, start, cur, len);
2782 Copy(aptr, cur, fromlen, char);
2783 cur += fromlen;
2784 len -= fromlen;
a6ec74c1 2785 }
f337b084
TH
2786 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2787 cur += len;
a6ec74c1 2788 break;
f337b084 2789 }
a6ec74c1 2790 case 'B':
f337b084 2791 case 'b': {
b83604b4 2792 const char *str, *end;
f337b084
TH
2793 I32 l, field_len;
2794 U8 bits;
2795 bool utf8_source;
2796 U32 utf8_flags;
a6ec74c1 2797
fc241834 2798 fromstr = NEXTFROM;
b83604b4 2799 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2800 end = str + fromlen;
2801 if (DO_UTF8(fromstr)) {
2802 utf8_source = TRUE;
041457d9 2803 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2804 } else {
2805 utf8_source = FALSE;
2806 utf8_flags = 0; /* Unused, but keep compilers happy */
2807 }
2808 if (howlen == e_star) len = fromlen;
2809 field_len = (len+7)/8;
2810 GROWING(utf8, cat, start, cur, field_len);
2811 if (len > (I32)fromlen) len = fromlen;
2812 bits = 0;
2813 l = 0;
2814 if (datumtype == 'B')
2815 while (l++ < len) {
2816 if (utf8_source) {
95b63a38 2817 UV val = 0;
f337b084
TH
2818 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2819 bits |= val & 1;
2820 } else bits |= *str++ & 1;
2821 if (l & 7) bits <<= 1;
fc241834 2822 else {
f337b084
TH
2823 PUSH_BYTE(utf8, cur, bits);
2824 bits = 0;
a6ec74c1
JH
2825 }
2826 }
f337b084
TH
2827 else
2828 /* datumtype == 'b' */
2829 while (l++ < len) {
2830 if (utf8_source) {
95b63a38 2831 UV val = 0;
f337b084
TH
2832 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2833 if (val & 1) bits |= 0x80;
2834 } else if (*str++ & 1)
2835 bits |= 0x80;
2836 if (l & 7) bits >>= 1;
fc241834 2837 else {
f337b084
TH
2838 PUSH_BYTE(utf8, cur, bits);
2839 bits = 0;
a6ec74c1
JH
2840 }
2841 }
f337b084
TH
2842 l--;
2843 if (l & 7) {
fc241834 2844 if (datumtype == 'B')
f337b084 2845 bits <<= 7 - (l & 7);
fc241834 2846 else
f337b084
TH
2847 bits >>= 7 - (l & 7);
2848 PUSH_BYTE(utf8, cur, bits);
2849 l += 7;
a6ec74c1 2850 }
f337b084
TH
2851 /* Determine how many chars are left in the requested field */
2852 l /= 8;
2853 if (howlen == e_star) field_len = 0;
2854 else field_len -= l;
2855 Zero(cur, field_len, char);
2856 cur += field_len;
a6ec74c1 2857 break;
f337b084 2858 }
a6ec74c1 2859 case 'H':
f337b084 2860 case 'h': {
10516c54 2861 const char *str, *end;
f337b084
TH
2862 I32 l, field_len;
2863 U8 bits;
2864 bool utf8_source;
2865 U32 utf8_flags;
a6ec74c1 2866
fc241834 2867 fromstr = NEXTFROM;
10516c54 2868 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2869 end = str + fromlen;
2870 if (DO_UTF8(fromstr)) {
2871 utf8_source = TRUE;
041457d9 2872 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2873 } else {
2874 utf8_source = FALSE;
2875 utf8_flags = 0; /* Unused, but keep compilers happy */
2876 }
2877 if (howlen == e_star) len = fromlen;
2878 field_len = (len+1)/2;
2879 GROWING(utf8, cat, start, cur, field_len);
2880 if (!utf8 && len > (I32)fromlen) len = fromlen;
2881 bits = 0;
2882 l = 0;
2883 if (datumtype == 'H')
2884 while (l++ < len) {
2885 if (utf8_source) {
95b63a38 2886 UV val = 0;
f337b084
TH
2887 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2888 if (val < 256 && isALPHA(val))
2889 bits |= (val + 9) & 0xf;
a6ec74c1 2890 else
f337b084
TH
2891 bits |= val & 0xf;
2892 } else if (isALPHA(*str))
2893 bits |= (*str++ + 9) & 0xf;
2894 else
2895 bits |= *str++ & 0xf;
2896 if (l & 1) bits <<= 4;
fc241834 2897 else {
f337b084
TH
2898 PUSH_BYTE(utf8, cur, bits);
2899 bits = 0;
a6ec74c1
JH
2900 }
2901 }
f337b084
TH
2902 else
2903 while (l++ < len) {
2904 if (utf8_source) {
95b63a38 2905 UV val = 0;
f337b084
TH
2906 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2907 if (val < 256 && isALPHA(val))
2908 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2909 else
f337b084
TH
2910 bits |= (val & 0xf) << 4;
2911 } else if (isALPHA(*str))
2912 bits |= ((*str++ + 9) & 0xf) << 4;
2913 else
2914 bits |= (*str++ & 0xf) << 4;
2915 if (l & 1) bits >>= 4;
fc241834 2916 else {
f337b084
TH
2917 PUSH_BYTE(utf8, cur, bits);
2918 bits = 0;
a6ec74c1 2919 }
fc241834 2920 }
f337b084
TH
2921 l--;
2922 if (l & 1) {
2923 PUSH_BYTE(utf8, cur, bits);
2924 l++;
2925 }
2926 /* Determine how many chars are left in the requested field */
2927 l /= 2;
2928 if (howlen == e_star) field_len = 0;
2929 else field_len -= l;
2930 Zero(cur, field_len, char);
2931 cur += field_len;
2932 break;
fc241834
RGS
2933 }
2934 case 'c':
f337b084
TH
2935 while (len-- > 0) {
2936 IV aiv;
2937 fromstr = NEXTFROM;
2938 aiv = SvIV(fromstr);
2939 if ((-128 > aiv || aiv > 127) &&
2940 ckWARN(WARN_PACK))
2941 Perl_warner(aTHX_ packWARN(WARN_PACK),
2942 "Character in 'c' format wrapped in pack");
585ec06d 2943 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2944 }
2945 break;
2946 case 'C':
f337b084
TH
2947 if (len == 0) {
2948 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2949 break;
2950 }
2951 GROWING(0, cat, start, cur, len);
a6ec74c1 2952 while (len-- > 0) {
f337b084 2953 IV aiv;
a6ec74c1 2954 fromstr = NEXTFROM;
f337b084
TH
2955 aiv = SvIV(fromstr);
2956 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2957 ckWARN(WARN_PACK))
2958 Perl_warner(aTHX_ packWARN(WARN_PACK),
2959 "Character in 'C' format wrapped in pack");
585ec06d 2960 *cur++ = (char)(aiv & 0xff);
f337b084 2961 }
fc241834
RGS
2962 break;
2963 case 'W': {
2964 char *end;
2965 U8 in_bytes = IN_BYTES;
2966
2967 end = start+SvLEN(cat)-1;
2968 if (utf8) end -= UTF8_MAXLEN-1;
2969 while (len-- > 0) {
2970 UV auv;
2971 fromstr = NEXTFROM;
2972 auv = SvUV(fromstr);
2973 if (in_bytes) auv = auv % 0x100;
2974 if (utf8) {
2975 W_utf8:
2976 if (cur > end) {
2977 *cur = '\0';
b162af07 2978 SvCUR_set(cat, cur - start);
fc241834
RGS
2979
2980 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2981 end = start+SvLEN(cat)-UTF8_MAXLEN;
2982 }
230e1fce
NC
2983 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2984 NATIVE_TO_UNI(auv),
041457d9 2985 warn_utf8 ?
230e1fce 2986 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2987 } else {
2988 if (auv >= 0x100) {
2989 if (!SvUTF8(cat)) {
2990 *cur = '\0';
b162af07 2991 SvCUR_set(cat, cur - start);
fc241834
RGS
2992 marked_upgrade(aTHX_ cat, symptr);
2993 lookahead.flags |= FLAG_DO_UTF8;
2994 lookahead.strbeg = symptr->strbeg;
2995 utf8 = 1;
2996 start = SvPVX(cat);
2997 cur = start + SvCUR(cat);
2998 end = start+SvLEN(cat)-UTF8_MAXLEN;
2999 goto W_utf8;
3000 }
3001 if (ckWARN(WARN_PACK))
3002 Perl_warner(aTHX_ packWARN(WARN_PACK),
3003 "Character in 'W' format wrapped in pack");
3004 auv &= 0xff;
3005 }
3006 if (cur >= end) {
3007 *cur = '\0';
b162af07 3008 SvCUR_set(cat, cur - start);
fc241834
RGS
3009 GROWING(0, cat, start, cur, len+1);
3010 end = start+SvLEN(cat)-1;
3011 }
fe2774ed 3012 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3013 }
3014 }
3015 break;
fc241834
RGS
3016 }
3017 case 'U': {
3018 char *end;
3019
3020 if (len == 0) {
3021 if (!(symptr->flags & FLAG_DO_UTF8)) {
3022 marked_upgrade(aTHX_ cat, symptr);
3023 lookahead.flags |= FLAG_DO_UTF8;
3024 lookahead.strbeg = symptr->strbeg;
3025 }
3026 utf8 = 0;
3027 goto no_change;
3028 }
3029
3030 end = start+SvLEN(cat);
3031 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3032 while (len-- > 0) {
fc241834 3033 UV auv;
a6ec74c1 3034 fromstr = NEXTFROM;
fc241834
RGS
3035 auv = SvUV(fromstr);
3036 if (utf8) {
230e1fce 3037 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3038 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3039 warn_utf8 ?
fc241834
RGS
3040 0 : UNICODE_ALLOW_ANY);
3041 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3042 *cur = '\0';
b162af07 3043 SvCUR_set(cat, cur - start);
fc241834
RGS
3044 GROWING(0, cat, start, cur,
3045 len+(endb-buffer)*UTF8_EXPAND);
3046 end = start+SvLEN(cat);
3047 }
64844641 3048 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3049 } else {
3050 if (cur >= end) {
3051 *cur = '\0';
b162af07 3052 SvCUR_set(cat, cur - start);
fc241834
RGS
3053 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3054 end = start+SvLEN(cat)-UTF8_MAXLEN;
3055 }
230e1fce 3056 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3057 warn_utf8 ?
230e1fce 3058 0 : UNICODE_ALLOW_ANY);
fc241834 3059 }
a6ec74c1 3060 }
a6ec74c1 3061 break;
fc241834 3062 }
a6ec74c1
JH
3063 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3064 case 'f':
a6ec74c1 3065 while (len-- > 0) {
f337b084
TH
3066 float afloat;
3067 NV anv;
a6ec74c1 3068 fromstr = NEXTFROM;
f337b084 3069 anv = SvNV(fromstr);
5cdb9e01 3070#ifdef __VOS__
f337b084 3071 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3072 during conversion from double to float into infinity, so we
3073 do it by hand. This code should either be generalized for
3074 any OS that needs it, or removed if and when VOS implements
3075 posix-976 (suggestion to support mapping to infinity).
3076 Paul.Green@stratus.com 02-04-02. */
f337b084 3077 if (anv > FLT_MAX)
fc241834 3078 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3079 else if (anv < -FLT_MAX)
fc241834 3080 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3081 else afloat = (float) anv;
3082#else /* __VOS__ */
baf3cf9c 3083# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3084 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3085 * on Alpha; fake it if we don't have them.
3086 */
f337b084 3087 if (anv > FLT_MAX)
fc241834 3088 afloat = FLT_MAX;
f337b084 3089 else if (anv < -FLT_MAX)
fc241834 3090 afloat = -FLT_MAX;
f337b084 3091 else afloat = (float)anv;
baf3cf9c 3092# else
f337b084 3093 afloat = (float)anv;
baf3cf9c 3094# endif
f337b084 3095#endif /* __VOS__ */
1109a392 3096 DO_BO_PACK_N(afloat, float);
f337b084 3097 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3098 }
3099 break;
3100 case 'd':
a6ec74c1 3101 while (len-- > 0) {
f337b084
TH
3102 double adouble;
3103 NV anv;
a6ec74c1 3104 fromstr = NEXTFROM;
f337b084 3105 anv = SvNV(fromstr);
5cdb9e01 3106#ifdef __VOS__
f337b084 3107 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3108 during conversion from long double to double into infinity,
3109 so we do it by hand. This code should either be generalized
3110 for any OS that needs it, or removed if and when VOS
3111 implements posix-976 (suggestion to support mapping to
3112 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3113 if (anv > DBL_MAX)
fc241834 3114 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3115 else if (anv < -DBL_MAX)
fc241834 3116 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3117 else adouble = (double) anv;
3118#else /* __VOS__ */
baf3cf9c 3119# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3120 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3121 * on Alpha; fake it if we don't have them.
3122 */
f337b084 3123 if (anv > DBL_MAX)
fc241834 3124 adouble = DBL_MAX;
f337b084 3125 else if (anv < -DBL_MAX)
fc241834 3126 adouble = -DBL_MAX;
f337b084 3127 else adouble = (double)anv;
baf3cf9c 3128# else
f337b084 3129 adouble = (double)anv;
baf3cf9c 3130# endif
f337b084 3131#endif /* __VOS__ */
1109a392 3132 DO_BO_PACK_N(adouble, double);
f337b084 3133 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3134 }
3135 break;
fc241834
RGS
3136 case 'F': {
3137 NV anv;
1109a392 3138 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3139 while (len-- > 0) {
3140 fromstr = NEXTFROM;
3141 anv = SvNV(fromstr);
1109a392 3142 DO_BO_PACK_N(anv, NV);
fc241834 3143 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3144 }
3145 break;
fc241834 3146 }
92d41999 3147#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3148 case 'D': {
3149 long double aldouble;
1109a392
MHM
3150 /* long doubles can have unused bits, which may be nonzero */
3151 Zero(&aldouble, 1, long double);
92d41999
JH
3152 while (len-- > 0) {
3153 fromstr = NEXTFROM;
3154 aldouble = (long double)SvNV(fromstr);
1109a392 3155 DO_BO_PACK_N(aldouble, long double);
fc241834 3156 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3157 }
3158 break;
fc241834 3159 }
92d41999 3160#endif
7212898e 3161#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3162 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3163#endif
a6ec74c1
JH
3164 case 'n':
3165 while (len-- > 0) {
f337b084 3166 I16 ai16;
a6ec74c1 3167 fromstr = NEXTFROM;
ef108786 3168 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3169#ifdef HAS_HTONS
ef108786 3170 ai16 = PerlSock_htons(ai16);
a6ec74c1 3171#endif
f337b084 3172 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3173 }
3174 break;
7212898e 3175#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3176 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3177#endif
a6ec74c1
JH
3178 case 'v':
3179 while (len-- > 0) {
f337b084 3180 I16 ai16;
a6ec74c1 3181 fromstr = NEXTFROM;
ef108786 3182 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3183#ifdef HAS_HTOVS
ef108786 3184 ai16 = htovs(ai16);
a6ec74c1 3185#endif
f337b084 3186 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3187 }
3188 break;
49704364 3189 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3190#if SHORTSIZE != SIZE16
fc241834 3191 while (len-- > 0) {
f337b084 3192 unsigned short aushort;
fc241834
RGS
3193 fromstr = NEXTFROM;
3194 aushort = SvUV(fromstr);
3195 DO_BO_PACK(aushort, s);
f337b084 3196 PUSH_VAR(utf8, cur, aushort);
fc241834 3197 }
49704364
WL
3198 break;
3199#else
3200 /* Fall through! */
a6ec74c1 3201#endif
49704364 3202 case 'S':
fc241834 3203 while (len-- > 0) {
f337b084 3204 U16 au16;
fc241834
RGS
3205 fromstr = NEXTFROM;
3206 au16 = (U16)SvUV(fromstr);
3207 DO_BO_PACK(au16, 16);
f337b084 3208 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3209 }
3210 break;
49704364 3211 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3212#if SHORTSIZE != SIZE16
fc241834 3213 while (len-- > 0) {
f337b084 3214 short ashort;
fc241834
RGS
3215 fromstr = NEXTFROM;
3216 ashort = SvIV(fromstr);
3217 DO_BO_PACK(ashort, s);
f337b084 3218 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3219 }
49704364
WL
3220 break;
3221#else
3222 /* Fall through! */
a6ec74c1 3223#endif
49704364
WL
3224 case 's':
3225 while (len-- > 0) {
f337b084 3226 I16 ai16;
49704364 3227 fromstr = NEXTFROM;
ef108786
MHM
3228 ai16 = (I16)SvIV(fromstr);
3229 DO_BO_PACK(ai16, 16);
f337b084 3230 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3231 }
3232 break;
3233 case 'I':
49704364 3234 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3235 while (len-- > 0) {
f337b084 3236 unsigned int auint;
a6ec74c1
JH
3237 fromstr = NEXTFROM;
3238 auint = SvUV(fromstr);
1109a392 3239 DO_BO_PACK(auint, i);
f337b084 3240 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3241 }
3242 break;
92d41999
JH
3243 case 'j':
3244 while (len-- > 0) {
f337b084 3245 IV aiv;
92d41999
JH
3246 fromstr = NEXTFROM;
3247 aiv = SvIV(fromstr);
1109a392
MHM
3248#if IVSIZE == INTSIZE
3249 DO_BO_PACK(aiv, i);
3250#elif IVSIZE == LONGSIZE
3251 DO_BO_PACK(aiv, l);
3252#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3253 DO_BO_PACK(aiv, 64);
f337b084
TH
3254#else
3255 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3256#endif
f337b084 3257 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3258 }
3259 break;
3260 case 'J':
3261 while (len-- > 0) {
f337b084 3262 UV auv;
92d41999
JH
3263 fromstr = NEXTFROM;
3264 auv = SvUV(fromstr);
1109a392
MHM
3265#if UVSIZE == INTSIZE
3266 DO_BO_PACK(auv, i);
3267#elif UVSIZE == LONGSIZE
3268 DO_BO_PACK(auv, l);
3269#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3270 DO_BO_PACK(auv, 64);
f337b084
TH
3271#else
3272 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3273#endif
f337b084 3274 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3275 }
3276 break;
a6ec74c1
JH
3277 case 'w':
3278 while (len-- > 0) {
f337b084 3279 NV anv;
a6ec74c1 3280 fromstr = NEXTFROM;
15e9f109 3281 anv = SvNV(fromstr);
a6ec74c1 3282
f337b084
TH
3283 if (anv < 0) {
3284 *cur = '\0';
b162af07 3285 SvCUR_set(cat, cur - start);
49704364 3286 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3287 }
a6ec74c1 3288
196b62db
NC
3289 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3290 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3291 any negative IVs will have already been got by the croak()
3292 above. IOK is untrue for fractions, so we test them
3293 against UV_MAX_P1. */
f337b084
TH
3294 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3295 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3296 char *in = buf + sizeof(buf);
196b62db 3297 UV auv = SvUV(fromstr);
a6ec74c1
JH
3298
3299 do {
eb160463 3300 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3301 auv >>= 7;
3302 } while (auv);
3303 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3304 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3305 in, (buf + sizeof(buf)) - in);
3306 } else if (SvPOKp(fromstr))
3307 goto w_string;
a6ec74c1 3308 else if (SvNOKp(fromstr)) {
0258719b
NC
3309 /* 10**NV_MAX_10_EXP is the largest power of 10
3310 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3311 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3312 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3313 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3314 Some C compilers are strict about integral constant
3315 expressions so we conservatively divide by a slightly
3316 smaller integer instead of multiplying by the exact
3317 floating-point value.
0258719b
NC
3318 */
3319#ifdef NV_MAX_10_EXP
f337b084 3320 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3321 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3322#else
f337b084 3323 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3324 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3325#endif
a6ec74c1
JH
3326 char *in = buf + sizeof(buf);
3327
8b6e33c7 3328 anv = Perl_floor(anv);
a6ec74c1 3329 do {
8b6e33c7 3330 const NV next = Perl_floor(anv / 128);
a6ec74c1 3331 if (in <= buf) /* this cannot happen ;-) */
49704364 3332 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3333 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3334 anv = next;
3335 } while (anv > 0);
a6ec74c1 3336 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3337 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3338 in, (buf + sizeof(buf)) - in);
3339 } else {
8b6e33c7
AL
3340 const char *from;
3341 char *result, *in;
735b914b
JH
3342 SV *norm;
3343 STRLEN len;
3344 bool done;
3345
f337b084 3346 w_string:
735b914b 3347 /* Copy string and check for compliance */
349d4f2f 3348 from = SvPV_const(fromstr, len);
735b914b 3349 if ((norm = is_an_int(from, len)) == NULL)
49704364 3350 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3351
a02a5408 3352 Newx(result, len, char);
735b914b
JH
3353 in = result + len;
3354 done = FALSE;
f337b084 3355 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3356 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3357 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3358 in, (result + len) - in);
735b914b
JH
3359 Safefree(result);
3360 SvREFCNT_dec(norm); /* free norm */
fc241834 3361 }
a6ec74c1
JH
3362 }
3363 break;
3364 case 'i':
49704364 3365 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3366 while (len-- > 0) {
f337b084 3367 int aint;
a6ec74c1
JH
3368 fromstr = NEXTFROM;
3369 aint = SvIV(fromstr);
1109a392 3370 DO_BO_PACK(aint, i);
f337b084 3371 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3372 }
3373 break;
7212898e 3374#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3375 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3376#endif
a6ec74c1
JH
3377 case 'N':
3378 while (len-- > 0) {
f337b084 3379 U32 au32;
a6ec74c1 3380 fromstr = NEXTFROM;
ef108786 3381 au32 = SvUV(fromstr);
a6ec74c1 3382#ifdef HAS_HTONL
ef108786 3383 au32 = PerlSock_htonl(au32);
a6ec74c1 3384#endif
f337b084 3385 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3386 }
3387 break;
7212898e 3388#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3389 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3390#endif
a6ec74c1
JH
3391 case 'V':
3392 while (len-- > 0) {
f337b084 3393 U32 au32;
a6ec74c1 3394 fromstr = NEXTFROM;
ef108786 3395 au32 = SvUV(fromstr);
a6ec74c1 3396#ifdef HAS_HTOVL
ef108786 3397 au32 = htovl(au32);
a6ec74c1 3398#endif
f337b084 3399 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3400 }
3401 break;
49704364 3402 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3403#if LONGSIZE != SIZE32
fc241834 3404 while (len-- > 0) {
f337b084 3405 unsigned long aulong;
fc241834
RGS
3406 fromstr = NEXTFROM;
3407 aulong = SvUV(fromstr);
3408 DO_BO_PACK(aulong, l);
f337b084 3409 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3410 }
49704364
WL
3411 break;
3412#else
3413 /* Fall though! */
a6ec74c1 3414#endif
49704364 3415 case 'L':
fc241834 3416 while (len-- > 0) {
f337b084 3417 U32 au32;
fc241834
RGS
3418 fromstr = NEXTFROM;
3419 au32 = SvUV(fromstr);
3420 DO_BO_PACK(au32, 32);
f337b084 3421 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3422 }
3423 break;
49704364 3424 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3425#if LONGSIZE != SIZE32
fc241834 3426 while (len-- > 0) {
f337b084 3427 long along;
fc241834
RGS
3428 fromstr = NEXTFROM;
3429 along = SvIV(fromstr);
3430 DO_BO_PACK(along, l);
f337b084 3431 PUSH_VAR(utf8, cur, along);
a6ec74c1 3432 }
49704364
WL
3433 break;
3434#else
3435 /* Fall though! */
a6ec74c1 3436#endif
49704364
WL
3437 case 'l':
3438 while (len-- > 0) {
f337b084 3439 I32 ai32;
49704364 3440 fromstr = NEXTFROM;
ef108786
MHM
3441 ai32 = SvIV(fromstr);
3442 DO_BO_PACK(ai32, 32);
f337b084 3443 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3444 }
3445 break;
3446#ifdef HAS_QUAD
3447 case 'Q':
3448 while (len-- > 0) {
f337b084 3449 Uquad_t auquad;
a6ec74c1 3450 fromstr = NEXTFROM;
f337b084 3451 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3452 DO_BO_PACK(auquad, 64);
f337b084 3453 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3454 }
3455 break;
3456 case 'q':
3457 while (len-- > 0) {
f337b084 3458 Quad_t aquad;
a6ec74c1
JH
3459 fromstr = NEXTFROM;
3460 aquad = (Quad_t)SvIV(fromstr);
1109a392 3461 DO_BO_PACK(aquad, 64);
f337b084 3462 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3463 }
3464 break;
f337b084 3465#endif /* HAS_QUAD */
a6ec74c1
JH
3466 case 'P':
3467 len = 1; /* assume SV is correct length */
f337b084 3468 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3469 /* Fall through! */
a6ec74c1
JH
3470 case 'p':
3471 while (len-- > 0) {
83003860 3472 const char *aptr;
f337b084 3473
a6ec74c1 3474 fromstr = NEXTFROM;
28a4f200
TH
3475 SvGETMAGIC(fromstr);
3476 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3477 else {
a6ec74c1
JH
3478 /* XXX better yet, could spirit away the string to
3479 * a safe spot and hang on to it until the result
3480 * of pack() (and all copies of the result) are
3481 * gone.
3482 */
041457d9
DM
3483 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3484 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3485 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3486 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3487 }
3488 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3489 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3490 else
2596d9fe 3491 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3492 }
07409e01 3493 DO_BO_PACK_PC(aptr);
f337b084 3494 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3495 }
3496 break;
fc241834 3497 case 'u': {
f7fe979e 3498 const char *aptr, *aend;
fc241834 3499 bool from_utf8;
f337b084 3500
a6ec74c1 3501 fromstr = NEXTFROM;
fc241834
RGS
3502 if (len <= 2) len = 45;
3503 else len = len / 3 * 3;
3504 if (len >= 64) {
0fdc08b6
RGS
3505 if (ckWARN(WARN_PACK))
3506 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3507 "Field too wide in 'u' format in pack");
3508 len = 63;
3509 }
83003860 3510 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3511 from_utf8 = DO_UTF8(fromstr);
3512 if (from_utf8) {
3513 aend = aptr + fromlen;
3514 fromlen = sv_len_utf8(fromstr);
3515 } else aend = NULL; /* Unused, but keep compilers happy */
3516 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3517 while (fromlen > 0) {
fc241834 3518 U8 *end;
a6ec74c1 3519 I32 todo;
fc241834 3520 U8 hunk[1+63/3*4+1];
a6ec74c1 3521
eb160463 3522 if ((I32)fromlen > len)
a6ec74c1
JH
3523 todo = len;
3524 else
3525 todo = fromlen;
fc241834
RGS
3526 if (from_utf8) {
3527 char buffer[64];
3528 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3529 'u' | TYPE_IS_PACK)) {
3530 *cur = '\0';
b162af07 3531 SvCUR_set(cat, cur - start);
fc241834
RGS
3532 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3533 }
3534 end = doencodes(hunk, buffer, todo);
3535 } else {
3536 end = doencodes(hunk, aptr, todo);
3537 aptr += todo;
3538 }
3539 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3540 fromlen -= todo;
3541 }
a6ec74c1
JH
3542 break;
3543 }
f337b084
TH
3544 }
3545 *cur = '\0';
b162af07 3546 SvCUR_set(cat, cur - start);
f337b084 3547 no_change:
49704364 3548 *symptr = lookahead;
a6ec74c1 3549 }
49704364 3550 return beglist;
18529408
IZ
3551}
3552#undef NEXTFROM
3553
3554
3555PP(pp_pack)
3556{
97aff369 3557 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3558 register SV *cat = TARG;
3559 STRLEN fromlen;
349d4f2f
NC
3560 SV *pat_sv = *++MARK;
3561 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3562 register const char *patend = pat + fromlen;
18529408
IZ
3563
3564 MARK++;
3565 sv_setpvn(cat, "", 0);
f337b084 3566 SvUTF8_off(cat);
18529408 3567
7accc089 3568 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3569
a6ec74c1
JH
3570 SvSETMAGIC(cat);
3571 SP = ORIGMARK;
3572 PUSHs(cat);
3573 RETURN;
3574}
a6ec74c1 3575
73cb7263
NC
3576/*
3577 * Local variables:
3578 * c-indentation-style: bsd
3579 * c-basic-offset: 4
3580 * indent-tabs-mode: t
3581 * End:
3582 *
37442d52
RGS
3583 * ex: set ts=8 sts=4 sw=4 noet:
3584 */