This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to ExtUtils-Install-1.40
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
acde74e1 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
WL
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 182 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
325# elif PTRSIZE == IVSIZE
326# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
330# else
331# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
333# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 335# endif
1109a392 336
7212898e 337# if defined(my_htolen) && defined(my_letohn) && \
1109a392 338 defined(my_htoben) && defined(my_betohn)
7212898e 339# define DO_BO_UNPACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347
7212898e 348# define DO_BO_PACK_N(var, type) \
1109a392 349 STMT_START { \
66c611c5 350 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
353 default: break; \
354 } \
355 } STMT_END
7212898e
NC
356# else
357# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
359# endif
360
f337b084 361#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 362
78d46eaa 363#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 364#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
365#define PACK_SIZE_MASK 0x3F
366
78d46eaa
NC
367/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
369
370#if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
372#endif
373
374typedef U8 packprops_t;
78d46eaa
NC
375#if 'J'-'I' == 1
376/* ASCII */
f43a9a31 377STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
378 /* normal */
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 385#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 386 /* D */ LONG_DOUBLESIZE,
80a13697 387#else
a7a3cfaa 388 0,
80a13697 389#endif
a7a3cfaa
TH
390 0,
391 /* F */ NVSIZE,
392 0, 0,
393 /* I */ sizeof(unsigned int),
394 /* J */ UVSIZE,
395 0,
396 /* L */ SIZE32,
397 0,
398 /* N */ SIZE32,
399 0, 0,
80a13697 400#if defined(HAS_QUAD)
a7a3cfaa 401 /* Q */ sizeof(Uquad_t),
80a13697 402#else
a7a3cfaa 403 0,
80a13697 404#endif
a7a3cfaa
TH
405 0,
406 /* S */ SIZE16,
407 0,
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* V */ SIZE32,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
414 0,
415 /* f */ sizeof(float),
416 0, 0,
417 /* i */ sizeof(int),
418 /* j */ IVSIZE,
419 0,
420 /* l */ SIZE32,
421 0,
422 /* n */ SIZE16,
423 0,
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* q */ sizeof(Quad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* s */ SIZE16,
432 0, 0,
433 /* v */ SIZE16,
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
444 /* shrieking */
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
451 0, 0,
452 /* L */ sizeof(unsigned long),
453 0,
7212898e 454#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 455 /* N */ SIZE32,
7212898e 456#else
a7a3cfaa 457 0,
7212898e 458#endif
a7a3cfaa
TH
459 0, 0, 0, 0,
460 /* S */ sizeof(unsigned short),
461 0, 0,
7212898e 462#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 463 /* V */ SIZE32,
7212898e 464#else
a7a3cfaa 465 0,
7212898e 466#endif
a7a3cfaa
TH
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0,
469 /* i */ sizeof(int),
470 0, 0,
471 /* l */ sizeof(long),
472 0,
7212898e 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 474 /* n */ SIZE16,
7212898e 475#else
a7a3cfaa 476 0,
7212898e 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0,
479 /* s */ sizeof(short),
480 0, 0,
7212898e 481#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 482 /* v */ SIZE16,
7212898e 483#else
a7a3cfaa 484 0,
7212898e 485#endif
a7a3cfaa
TH
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
495};
496#else
497/* EBCDIC (or bust) */
f43a9a31 498STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
499 /* normal */
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
511 0,
512 /* f */ sizeof(float),
513 0, 0,
514 /* i */ sizeof(int),
515 0, 0, 0, 0, 0, 0, 0,
516 /* j */ IVSIZE,
517 0,
518 /* l */ SIZE32,
519 0,
520 /* n */ SIZE16,
521 0,
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 523#if defined(HAS_QUAD)
a7a3cfaa 524 /* q */ sizeof(Quad_t),
80a13697 525#else
a7a3cfaa 526 0,
80a13697 527#endif
a7a3cfaa
TH
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* s */ SIZE16,
530 0, 0,
531 /* v */ SIZE16,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 536#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 537 /* D */ LONG_DOUBLESIZE,
80a13697 538#else
a7a3cfaa 539 0,
80a13697 540#endif
a7a3cfaa
TH
541 0,
542 /* F */ NVSIZE,
543 0, 0,
544 /* I */ sizeof(unsigned int),
545 0, 0, 0, 0, 0, 0, 0,
546 /* J */ UVSIZE,
547 0,
548 /* L */ SIZE32,
549 0,
550 /* N */ SIZE32,
551 0, 0,
80a13697 552#if defined(HAS_QUAD)
a7a3cfaa 553 /* Q */ sizeof(Uquad_t),
80a13697 554#else
a7a3cfaa 555 0,
80a13697 556#endif
a7a3cfaa
TH
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 /* S */ SIZE16,
559 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* V */ SIZE32,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 /* shrieking */
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* i */ sizeof(int),
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
578 0,
7212898e 579#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 580 /* n */ SIZE16,
7212898e 581#else
a7a3cfaa 582 0,
7212898e 583#endif
a7a3cfaa
TH
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
586 0, 0,
7212898e 587#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 588 /* v */ SIZE16,
7212898e 589#else
a7a3cfaa 590 0,
7212898e 591#endif
a7a3cfaa
TH
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
598 0,
7212898e 599#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 600 /* N */ SIZE32,
7212898e 601#else
a7a3cfaa 602 0,
7212898e 603#endif
a7a3cfaa
TH
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
606 0, 0,
7212898e 607#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 608 /* V */ SIZE32,
7212898e 609#else
a7a3cfaa 610 0,
7212898e 611#endif
a7a3cfaa
TH
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
614};
615#endif
616
08ca2aa3 617STATIC U8
f7fe979e 618uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 619{
08ca2aa3 620 STRLEN retlen;
0bcc34c2 621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 629 if (val >= 0x100) {
f337b084 630 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
f337b084 633 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
634 val &= 0xff;
635 }
636 *s += retlen;
fe2774ed 637 return (U8)val;
08ca2aa3
TH
638}
639
f337b084
TH
640#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
642 *(U8 *)(s)++)
643
644STATIC bool
f7fe979e 645uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
646{
647 UV val;
648 STRLEN retlen;
f7fe979e 649 const char *from = *s;
08ca2aa3 650 int bad = 0;
f7fe979e 651 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
f337b084 655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
658 bad |= 1;
659 } else from += retlen;
660 if (val >= 0x100) {
661 bad |= 2;
662 val &= 0xff;
663 }
fe2774ed 664 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
665 }
666 /* We have enough characters for the buffer. Did we have problems ? */
667 if (bad) {
668 if (bad & 1) {
669 /* Rewalk the string fragment while warning */
f7fe979e 670 const char *ptr;
9e27e96a 671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
f337b084 674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
675 }
676 if (from > end) from = end;
677 }
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
fc241834 682 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
684 }
685 *s = from;
686 return TRUE;
687}
688
689STATIC bool
f7fe979e 690next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 691{
97aff369 692 dVAR;
08ca2aa3 693 STRLEN retlen;
0bcc34c2 694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
697 *out = 0;
698 return FALSE;
699 }
700 *out = PL_uudmap[val] & 077;
f337b084 701 *s += retlen;
08ca2aa3
TH
702 return TRUE;
703}
78d46eaa 704
f337b084 705STATIC void
f7fe979e 706bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
f337b084 707 U8 buffer[UTF8_MAXLEN];
0bcc34c2 708 const U8 * const end = start + len;
f337b084
TH
709 char *d = *dest;
710 while (start < end) {
f7fe979e 711 const int length =
f337b084
TH
712 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
713 switch(length) {
714 case 1:
715 *d++ = buffer[0];
716 break;
717 case 2:
718 *d++ = buffer[0];
719 *d++ = buffer[1];
720 break;
721 default:
722 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
723 *start, length);
724 }
725 start++;
726 }
727 *dest = d;
728}
729
230e1fce
NC
730#define PUSH_BYTES(utf8, cur, buf, len) \
731STMT_START { \
732 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
733 else { \
734 Copy(buf, cur, len, char); \
735 (cur) += (len); \
736 } \
f337b084
TH
737} STMT_END
738
739#define GROWING(utf8, cat, start, cur, in_len) \
740STMT_START { \
741 STRLEN glen = (in_len); \
3473cf63 742 if (utf8) glen *= UTF8_EXPAND; \
f337b084 743 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 744 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
745 (cur) = (start) + SvCUR(cat); \
746 } \
747} STMT_END
748
749#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
750STMT_START { \
f7fe979e 751 const STRLEN glen = (in_len); \
f337b084 752 STRLEN gl = glen; \
3473cf63 753 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
754 if ((cur) + gl >= (start) + SvLEN(cat)) { \
755 *cur = '\0'; \
b162af07 756 SvCUR_set((cat), (cur) - (start)); \
0bd48802 757 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
758 (cur) = (start) + SvCUR(cat); \
759 } \
760 PUSH_BYTES(utf8, cur, buf, glen); \
761} STMT_END
762
763#define PUSH_BYTE(utf8, s, byte) \
764STMT_START { \
765 if (utf8) { \
f7fe979e 766 const U8 au8 = (byte); \
f337b084
TH
767 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
768 } else *(U8 *)(s)++ = (byte); \
769} STMT_END
770
771/* Only to be used inside a loop (see the break) */
772#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
773STMT_START { \
774 STRLEN retlen; \
775 if (str >= end) break; \
776 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
777 if (retlen == (STRLEN) -1 || retlen == 0) { \
778 *cur = '\0'; \
779 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
780 } \
781 str += retlen; \
782} STMT_END
783
f7fe979e
AL
784static const char *_action( const tempsym_t* symptr )
785{
786 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
787}
788
206947d2 789/* Returns the sizeof() struct described by pat */
028d1f6d 790STATIC I32
f337b084 791S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 792{
f337b084 793 I32 total = 0;
206947d2 794
49704364 795 while (next_symbol(symptr)) {
f337b084 796 I32 len;
f7fe979e 797 int size;
f337b084
TH
798
799 switch (symptr->howlen) {
fc241834 800 case e_star:
49704364 801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 802 _action( symptr ) );
49704364 803 break;
f337b084
TH
804 default:
805 /* e_no_len and e_number */
806 len = symptr->length;
807 break;
49704364
WL
808 }
809
a7a3cfaa 810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 811 if (!size) {
f7fe979e 812 int star;
80a13697
NC
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
815 default:
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 818 _action( symptr ) );
28be1210
TH
819#ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
822#endif
80a13697 823 case '@':
28be1210 824 case '.':
80a13697
NC
825 case '/':
826 case 'U': /* XXXX Is it correct? */
827 case 'w':
828 case 'u':
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 830 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 831 _action( symptr ) );
80a13697
NC
832 case '%':
833 size = 0;
834 break;
835 case '(':
fc241834
RGS
836 {
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
845 *symptr = savsym;
846 break;
847 }
80a13697
NC
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
850 */
851 if (!len) /* Avoid division by 0 */
852 len = 1;
853 len = total % len; /* Assumed: the start is aligned. */
854 /* FALL THROUGH */
855 case 'X':
856 size = -1;
857 if (total < len)
f7fe979e 858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
859 break;
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
862 len = 1;
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
865 len = len - star;
866 else
867 len = 0;
868 /* FALL THROUGH */
869 case 'x':
870 case 'A':
871 case 'Z':
872 case 'a':
80a13697
NC
873 size = 1;
874 break;
875 case 'B':
876 case 'b':
877 len = (len + 7)/8;
878 size = 1;
879 break;
880 case 'H':
881 case 'h':
882 len = (len + 1)/2;
883 size = 1;
884 break;
78d46eaa 885
80a13697
NC
886 case 'P':
887 len = 1;
888 size = sizeof(char*);
78d46eaa
NC
889 break;
890 }
206947d2
IZ
891 }
892 total += len * size;
893 }
894 return total;
895}
896
49704364
WL
897
898/* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
900 */
f7fe979e
AL
901STATIC const char *
902S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 903{
49704364 904 while (patptr < patend) {
f7fe979e 905 const char c = *patptr++;
49704364
WL
906
907 if (isSPACE(c))
908 continue;
909 else if (c == ender)
910 return patptr-1;
911 else if (c == '#') {
912 while (patptr < patend && *patptr != '\n')
913 patptr++;
914 continue;
915 } else if (c == '(')
916 patptr = group_end(patptr, patend, ')') + 1;
917 else if (c == '[')
918 patptr = group_end(patptr, patend, ']') + 1;
18529408 919 }
49704364
WL
920 Perl_croak(aTHX_ "No group ending character '%c' found in template",
921 ender);
922 return 0;
18529408
IZ
923}
924
49704364
WL
925
926/* Convert unsigned decimal number to binary.
927 * Expects a pointer to the first digit and address of length variable
928 * Advances char pointer to 1st non-digit char and returns number
fc241834 929 */
f7fe979e
AL
930STATIC const char *
931S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
932{
933 I32 len = *patptr++ - '0';
934 while (isDIGIT(*patptr)) {
935 if (len >= 0x7FFFFFFF/10)
936 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
937 len = (len * 10) + (*patptr++ - '0');
938 }
939 *lenptr = len;
940 return patptr;
941}
942
943/* The marvellous template parsing routine: Using state stored in *symptr,
944 * locates next template code and count
945 */
946STATIC bool
f337b084 947S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 948{
f7fe979e 949 const char* patptr = symptr->patptr;
0bcc34c2 950 const char* const patend = symptr->patend;
49704364
WL
951
952 symptr->flags &= ~FLAG_SLASH;
953
954 while (patptr < patend) {
955 if (isSPACE(*patptr))
956 patptr++;
957 else if (*patptr == '#') {
958 patptr++;
959 while (patptr < patend && *patptr != '\n')
960 patptr++;
961 if (patptr < patend)
962 patptr++;
963 } else {
fc241834 964 /* We should have found a template code */
49704364 965 I32 code = *patptr++ & 0xFF;
66c611c5 966 U32 inherited_modifiers = 0;
49704364
WL
967
968 if (code == ','){ /* grandfather in commas but with a warning */
969 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
970 symptr->flags |= FLAG_COMMA;
971 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 972 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
973 }
974 continue;
975 }
fc241834 976
49704364 977 /* for '(', skip to ')' */
fc241834 978 if (code == '(') {
49704364
WL
979 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
980 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 981 _action( symptr ) );
49704364
WL
982 symptr->grpbeg = patptr;
983 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
984 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
985 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 986 _action( symptr ) );
49704364
WL
987 }
988
66c611c5
MHM
989 /* look for group modifiers to inherit */
990 if (TYPE_ENDIANNESS(symptr->flags)) {
991 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
992 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
993 }
994
1109a392
MHM
995 /* look for modifiers */
996 while (patptr < patend) {
b7787f18
AL
997 const char *allowed;
998 I32 modifier;
1109a392
MHM
999 switch (*patptr) {
1000 case '!':
1001 modifier = TYPE_IS_SHRIEKING;
7212898e 1002 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1003 break;
7212898e 1004#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1005 case '>':
1006 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1007 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1008 break;
1009 case '<':
1010 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1011 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1012 break;
f337b084 1013#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1014 default:
b7787f18
AL
1015 allowed = "";
1016 modifier = 0;
1109a392
MHM
1017 break;
1018 }
66c611c5 1019
1109a392
MHM
1020 if (modifier == 0)
1021 break;
66c611c5 1022
1109a392
MHM
1023 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1024 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1025 allowed, _action( symptr ) );
66c611c5
MHM
1026
1027 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1028 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1029 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1030 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1031 TYPE_ENDIANNESS_MASK)
1032 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1033 *patptr, _action( symptr ) );
66c611c5 1034
041457d9 1035 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1036 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1037 "Duplicate modifier '%c' after '%c' in %s",
1038 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1039 _action( symptr ) );
1109a392 1040 }
66c611c5 1041
1109a392
MHM
1042 code |= modifier;
1043 patptr++;
49704364
WL
1044 }
1045
66c611c5
MHM
1046 /* inherit modifiers */
1047 code |= inherited_modifiers;
1048
fc241834 1049 /* look for count and/or / */
49704364
WL
1050 if (patptr < patend) {
1051 if (isDIGIT(*patptr)) {
1052 patptr = get_num( patptr, &symptr->length );
1053 symptr->howlen = e_number;
1054
1055 } else if (*patptr == '*') {
1056 patptr++;
1057 symptr->howlen = e_star;
1058
1059 } else if (*patptr == '[') {
f7fe979e 1060 const char* lenptr = ++patptr;
49704364
WL
1061 symptr->howlen = e_number;
1062 patptr = group_end( patptr, patend, ']' ) + 1;
1063 /* what kind of [] is it? */
1064 if (isDIGIT(*lenptr)) {
1065 lenptr = get_num( lenptr, &symptr->length );
1066 if( *lenptr != ']' )
1067 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1068 _action( symptr ) );
49704364
WL
1069 } else {
1070 tempsym_t savsym = *symptr;
1071 symptr->patend = patptr-1;
1072 symptr->patptr = lenptr;
1073 savsym.length = measure_struct(symptr);
1074 *symptr = savsym;
1075 }
1076 } else {
1077 symptr->howlen = e_no_len;
1078 symptr->length = 1;
1079 }
1080
1081 /* try to find / */
1082 while (patptr < patend) {
1083 if (isSPACE(*patptr))
1084 patptr++;
1085 else if (*patptr == '#') {
1086 patptr++;
1087 while (patptr < patend && *patptr != '\n')
1088 patptr++;
1089 if (patptr < patend)
1090 patptr++;
1091 } else {
66c611c5 1092 if (*patptr == '/') {
49704364
WL
1093 symptr->flags |= FLAG_SLASH;
1094 patptr++;
66c611c5
MHM
1095 if (patptr < patend &&
1096 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1097 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1098 _action( symptr ) );
49704364
WL
1099 }
1100 break;
1101 }
18529408 1102 }
49704364
WL
1103 } else {
1104 /* at end - no count, no / */
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 symptr->code = code;
fc241834 1110 symptr->patptr = patptr;
49704364 1111 return TRUE;
18529408 1112 }
49704364 1113 }
fc241834 1114 symptr->patptr = patptr;
49704364 1115 return FALSE;
18529408
IZ
1116}
1117
18529408 1118/*
fc241834 1119 There is no way to cleanly handle the case where we should process the
08ca2aa3 1120 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1121 (e.g. estimates like strend-s as an upper bound for the number of
1122 characters left wouldn't work). So if we foresee the need of this
1123 (pattern starts with U or contains U0), we want to work on the encoded
1124 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1125 themselves if they need to do a lot of unpacks like this on it
1126*/
fc241834 1127STATIC bool
08ca2aa3
TH
1128need_utf8(const char *pat, const char *patend)
1129{
1130 bool first = TRUE;
1131 while (pat < patend) {
1132 if (pat[0] == '#') {
1133 pat++;
f7fe979e 1134 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1135 if (!pat) return FALSE;
1136 } else if (pat[0] == 'U') {
1137 if (first || pat[1] == '0') return TRUE;
1138 } else first = FALSE;
1139 pat++;
1140 }
1141 return FALSE;
1142}
1143
1144STATIC char
1145first_symbol(const char *pat, const char *patend) {
1146 while (pat < patend) {
1147 if (pat[0] != '#') return pat[0];
1148 pat++;
f7fe979e 1149 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1150 if (!pat) return 0;
1151 pat++;
1152 }
1153 return 0;
1154}
1155
1156/*
7accc089
JH
1157=for apidoc unpackstring
1158
608d3aed
WL
1159The engine implementing unpack() Perl function. C<unpackstring> puts the
1160extracted list items on the stack and returns the number of elements.
1161Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1162
1163=cut */
1164
1165I32
f7fe979e 1166Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1167{
f7fe979e 1168 tempsym_t sym;
08ca2aa3 1169
f337b084 1170 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1171 else if (need_utf8(pat, patend)) {
1172 /* We probably should try to avoid this in case a scalar context call
1173 wouldn't get to the "U0" */
1174 STRLEN len = strend - s;
230e1fce 1175 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1176 SAVEFREEPV(s);
1177 strend = s + len;
f337b084 1178 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1179 }
1180
f337b084
TH
1181 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1182 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1183
f7fe979e 1184 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1185
1186 return unpack_rec(&sym, s, s, strend, NULL );
1187}
1188
49704364
WL
1189STATIC
1190I32
f7fe979e 1191S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1192{
27da23d5 1193 dVAR; dSP;
18529408 1194 SV *sv;
f7fe979e 1195 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1196 howlen_t howlen;
a6ec74c1 1197
a6ec74c1 1198 I32 checksum = 0;
92d41999 1199 UV cuv = 0;
a6ec74c1 1200 NV cdouble = 0.0;
f337b084 1201 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1202 bool beyond = FALSE;
21c16052 1203 bool explicit_length;
9e27e96a 1204 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1205 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1206 symptr->strbeg = s - strbeg;
49704364 1207
49704364 1208 while (next_symbol(symptr)) {
a7a3cfaa 1209 packprops_t props;
9e27e96a 1210 I32 len;
f337b084 1211 I32 datumtype = symptr->code;
206947d2 1212 /* do first one only unless in list context
08ca2aa3 1213 / is implemented by unpacking the count, then popping it from the
206947d2 1214 stack, so must check that we're not in the middle of a / */
49704364 1215 if ( unpack_only_one
206947d2 1216 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1217 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1218 break;
49704364 1219
f337b084 1220 switch (howlen = symptr->howlen) {
fc241834
RGS
1221 case e_star:
1222 len = strend - strbeg; /* long enough */
49704364 1223 break;
f337b084
TH
1224 default:
1225 /* e_no_len and e_number */
1226 len = symptr->length;
1227 break;
49704364 1228 }
18529408 1229
21c16052 1230 explicit_length = TRUE;
a6ec74c1 1231 redo_switch:
49704364 1232 beyond = s >= strend;
a7a3cfaa
TH
1233
1234 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1235 if (props) {
1236 /* props nonzero means we can process this letter. */
9e27e96a
AL
1237 const long size = props & PACK_SIZE_MASK;
1238 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1239 if (len > howmany)
1240 len = howmany;
1241
1242 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1243 if (len && unpack_only_one) len = 1;
1244 EXTEND(SP, len);
1245 EXTEND_MORTAL(len);
78d46eaa
NC
1246 }
1247 }
a7a3cfaa 1248
1109a392 1249 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1250 default:
1109a392 1251 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1252
a6ec74c1 1253 case '%':
49704364 1254 if (howlen == e_no_len)
18529408 1255 len = 16; /* len is not specified */
a6ec74c1 1256 checksum = len;
92d41999 1257 cuv = 0;
a6ec74c1 1258 cdouble = 0;
18529408 1259 continue;
a6ec74c1 1260 break;
18529408
IZ
1261 case '(':
1262 {
49704364 1263 tempsym_t savsym = *symptr;
9e27e96a 1264 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1265 symptr->flags |= group_modifiers;
49704364 1266 symptr->patend = savsym.grpend;
28be1210 1267 symptr->previous = &savsym;
49704364 1268 symptr->level++;
18529408
IZ
1269 PUTBACK;
1270 while (len--) {
49704364 1271 symptr->patptr = savsym.grpbeg;
f337b084
TH
1272 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1273 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1274 unpack_rec(symptr, s, strbeg, strend, &s);
1275 if (s == strend && savsym.howlen == e_star)
49704364 1276 break; /* No way to continue */
18529408
IZ
1277 }
1278 SPAGAIN;
28be1210 1279 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1280 *symptr = savsym;
18529408
IZ
1281 break;
1282 }
28be1210
TH
1283#ifdef PERL_PACK_CAN_SHRIEKSIGN
1284 case '.' | TYPE_IS_SHRIEKING:
1285#endif
1286 case '.': {
9e27e96a 1287 const char *from;
28be1210
TH
1288 SV *sv;
1289#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1290 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1291#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1292 const bool u8 = utf8;
28be1210
TH
1293#endif
1294 if (howlen == e_star) from = strbeg;
1295 else if (len <= 0) from = s;
1296 else {
1297 tempsym_t *group = symptr;
1298
1299 while (--len && group) group = group->previous;
1300 from = group ? strbeg + group->strbeg : strbeg;
1301 }
1302 sv = from <= s ?
00646304
CB
1303 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1304 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1305 XPUSHs(sv_2mortal(sv));
1306 break;
1307 }
1308#ifdef PERL_PACK_CAN_SHRIEKSIGN
1309 case '@' | TYPE_IS_SHRIEKING:
1310#endif
a6ec74c1 1311 case '@':
28be1210
TH
1312 s = strbeg + symptr->strbeg;
1313#ifdef PERL_PACK_CAN_SHRIEKSIGN
1314 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1315#else /* PERL_PACK_CAN_SHRIEKSIGN */
1316 if (utf8)
1317#endif
1318 {
08ca2aa3
TH
1319 while (len > 0) {
1320 if (s >= strend)
1321 Perl_croak(aTHX_ "'@' outside of string in unpack");
1322 s += UTF8SKIP(s);
1323 len--;
1324 }
1325 if (s > strend)
1326 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1327 } else {
28be1210 1328 if (strend-s < len)
fc241834 1329 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1330 s += len;
08ca2aa3 1331 }
a6ec74c1 1332 break;
62f95557
IZ
1333 case 'X' | TYPE_IS_SHRIEKING:
1334 if (!len) /* Avoid division by 0 */
1335 len = 1;
08ca2aa3 1336 if (utf8) {
f7fe979e 1337 const char *hop, *last;
f337b084
TH
1338 I32 l = len;
1339 hop = last = strbeg;
1340 while (hop < s) {
1341 hop += UTF8SKIP(hop);
1342 if (--l == 0) {
08ca2aa3 1343 last = hop;
f337b084
TH
1344 l = len;
1345 }
fc241834 1346 }
f337b084
TH
1347 if (last > s)
1348 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1349 s = last;
1350 break;
f337b084
TH
1351 }
1352 len = (s - strbeg) % len;
62f95557 1353 /* FALL THROUGH */
a6ec74c1 1354 case 'X':
08ca2aa3
TH
1355 if (utf8) {
1356 while (len > 0) {
1357 if (s <= strbeg)
1358 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1359 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1360 if (s <= strbeg)
1361 Perl_croak(aTHX_ "'X' outside of string in unpack");
1362 }
1363 len--;
1364 }
1365 } else {
fc241834
RGS
1366 if (len > s - strbeg)
1367 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1368 s -= len;
08ca2aa3 1369 }
a6ec74c1 1370 break;
9e27e96a
AL
1371 case 'x' | TYPE_IS_SHRIEKING: {
1372 I32 ai32;
62f95557
IZ
1373 if (!len) /* Avoid division by 0 */
1374 len = 1;
230e1fce
NC
1375 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1376 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1377 if (ai32 == 0) break;
1378 len -= ai32;
9e27e96a 1379 }
62f95557 1380 /* FALL THROUGH */
a6ec74c1 1381 case 'x':
08ca2aa3
TH
1382 if (utf8) {
1383 while (len>0) {
1384 if (s >= strend)
1385 Perl_croak(aTHX_ "'x' outside of string in unpack");
1386 s += UTF8SKIP(s);
1387 len--;
1388 }
1389 } else {
fc241834
RGS
1390 if (len > strend - s)
1391 Perl_croak(aTHX_ "'x' outside of string in unpack");
1392 s += len;
f337b084 1393 }
a6ec74c1
JH
1394 break;
1395 case '/':
49704364
WL
1396 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1397 break;
a6ec74c1
JH
1398 case 'A':
1399 case 'Z':
1400 case 'a':
08ca2aa3
TH
1401 if (checksum) {
1402 /* Preliminary length estimate is assumed done in 'W' */
1403 if (len > strend - s) len = strend - s;
1404 goto W_checksum;
1405 }
1406 if (utf8) {
1407 I32 l;
f7fe979e 1408 const char *hop;
08ca2aa3
TH
1409 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1410 if (hop >= strend) {
1411 if (hop > strend)
1412 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1413 break;
fc241834 1414 }
a6ec74c1 1415 }
08ca2aa3
TH
1416 if (hop > strend)
1417 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1418 len = hop - s;
1419 } else if (len > strend - s)
1420 len = strend - s;
1421
1422 if (datumtype == 'Z') {
1423 /* 'Z' strips stuff after first null */
f7fe979e 1424 const char *ptr, *end;
f337b084
TH
1425 end = s + len;
1426 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1427 sv = newSVpvn(s, ptr-s);
1428 if (howlen == e_star) /* exact for 'Z*' */
1429 len = ptr-s + (ptr != strend ? 1 : 0);
1430 } else if (datumtype == 'A') {
1431 /* 'A' strips both nulls and spaces */
f7fe979e 1432 const char *ptr;
18bdf90a
TH
1433 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1434 for (ptr = s+len-1; ptr >= s; ptr--)
1435 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1436 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1437 if (ptr >= s) ptr += UTF8SKIP(ptr);
1438 else ptr++;
28be1210 1439 if (ptr > s+len)
18bdf90a
TH
1440 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1441 } else {
1442 for (ptr = s+len-1; ptr >= s; ptr--)
1443 if (*ptr != 0 && !isSPACE(*ptr)) break;
1444 ptr++;
1445 }
08ca2aa3
TH
1446 sv = newSVpvn(s, ptr-s);
1447 } else sv = newSVpvn(s, len);
1448
1449 if (utf8) {
1450 SvUTF8_on(sv);
1451 /* Undo any upgrade done due to need_utf8() */
f337b084 1452 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1453 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1454 }
1455 XPUSHs(sv_2mortal(sv));
08ca2aa3 1456 s += len;
a6ec74c1
JH
1457 break;
1458 case 'B':
08ca2aa3
TH
1459 case 'b': {
1460 char *str;
49704364 1461 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1462 len = (strend - s) * 8;
1463 if (checksum) {
1464 if (!PL_bitcount) {
08ca2aa3 1465 int bits;
a02a5408 1466 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1467 for (bits = 1; bits < 256; bits++) {
1468 if (bits & 1) PL_bitcount[bits]++;
1469 if (bits & 2) PL_bitcount[bits]++;
1470 if (bits & 4) PL_bitcount[bits]++;
1471 if (bits & 8) PL_bitcount[bits]++;
1472 if (bits & 16) PL_bitcount[bits]++;
1473 if (bits & 32) PL_bitcount[bits]++;
1474 if (bits & 64) PL_bitcount[bits]++;
1475 if (bits & 128) PL_bitcount[bits]++;
1476 }
1477 }
f337b084 1478 if (utf8)
08ca2aa3 1479 while (len >= 8 && s < strend) {
f337b084 1480 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1481 len -= 8;
1482 }
f337b084 1483 else
fc241834 1484 while (len >= 8) {
08ca2aa3 1485 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1486 len -= 8;
1487 }
08ca2aa3
TH
1488 if (len && s < strend) {
1489 U8 bits;
f337b084
TH
1490 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1491 if (datumtype == 'b')
a6ec74c1 1492 while (len-- > 0) {
92d41999 1493 if (bits & 1) cuv++;
a6ec74c1
JH
1494 bits >>= 1;
1495 }
f337b084 1496 else
a6ec74c1 1497 while (len-- > 0) {
08ca2aa3 1498 if (bits & 0x80) cuv++;
a6ec74c1
JH
1499 bits <<= 1;
1500 }
fc241834 1501 }
a6ec74c1
JH
1502 break;
1503 }
08ca2aa3 1504
561b68a9 1505 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1506 SvPOK_on(sv);
1507 str = SvPVX(sv);
1508 if (datumtype == 'b') {
f337b084 1509 U8 bits = 0;
f7fe979e 1510 const I32 ai32 = len;
08ca2aa3
TH
1511 for (len = 0; len < ai32; len++) {
1512 if (len & 7) bits >>= 1;
1513 else if (utf8) {
1514 if (s >= strend) break;
f337b084 1515 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1516 } else bits = *(U8 *) s++;
1517 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1518 }
08ca2aa3 1519 } else {
f337b084 1520 U8 bits = 0;
f7fe979e 1521 const I32 ai32 = len;
08ca2aa3
TH
1522 for (len = 0; len < ai32; len++) {
1523 if (len & 7) bits <<= 1;
1524 else if (utf8) {
1525 if (s >= strend) break;
f337b084 1526 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1527 } else bits = *(U8 *) s++;
1528 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1529 }
1530 }
1531 *str = '\0';
aa07b2f6 1532 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1533 XPUSHs(sv);
a6ec74c1 1534 break;
08ca2aa3 1535 }
a6ec74c1 1536 case 'H':
08ca2aa3
TH
1537 case 'h': {
1538 char *str;
fc241834 1539 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1540 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1541 len = (strend - s) * 2;
561b68a9 1542 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1543 SvPOK_on(sv);
1544 str = SvPVX(sv);
1545 if (datumtype == 'h') {
f337b084 1546 U8 bits = 0;
9e27e96a 1547 I32 ai32 = len;
fc241834
RGS
1548 for (len = 0; len < ai32; len++) {
1549 if (len & 1) bits >>= 4;
1550 else if (utf8) {
1551 if (s >= strend) break;
f337b084 1552 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1553 } else bits = * (U8 *) s++;
a6ec74c1
JH
1554 *str++ = PL_hexdigit[bits & 15];
1555 }
08ca2aa3 1556 } else {
f337b084 1557 U8 bits = 0;
f7fe979e 1558 const I32 ai32 = len;
08ca2aa3
TH
1559 for (len = 0; len < ai32; len++) {
1560 if (len & 1) bits <<= 4;
1561 else if (utf8) {
1562 if (s >= strend) break;
f337b084 1563 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1564 } else bits = *(U8 *) s++;
a6ec74c1
JH
1565 *str++ = PL_hexdigit[(bits >> 4) & 15];
1566 }
1567 }
1568 *str = '\0';
aa07b2f6 1569 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1570 XPUSHs(sv);
a6ec74c1 1571 break;
08ca2aa3 1572 }
a6ec74c1 1573 case 'c':
73cb7263 1574 while (len-- > 0) {
f337b084 1575 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1576 if (aint >= 128) /* fake up signed chars */
1577 aint -= 256;
08ca2aa3 1578 if (!checksum)
ac7f3b1b 1579 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1580 else if (checksum > bits_in_uv)
1581 cdouble += (NV)aint;
1582 else
1583 cuv += aint;
a6ec74c1
JH
1584 }
1585 break;
1586 case 'C':
08ca2aa3
TH
1587 case 'W':
1588 W_checksum:
35bcd338 1589 if (len == 0) {
fc241834 1590 if (explicit_length && datumtype == 'C')
08ca2aa3 1591 /* Switch to "character" mode */
f337b084 1592 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1593 break;
1594 }
fc241834 1595 if (datumtype == 'C' ?
f337b084
TH
1596 (symptr->flags & FLAG_DO_UTF8) &&
1597 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1598 while (len-- > 0 && s < strend) {
08ca2aa3 1599 STRLEN retlen;
f7fe979e 1600 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1601 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1602 if (retlen == (STRLEN) -1 || retlen == 0)
1603 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1604 s += retlen;
1605 if (!checksum)
1606 PUSHs(sv_2mortal(newSVuv((UV) val)));
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV) val;
d6d3e8bd 1609 else
08ca2aa3 1610 cuv += val;
fc241834 1611 }
08ca2aa3 1612 } else if (!checksum)
a6ec74c1 1613 while (len-- > 0) {
f7fe979e 1614 const U8 ch = *(U8 *) s++;
08ca2aa3 1615 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1616 }
08ca2aa3
TH
1617 else if (checksum > bits_in_uv)
1618 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1619 else
1620 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1621 break;
1622 case 'U':
35bcd338 1623 if (len == 0) {
08ca2aa3
TH
1624 if (explicit_length) {
1625 /* Switch to "bytes in UTF-8" mode */
f337b084 1626 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1627 else
1628 /* Should be impossible due to the need_utf8() test */
1629 Perl_croak(aTHX_ "U0 mode on a byte string");
1630 }
35bcd338
JH
1631 break;
1632 }
08ca2aa3 1633 if (len > strend - s) len = strend - s;
fc241834 1634 if (!checksum) {
08ca2aa3
TH
1635 if (len && unpack_only_one) len = 1;
1636 EXTEND(SP, len);
1637 EXTEND_MORTAL(len);
fc241834 1638 }
08ca2aa3
TH
1639 while (len-- > 0 && s < strend) {
1640 STRLEN retlen;
1641 UV auv;
1642 if (utf8) {
1643 U8 result[UTF8_MAXLEN];
f7fe979e 1644 const char *ptr = s;
08ca2aa3 1645 STRLEN len;
08ca2aa3
TH
1646 /* Bug: warns about bad utf8 even if we are short on bytes
1647 and will break out of the loop */
230e1fce
NC
1648 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1649 'U'))
08ca2aa3
TH
1650 break;
1651 len = UTF8SKIP(result);
fc241834 1652 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1653 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1654 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1655 s = ptr;
1656 } else {
1657 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1658 if (retlen == (STRLEN) -1 || retlen == 0)
1659 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1660 s += retlen;
1661 }
1662 if (!checksum)
1663 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1664 else if (checksum > bits_in_uv)
08ca2aa3 1665 cdouble += (NV) auv;
73cb7263 1666 else
08ca2aa3 1667 cuv += auv;
a6ec74c1
JH
1668 }
1669 break;
49704364
WL
1670 case 's' | TYPE_IS_SHRIEKING:
1671#if SHORTSIZE != SIZE16
73cb7263 1672 while (len-- > 0) {
08ca2aa3 1673 short ashort;
f337b084
TH
1674 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1675 DO_BO_UNPACK(ashort, s);
08ca2aa3 1676 if (!checksum)
ac7f3b1b 1677 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1678 else if (checksum > bits_in_uv)
1679 cdouble += (NV)ashort;
1680 else
1681 cuv += ashort;
49704364
WL
1682 }
1683 break;
1684#else
1685 /* Fallthrough! */
a6ec74c1 1686#endif
49704364 1687 case 's':
73cb7263 1688 while (len-- > 0) {
08ca2aa3
TH
1689 I16 ai16;
1690
1691#if U16SIZE > SIZE16
1692 ai16 = 0;
1693#endif
f337b084 1694 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1695 DO_BO_UNPACK(ai16, 16);
1109a392 1696#if U16SIZE > SIZE16
73cb7263
NC
1697 if (ai16 > 32767)
1698 ai16 -= 65536;
a6ec74c1 1699#endif
08ca2aa3 1700 if (!checksum)
ac7f3b1b 1701 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1702 else if (checksum > bits_in_uv)
1703 cdouble += (NV)ai16;
1704 else
1705 cuv += ai16;
a6ec74c1
JH
1706 }
1707 break;
49704364
WL
1708 case 'S' | TYPE_IS_SHRIEKING:
1709#if SHORTSIZE != SIZE16
73cb7263 1710 while (len-- > 0) {
08ca2aa3 1711 unsigned short aushort;
f337b084
TH
1712 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1713 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1714 if (!checksum)
1715 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1716 else if (checksum > bits_in_uv)
1717 cdouble += (NV)aushort;
1718 else
1719 cuv += aushort;
49704364
WL
1720 }
1721 break;
1722#else
1723 /* Fallhrough! */
1724#endif
a6ec74c1
JH
1725 case 'v':
1726 case 'n':
1727 case 'S':
73cb7263 1728 while (len-- > 0) {
08ca2aa3
TH
1729 U16 au16;
1730#if U16SIZE > SIZE16
1731 au16 = 0;
1732#endif
f337b084 1733 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1734 DO_BO_UNPACK(au16, 16);
a6ec74c1 1735#ifdef HAS_NTOHS
73cb7263
NC
1736 if (datumtype == 'n')
1737 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1738#endif
1739#ifdef HAS_VTOHS
73cb7263
NC
1740 if (datumtype == 'v')
1741 au16 = vtohs(au16);
a6ec74c1 1742#endif
08ca2aa3
TH
1743 if (!checksum)
1744 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1745 else if (checksum > bits_in_uv)
f337b084 1746 cdouble += (NV) au16;
73cb7263
NC
1747 else
1748 cuv += au16;
a6ec74c1
JH
1749 }
1750 break;
7212898e 1751#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1752 case 'v' | TYPE_IS_SHRIEKING:
1753 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1754 while (len-- > 0) {
08ca2aa3
TH
1755 I16 ai16;
1756# if U16SIZE > SIZE16
1757 ai16 = 0;
1758# endif
f337b084 1759 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1760# ifdef HAS_NTOHS
73cb7263 1761 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1762 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1763# endif /* HAS_NTOHS */
1764# ifdef HAS_VTOHS
73cb7263 1765 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1766 ai16 = (I16) vtohs((U16) ai16);
1767# endif /* HAS_VTOHS */
1768 if (!checksum)
ac7f3b1b 1769 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1770 else if (checksum > bits_in_uv)
08ca2aa3 1771 cdouble += (NV) ai16;
73cb7263
NC
1772 else
1773 cuv += ai16;
068bd2e7
MHM
1774 }
1775 break;
08ca2aa3 1776#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1777 case 'i':
49704364 1778 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1779 while (len-- > 0) {
08ca2aa3 1780 int aint;
f337b084
TH
1781 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1782 DO_BO_UNPACK(aint, i);
08ca2aa3 1783 if (!checksum)
ac7f3b1b 1784 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1785 else if (checksum > bits_in_uv)
1786 cdouble += (NV)aint;
1787 else
1788 cuv += aint;
a6ec74c1
JH
1789 }
1790 break;
1791 case 'I':
49704364 1792 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1793 while (len-- > 0) {
08ca2aa3 1794 unsigned int auint;
f337b084
TH
1795 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1796 DO_BO_UNPACK(auint, i);
08ca2aa3 1797 if (!checksum)
ac7f3b1b 1798 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV)auint;
1801 else
1802 cuv += auint;
a6ec74c1
JH
1803 }
1804 break;
92d41999 1805 case 'j':
73cb7263 1806 while (len-- > 0) {
08ca2aa3 1807 IV aiv;
f337b084 1808 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1809#if IVSIZE == INTSIZE
f337b084 1810 DO_BO_UNPACK(aiv, i);
1109a392 1811#elif IVSIZE == LONGSIZE
f337b084 1812 DO_BO_UNPACK(aiv, l);
1109a392 1813#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1814 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1815#else
1816 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1817#endif
08ca2aa3 1818 if (!checksum)
ac7f3b1b 1819 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1820 else if (checksum > bits_in_uv)
1821 cdouble += (NV)aiv;
1822 else
1823 cuv += aiv;
92d41999
JH
1824 }
1825 break;
1826 case 'J':
73cb7263 1827 while (len-- > 0) {
08ca2aa3 1828 UV auv;
f337b084 1829 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1830#if IVSIZE == INTSIZE
f337b084 1831 DO_BO_UNPACK(auv, i);
08ca2aa3 1832#elif IVSIZE == LONGSIZE
f337b084 1833 DO_BO_UNPACK(auv, l);
08ca2aa3 1834#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1835 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1836#else
1837 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1838#endif
08ca2aa3 1839 if (!checksum)
ac7f3b1b 1840 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1841 else if (checksum > bits_in_uv)
1842 cdouble += (NV)auv;
1843 else
1844 cuv += auv;
92d41999
JH
1845 }
1846 break;
49704364
WL
1847 case 'l' | TYPE_IS_SHRIEKING:
1848#if LONGSIZE != SIZE32
73cb7263 1849 while (len-- > 0) {
08ca2aa3 1850 long along;
f337b084
TH
1851 SHIFT_VAR(utf8, s, strend, along, datumtype);
1852 DO_BO_UNPACK(along, l);
08ca2aa3 1853 if (!checksum)
ac7f3b1b 1854 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1855 else if (checksum > bits_in_uv)
1856 cdouble += (NV)along;
1857 else
1858 cuv += along;
49704364
WL
1859 }
1860 break;
1861#else
1862 /* Fallthrough! */
a6ec74c1 1863#endif
49704364 1864 case 'l':
73cb7263 1865 while (len-- > 0) {
08ca2aa3
TH
1866 I32 ai32;
1867#if U32SIZE > SIZE32
1868 ai32 = 0;
1869#endif
f337b084 1870 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1871 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1872#if U32SIZE > SIZE32
08ca2aa3 1873 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1874#endif
08ca2aa3 1875 if (!checksum)
ac7f3b1b 1876 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1877 else if (checksum > bits_in_uv)
1878 cdouble += (NV)ai32;
1879 else
1880 cuv += ai32;
a6ec74c1
JH
1881 }
1882 break;
49704364
WL
1883 case 'L' | TYPE_IS_SHRIEKING:
1884#if LONGSIZE != SIZE32
73cb7263 1885 while (len-- > 0) {
08ca2aa3 1886 unsigned long aulong;
f337b084
TH
1887 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1888 DO_BO_UNPACK(aulong, l);
08ca2aa3 1889 if (!checksum)
ac7f3b1b 1890 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1891 else if (checksum > bits_in_uv)
1892 cdouble += (NV)aulong;
1893 else
1894 cuv += aulong;
49704364
WL
1895 }
1896 break;
1897#else
1898 /* Fall through! */
1899#endif
a6ec74c1
JH
1900 case 'V':
1901 case 'N':
1902 case 'L':
73cb7263 1903 while (len-- > 0) {
08ca2aa3
TH
1904 U32 au32;
1905#if U32SIZE > SIZE32
1906 au32 = 0;
1907#endif
f337b084 1908 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1909 DO_BO_UNPACK(au32, 32);
a6ec74c1 1910#ifdef HAS_NTOHL
73cb7263
NC
1911 if (datumtype == 'N')
1912 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1913#endif
1914#ifdef HAS_VTOHL
73cb7263
NC
1915 if (datumtype == 'V')
1916 au32 = vtohl(au32);
a6ec74c1 1917#endif
08ca2aa3 1918 if (!checksum)
fc241834
RGS
1919 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1920 else if (checksum > bits_in_uv)
1921 cdouble += (NV)au32;
1922 else
1923 cuv += au32;
a6ec74c1
JH
1924 }
1925 break;
7212898e 1926#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1927 case 'V' | TYPE_IS_SHRIEKING:
1928 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1929 while (len-- > 0) {
08ca2aa3
TH
1930 I32 ai32;
1931# if U32SIZE > SIZE32
1932 ai32 = 0;
1933# endif
f337b084 1934 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1935# ifdef HAS_NTOHL
73cb7263
NC
1936 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1937 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1938# endif
1939# ifdef HAS_VTOHL
73cb7263
NC
1940 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1941 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1942# endif
1943 if (!checksum)
ac7f3b1b 1944 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1945 else if (checksum > bits_in_uv)
1946 cdouble += (NV)ai32;
1947 else
1948 cuv += ai32;
068bd2e7
MHM
1949 }
1950 break;
08ca2aa3 1951#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1952 case 'p':
a6ec74c1 1953 while (len-- > 0) {
f7fe979e 1954 const char *aptr;
f337b084 1955 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1956 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1957 /* newSVpv generates undef if aptr is NULL */
1958 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1959 }
1960 break;
1961 case 'w':
a6ec74c1
JH
1962 {
1963 UV auv = 0;
1964 U32 bytes = 0;
fc241834 1965
08ca2aa3
TH
1966 while (len > 0 && s < strend) {
1967 U8 ch;
f337b084 1968 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1969 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1970 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1971 if (ch < 0x80) {
a6ec74c1 1972 bytes = 0;
ac7f3b1b 1973 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1974 len--;
1975 auv = 0;
08ca2aa3 1976 continue;
a6ec74c1 1977 }
08ca2aa3 1978 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1979 const char *t;
a6ec74c1
JH
1980
1981 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1982 while (s < strend) {
f337b084 1983 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1984 sv = mul128(sv, (U8)(ch & 0x7f));
1985 if (!(ch & 0x80)) {
a6ec74c1
JH
1986 bytes = 0;
1987 break;
1988 }
1989 }
10516c54 1990 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1991 while (*t == '0')
1992 t++;
1993 sv_chop(sv, t);
1994 PUSHs(sv_2mortal(sv));
1995 len--;
1996 auv = 0;
1997 }
1998 }
1999 if ((s >= strend) && bytes)
49704364 2000 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2001 }
2002 break;
2003 case 'P':
49704364
WL
2004 if (symptr->howlen == e_star)
2005 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2006 EXTEND(SP, 1);
2d3e0934 2007 if (s + sizeof(char*) <= strend) {
08ca2aa3 2008 char *aptr;
f337b084 2009 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2010 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2011 /* newSVpvn generates undef if aptr is NULL */
2012 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2013 }
a6ec74c1
JH
2014 break;
2015#ifdef HAS_QUAD
2016 case 'q':
73cb7263 2017 while (len-- > 0) {
08ca2aa3 2018 Quad_t aquad;
f337b084
TH
2019 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2020 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2021 if (!checksum)
2022 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2023 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2024 else if (checksum > bits_in_uv)
2025 cdouble += (NV)aquad;
2026 else
2027 cuv += aquad;
2028 }
a6ec74c1
JH
2029 break;
2030 case 'Q':
73cb7263 2031 while (len-- > 0) {
08ca2aa3 2032 Uquad_t auquad;
f337b084
TH
2033 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2034 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2035 if (!checksum)
2036 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2037 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2038 else if (checksum > bits_in_uv)
2039 cdouble += (NV)auquad;
2040 else
2041 cuv += auquad;
a6ec74c1
JH
2042 }
2043 break;
08ca2aa3 2044#endif /* HAS_QUAD */
a6ec74c1
JH
2045 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2046 case 'f':
73cb7263 2047 while (len-- > 0) {
08ca2aa3 2048 float afloat;
f337b084 2049 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2050 DO_BO_UNPACK_N(afloat, float);
2051 if (!checksum)
2052 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2053 else
73cb7263 2054 cdouble += afloat;
fc241834 2055 }
a6ec74c1
JH
2056 break;
2057 case 'd':
73cb7263 2058 while (len-- > 0) {
08ca2aa3 2059 double adouble;
f337b084 2060 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2061 DO_BO_UNPACK_N(adouble, double);
2062 if (!checksum)
2063 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2064 else
73cb7263 2065 cdouble += adouble;
fc241834 2066 }
a6ec74c1 2067 break;
92d41999 2068 case 'F':
73cb7263 2069 while (len-- > 0) {
08ca2aa3 2070 NV anv;
f337b084 2071 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2072 DO_BO_UNPACK_N(anv, NV);
2073 if (!checksum)
2074 PUSHs(sv_2mortal(newSVnv(anv)));
2075 else
73cb7263 2076 cdouble += anv;
fc241834 2077 }
92d41999
JH
2078 break;
2079#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2080 case 'D':
73cb7263 2081 while (len-- > 0) {
08ca2aa3 2082 long double aldouble;
f337b084 2083 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2084 DO_BO_UNPACK_N(aldouble, long double);
2085 if (!checksum)
2086 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2087 else
2088 cdouble += aldouble;
92d41999
JH
2089 }
2090 break;
2091#endif
a6ec74c1
JH
2092 case 'u':
2093 /* MKS:
2094 * Initialise the decode mapping. By using a table driven
2095 * algorithm, the code will be character-set independent
2096 * (and just as fast as doing character arithmetic)
2097 */
2098 if (PL_uudmap['M'] == 0) {
2d3e0934 2099 size_t i;
a6ec74c1 2100
2d3e0934 2101 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2102 PL_uudmap[(U8)PL_uuemap[i]] = i;
2103 /*
2104 * Because ' ' and '`' map to the same value,
2105 * we need to decode them both the same.
2106 */
2107 PL_uudmap[' '] = 0;
2108 }
08ca2aa3 2109 {
f7fe979e 2110 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2111 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2112 if (l) SvPOK_on(sv);
2113 }
2114 if (utf8) {
2115 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2116 I32 a, b, c, d;
2117 char hunk[4];
2118
2119 hunk[3] = '\0';
2120 while (len > 0) {
2121 next_uni_uu(aTHX_ &s, strend, &a);
2122 next_uni_uu(aTHX_ &s, strend, &b);
2123 next_uni_uu(aTHX_ &s, strend, &c);
2124 next_uni_uu(aTHX_ &s, strend, &d);
2125 hunk[0] = (char)((a << 2) | (b >> 4));
2126 hunk[1] = (char)((b << 4) | (c >> 2));
2127 hunk[2] = (char)((c << 6) | d);
2128 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2129 len -= 3;
2130 }
2131 if (s < strend) {
f7fe979e
AL
2132 if (*s == '\n') {
2133 s++;
2134 }
08ca2aa3
TH
2135 else {
2136 /* possible checksum byte */
f7fe979e
AL
2137 const char *skip = s+UTF8SKIP(s);
2138 if (skip < strend && *skip == '\n')
2139 s = skip+1;
08ca2aa3
TH
2140 }
2141 }
2142 }
2143 } else {
fc241834
RGS
2144 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2145 I32 a, b, c, d;
2146 char hunk[4];
a6ec74c1 2147
fc241834
RGS
2148 hunk[3] = '\0';
2149 len = PL_uudmap[*(U8*)s++] & 077;
2150 while (len > 0) {
2151 if (s < strend && ISUUCHAR(*s))
2152 a = PL_uudmap[*(U8*)s++] & 077;
2153 else
2154 a = 0;
2155 if (s < strend && ISUUCHAR(*s))
2156 b = PL_uudmap[*(U8*)s++] & 077;
2157 else
2158 b = 0;
2159 if (s < strend && ISUUCHAR(*s))
2160 c = PL_uudmap[*(U8*)s++] & 077;
2161 else
2162 c = 0;
2163 if (s < strend && ISUUCHAR(*s))
2164 d = PL_uudmap[*(U8*)s++] & 077;
2165 else
2166 d = 0;
2167 hunk[0] = (char)((a << 2) | (b >> 4));
2168 hunk[1] = (char)((b << 4) | (c >> 2));
2169 hunk[2] = (char)((c << 6) | d);
2170 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2171 len -= 3;
2172 }
2173 if (*s == '\n')
2174 s++;
2175 else /* possible checksum byte */
2176 if (s + 1 < strend && s[1] == '\n')
2177 s += 2;
a6ec74c1 2178 }
08ca2aa3
TH
2179 }
2180 XPUSHs(sv);
a6ec74c1
JH
2181 break;
2182 }
49704364 2183
a6ec74c1 2184 if (checksum) {
1109a392 2185 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2186 (checksum > bits_in_uv &&
08ca2aa3
TH
2187 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2188 NV trouble, anv;
a6ec74c1 2189
08ca2aa3 2190 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2191 while (checksum >= 16) {
2192 checksum -= 16;
08ca2aa3 2193 anv *= 65536.0;
a6ec74c1 2194 }
a6ec74c1 2195 while (cdouble < 0.0)
08ca2aa3
TH
2196 cdouble += anv;
2197 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2198 sv = newSVnv(cdouble);
a6ec74c1
JH
2199 }
2200 else {
fa8ec7c1
NC
2201 if (checksum < bits_in_uv) {
2202 UV mask = ((UV)1 << checksum) - 1;
92d41999 2203 cuv &= mask;
a6ec74c1 2204 }
c4c5f44a 2205 sv = newSVuv(cuv);
a6ec74c1
JH
2206 }
2207 XPUSHs(sv_2mortal(sv));
2208 checksum = 0;
2209 }
fc241834 2210
49704364
WL
2211 if (symptr->flags & FLAG_SLASH){
2212 if (SP - PL_stack_base - start_sp_offset <= 0)
2213 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2214 if( next_symbol(symptr) ){
2215 if( symptr->howlen == e_number )
2216 Perl_croak(aTHX_ "Count after length/code in unpack" );
2217 if( beyond ){
2218 /* ...end of char buffer then no decent length available */
2219 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2220 } else {
2221 /* take top of stack (hope it's numeric) */
2222 len = POPi;
2223 if( len < 0 )
2224 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2225 }
2226 } else {
2227 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2228 }
2229 datumtype = symptr->code;
21c16052 2230 explicit_length = FALSE;
49704364
WL
2231 goto redo_switch;
2232 }
a6ec74c1 2233 }
49704364 2234
18529408
IZ
2235 if (new_s)
2236 *new_s = s;
2237 PUTBACK;
2238 return SP - PL_stack_base - start_sp_offset;
2239}
2240
2241PP(pp_unpack)
2242{
97aff369 2243 dVAR;
18529408 2244 dSP;
bab9c0ac 2245 dPOPPOPssrl;
18529408
IZ
2246 I32 gimme = GIMME_V;
2247 STRLEN llen;
2248 STRLEN rlen;
5c144d81
NC
2249 const char *pat = SvPV_const(left, llen);
2250 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2251 const char *strend = s + rlen;
2252 const char *patend = pat + llen;
08ca2aa3 2253 I32 cnt;
18529408
IZ
2254
2255 PUTBACK;
7accc089 2256 cnt = unpackstring(pat, patend, s, strend,
49704364 2257 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2258 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2259
18529408
IZ
2260 SPAGAIN;
2261 if ( !cnt && gimme == G_SCALAR )
2262 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2263 RETURN;
2264}
2265
f337b084 2266STATIC U8 *
f7fe979e 2267doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2268{
f337b084 2269 *h++ = PL_uuemap[len];
a6ec74c1 2270 while (len > 2) {
f337b084
TH
2271 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2272 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2273 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2274 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2275 s += 3;
2276 len -= 3;
2277 }
2278 if (len > 0) {
f7fe979e 2279 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2280 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2281 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2282 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2283 *h++ = PL_uuemap[0];
a6ec74c1 2284 }
f337b084
TH
2285 *h++ = '\n';
2286 return h;
a6ec74c1
JH
2287}
2288
2289STATIC SV *
f7fe979e 2290S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2291{
8b6e33c7
AL
2292 SV *result = newSVpvn(s, l);
2293 char *const result_c = SvPV_nolen(result); /* convenience */
2294 char *out = result_c;
2295 bool skip = 1;
2296 bool ignore = 0;
a6ec74c1
JH
2297
2298 while (*s) {
2299 switch (*s) {
2300 case ' ':
2301 break;
2302 case '+':
2303 if (!skip) {
2304 SvREFCNT_dec(result);
2305 return (NULL);
2306 }
2307 break;
2308 case '0':
2309 case '1':
2310 case '2':
2311 case '3':
2312 case '4':
2313 case '5':
2314 case '6':
2315 case '7':
2316 case '8':
2317 case '9':
2318 skip = 0;
2319 if (!ignore) {
2320 *(out++) = *s;
2321 }
2322 break;
2323 case '.':
2324 ignore = 1;
2325 break;
2326 default:
2327 SvREFCNT_dec(result);
2328 return (NULL);
2329 }
2330 s++;
2331 }
2332 *(out++) = '\0';
2333 SvCUR_set(result, out - result_c);
2334 return (result);
2335}
2336
2337/* pnum must be '\0' terminated */
2338STATIC int
2339S_div128(pTHX_ SV *pnum, bool *done)
2340{
8b6e33c7
AL
2341 STRLEN len;
2342 char * const s = SvPV(pnum, len);
2343 char *t = s;
2344 int m = 0;
2345
2346 *done = 1;
2347 while (*t) {
2348 const int i = m * 10 + (*t - '0');
2349 const int r = (i >> 7); /* r < 10 */
2350 m = i & 0x7F;
2351 if (r) {
2352 *done = 0;
2353 }
2354 *(t++) = '0' + r;
a6ec74c1 2355 }
8b6e33c7
AL
2356 *(t++) = '\0';
2357 SvCUR_set(pnum, (STRLEN) (t - s));
2358 return (m);
a6ec74c1
JH
2359}
2360
18529408 2361/*
7accc089
JH
2362=for apidoc packlist
2363
2364The engine implementing pack() Perl function.
2365
bfce84ec
AL
2366=cut
2367*/
7accc089
JH
2368
2369void
f7fe979e 2370Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2371{
97aff369 2372 dVAR;
aadb217d
JH
2373 tempsym_t sym;
2374
f7fe979e 2375 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2376
f337b084
TH
2377 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2378 Also make sure any UTF8 flag is loaded */
56eb0262 2379 SvPV_force_nolen(cat);
bfce84ec
AL
2380 if (DO_UTF8(cat))
2381 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2382
49704364
WL
2383 (void)pack_rec( cat, &sym, beglist, endlist );
2384}
2385
f337b084
TH
2386/* like sv_utf8_upgrade, but also repoint the group start markers */
2387STATIC void
2388marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2389 STRLEN len;
2390 tempsym_t *group;
f7fe979e
AL
2391 const char *from_ptr, *from_start, *from_end, **marks, **m;
2392 char *to_start, *to_ptr;
f337b084
TH
2393
2394 if (SvUTF8(sv)) return;
2395
aa07b2f6 2396 from_start = SvPVX_const(sv);
f337b084
TH
2397 from_end = from_start + SvCUR(sv);
2398 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2399 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2400 if (from_ptr == from_end) {
2401 /* Simple case: no character needs to be changed */
2402 SvUTF8_on(sv);
2403 return;
2404 }
2405
3473cf63 2406 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2407 Newx(to_start, len, char);
f337b084
TH
2408 Copy(from_start, to_start, from_ptr-from_start, char);
2409 to_ptr = to_start + (from_ptr-from_start);
2410
a02a5408 2411 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2412 for (group=sym_ptr; group; group = group->previous)
2413 marks[group->level] = from_start + group->strbeg;
2414 marks[sym_ptr->level+1] = from_end+1;
2415 for (m = marks; *m < from_ptr; m++)
2416 *m = to_start + (*m-from_start);
2417
2418 for (;from_ptr < from_end; from_ptr++) {
2419 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2420 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2421 }
2422 *to_ptr = 0;
2423
2424 while (*m == from_ptr) *m++ = to_ptr;
2425 if (m != marks + sym_ptr->level+1) {
2426 Safefree(marks);
2427 Safefree(to_start);
2428 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2429 }
2430 for (group=sym_ptr; group; group = group->previous)
2431 group->strbeg = marks[group->level] - to_start;
2432 Safefree(marks);
2433
2434 if (SvOOK(sv)) {
2435 if (SvIVX(sv)) {
b162af07 2436 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2437 from_start -= SvIVX(sv);
2438 SvIV_set(sv, 0);
2439 }
2440 SvFLAGS(sv) &= ~SVf_OOK;
2441 }
2442 if (SvLEN(sv) != 0)
2443 Safefree(from_start);
f880fe2f 2444 SvPV_set(sv, to_start);
b162af07
SP
2445 SvCUR_set(sv, to_ptr - to_start);
2446 SvLEN_set(sv, len);
f337b084
TH
2447 SvUTF8_on(sv);
2448}
2449
2450/* Exponential string grower. Makes string extension effectively O(n)
2451 needed says how many extra bytes we need (not counting the final '\0')
2452 Only grows the string if there is an actual lack of space
2453*/
2454STATIC char *
0bd48802 2455S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2456 const STRLEN cur = SvCUR(sv);
2457 const STRLEN len = SvLEN(sv);
f337b084
TH
2458 STRLEN extend;
2459 if (len - cur > needed) return SvPVX(sv);
2460 extend = needed > len ? needed : len;
2461 return SvGROW(sv, len+extend+1);
2462}
49704364
WL
2463
2464STATIC
2465SV **
f337b084 2466S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2467{
97aff369 2468 dVAR;
49704364 2469 tempsym_t lookahead;
f337b084
TH
2470 I32 items = endlist - beglist;
2471 bool found = next_symbol(symptr);
2472 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2473 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2474
2475 if (symptr->level == 0 && found && symptr->code == 'U') {
2476 marked_upgrade(aTHX_ cat, symptr);
2477 symptr->flags |= FLAG_DO_UTF8;
2478 utf8 = 0;
49704364 2479 }
f337b084 2480 symptr->strbeg = SvCUR(cat);
49704364
WL
2481
2482 while (found) {
f337b084
TH
2483 SV *fromstr;
2484 STRLEN fromlen;
2485 I32 len;
a0714e2c 2486 SV *lengthcode = NULL;
49704364 2487 I32 datumtype = symptr->code;
f337b084
TH
2488 howlen_t howlen = symptr->howlen;
2489 char *start = SvPVX(cat);
2490 char *cur = start + SvCUR(cat);
49704364 2491
f337b084
TH
2492#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2493
2494 switch (howlen) {
fc241834 2495 case e_star:
f337b084
TH
2496 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2497 0 : items;
2498 break;
2499 default:
2500 /* e_no_len and e_number */
2501 len = symptr->length;
49704364
WL
2502 break;
2503 }
2504
f337b084 2505 if (len) {
a7a3cfaa 2506 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2507
a7a3cfaa
TH
2508 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2509 /* We can process this letter. */
2510 STRLEN size = props & PACK_SIZE_MASK;
2511 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2512 }
f337b084
TH
2513 }
2514
49704364
WL
2515 /* Look ahead for next symbol. Do we have code/code? */
2516 lookahead = *symptr;
2517 found = next_symbol(&lookahead);
246f24af
TH
2518 if (symptr->flags & FLAG_SLASH) {
2519 IV count;
f337b084 2520 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2521 if (strchr("aAZ", lookahead.code)) {
2522 if (lookahead.howlen == e_number) count = lookahead.length;
2523 else {
ce399ba6
NC
2524 if (items > 0) {
2525 if (SvGAMAGIC(*beglist)) {
2526 /* Avoid reading the active data more than once
2527 by copying it to a temporary. */
2528 STRLEN len;
2529 const char *const pv = SvPV_const(*beglist, len);
2530 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2531 if (SvUTF8(*beglist))
2532 SvUTF8_on(temp);
2533 *beglist = temp;
2534 }
246f24af
TH
2535 count = DO_UTF8(*beglist) ?
2536 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2537 }
246f24af
TH
2538 else count = 0;
2539 if (lookahead.code == 'Z') count++;
2540 }
2541 } else {
2542 if (lookahead.howlen == e_number && lookahead.length < items)
2543 count = lookahead.length;
2544 else count = items;
2545 }
2546 lookahead.howlen = e_number;
2547 lookahead.length = count;
2548 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2549 }
49704364 2550
fc241834
RGS
2551 /* Code inside the switch must take care to properly update
2552 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2553 doesn't simply leave using break */
1109a392 2554 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2555 default:
f337b084
TH
2556 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2557 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2558 case '%':
49704364 2559 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2560 {
2561 char *from;
2562#ifdef PERL_PACK_CAN_SHRIEKSIGN
2563 case '.' | TYPE_IS_SHRIEKING:
2564#endif
2565 case '.':
2566 if (howlen == e_star) from = start;
2567 else if (len == 0) from = cur;
2568 else {
2569 tempsym_t *group = symptr;
2570
2571 while (--len && group) group = group->previous;
2572 from = group ? start + group->strbeg : start;
2573 }
2574 fromstr = NEXTFROM;
2575 len = SvIV(fromstr);
2576 goto resize;
2577#ifdef PERL_PACK_CAN_SHRIEKSIGN
2578 case '@' | TYPE_IS_SHRIEKING:
2579#endif
a6ec74c1 2580 case '@':
28be1210
TH
2581 from = start + symptr->strbeg;
2582 resize:
2583#ifdef PERL_PACK_CAN_SHRIEKSIGN
2584 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2585#else /* PERL_PACK_CAN_SHRIEKSIGN */
2586 if (utf8)
2587#endif
2588 if (len >= 0) {
2589 while (len && from < cur) {
2590 from += UTF8SKIP(from);
2591 len--;
2592 }
2593 if (from > cur)
2594 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2595 if (len) {
2596 /* Here we know from == cur */
2597 grow:
2598 GROWING(0, cat, start, cur, len);
2599 Zero(cur, len, char);
2600 cur += len;
2601 } else if (from < cur) {
2602 len = cur - from;
2603 goto shrink;
2604 } else goto no_change;
2605 } else {
2606 cur = from;
2607 len = -len;
2608 goto utf8_shrink;
f337b084 2609 }
28be1210
TH
2610 else {
2611 len -= cur - from;
f337b084 2612 if (len > 0) goto grow;
28be1210 2613 if (len == 0) goto no_change;
fc241834 2614 len = -len;
28be1210 2615 goto shrink;
f337b084 2616 }
a6ec74c1 2617 break;
28be1210 2618 }
fc241834 2619 case '(': {
49704364 2620 tempsym_t savsym = *symptr;
66c611c5
MHM
2621 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2622 symptr->flags |= group_modifiers;
49704364
WL
2623 symptr->patend = savsym.grpend;
2624 symptr->level++;
f337b084 2625 symptr->previous = &lookahead;
18529408 2626 while (len--) {
f337b084
TH
2627 U32 was_utf8;
2628 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2629 else symptr->flags &= ~FLAG_PARSE_UTF8;
2630 was_utf8 = SvUTF8(cat);
49704364 2631 symptr->patptr = savsym.grpbeg;
f337b084
TH
2632 beglist = pack_rec(cat, symptr, beglist, endlist);
2633 if (SvUTF8(cat) != was_utf8)
2634 /* This had better be an upgrade while in utf8==0 mode */
2635 utf8 = 1;
2636
49704364 2637 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2638 break; /* No way to continue */
2639 }
f337b084
TH
2640 lookahead.flags = symptr->flags & ~group_modifiers;
2641 goto no_change;
18529408 2642 }
62f95557
IZ
2643 case 'X' | TYPE_IS_SHRIEKING:
2644 if (!len) /* Avoid division by 0 */
2645 len = 1;
f337b084
TH
2646 if (utf8) {
2647 char *hop, *last;
2648 I32 l = len;
2649 hop = last = start;
2650 while (hop < cur) {
2651 hop += UTF8SKIP(hop);
2652 if (--l == 0) {
2653 last = hop;
2654 l = len;
2655 }
2656 }
2657 if (last > cur)
2658 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2659 cur = last;
2660 break;
2661 }
2662 len = (cur-start) % len;
62f95557 2663 /* FALL THROUGH */
a6ec74c1 2664 case 'X':
f337b084
TH
2665 if (utf8) {
2666 if (len < 1) goto no_change;
28be1210 2667 utf8_shrink:
f337b084
TH
2668 while (len > 0) {
2669 if (cur <= start)
28be1210
TH
2670 Perl_croak(aTHX_ "'%c' outside of string in pack",
2671 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2672 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2673 if (cur <= start)
28be1210
TH
2674 Perl_croak(aTHX_ "'%c' outside of string in pack",
2675 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2676 }
2677 len--;
2678 }
2679 } else {
fc241834 2680 shrink:
f337b084 2681 if (cur - start < len)
28be1210
TH
2682 Perl_croak(aTHX_ "'%c' outside of string in pack",
2683 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2684 cur -= len;
2685 }
2686 if (cur < start+symptr->strbeg) {
2687 /* Make sure group starts don't point into the void */
2688 tempsym_t *group;
9e27e96a 2689 const STRLEN length = cur-start;
f337b084
TH
2690 for (group = symptr;
2691 group && length < group->strbeg;
2692 group = group->previous) group->strbeg = length;
2693 lookahead.strbeg = length;
2694 }
a6ec74c1 2695 break;
fc241834
RGS
2696 case 'x' | TYPE_IS_SHRIEKING: {
2697 I32 ai32;
62f95557
IZ
2698 if (!len) /* Avoid division by 0 */
2699 len = 1;
230e1fce 2700 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2701 else ai32 = (cur - start) % len;
2702 if (ai32 == 0) goto no_change;
2703 len -= ai32;
2704 }
2705 /* FALL THROUGH */
a6ec74c1 2706 case 'x':
f337b084 2707 goto grow;
a6ec74c1
JH
2708 case 'A':
2709 case 'Z':
f337b084 2710 case 'a': {
f7fe979e 2711 const char *aptr;
f337b084 2712
a6ec74c1 2713 fromstr = NEXTFROM;
e62f0680 2714 aptr = SvPV_const(fromstr, fromlen);
f337b084 2715 if (DO_UTF8(fromstr)) {
f7fe979e 2716 const char *end, *s;
f337b084
TH
2717
2718 if (!utf8 && !SvUTF8(cat)) {
2719 marked_upgrade(aTHX_ cat, symptr);
2720 lookahead.flags |= FLAG_DO_UTF8;
2721 lookahead.strbeg = symptr->strbeg;
2722 utf8 = 1;
2723 start = SvPVX(cat);
2724 cur = start + SvCUR(cat);
2725 }
fc241834 2726 if (howlen == e_star) {
f337b084
TH
2727 if (utf8) goto string_copy;
2728 len = fromlen+1;
2729 }
2730 s = aptr;
2731 end = aptr + fromlen;
2732 fromlen = datumtype == 'Z' ? len-1 : len;
2733 while ((I32) fromlen > 0 && s < end) {
2734 s += UTF8SKIP(s);
2735 fromlen--;
2736 }
2737 if (s > end)
2738 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2739 if (utf8) {
fc241834 2740 len = fromlen;
f337b084
TH
2741 if (datumtype == 'Z') len++;
2742 fromlen = s-aptr;
2743 len += fromlen;
fc241834 2744
f337b084 2745 goto string_copy;
fc241834 2746 }
f337b084
TH
2747 fromlen = len - fromlen;
2748 if (datumtype == 'Z') fromlen--;
2749 if (howlen == e_star) {
2750 len = fromlen;
2751 if (datumtype == 'Z') len++;
fc241834 2752 }
f337b084 2753 GROWING(0, cat, start, cur, len);
fc241834 2754 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2755 datumtype | TYPE_IS_PACK))
2756 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2757 cur += fromlen;
a6ec74c1 2758 len -= fromlen;
f337b084
TH
2759 } else if (utf8) {
2760 if (howlen == e_star) {
2761 len = fromlen;
2762 if (datumtype == 'Z') len++;
a6ec74c1 2763 }
f337b084
TH
2764 if (len <= (I32) fromlen) {
2765 fromlen = len;
2766 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2767 }
fc241834 2768 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2769 upgrade, so:
2770 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2771 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2772 len -= fromlen;
2773 while (fromlen > 0) {
230e1fce 2774 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2775 aptr++;
2776 fromlen--;
fc241834 2777 }
f337b084
TH
2778 } else {
2779 string_copy:
2780 if (howlen == e_star) {
2781 len = fromlen;
2782 if (datumtype == 'Z') len++;
2783 }
2784 if (len <= (I32) fromlen) {
2785 fromlen = len;
2786 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2787 }
f337b084
TH
2788 GROWING(0, cat, start, cur, len);
2789 Copy(aptr, cur, fromlen, char);
2790 cur += fromlen;
2791 len -= fromlen;
a6ec74c1 2792 }
f337b084
TH
2793 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2794 cur += len;
a6ec74c1 2795 break;
f337b084 2796 }
a6ec74c1 2797 case 'B':
f337b084 2798 case 'b': {
b83604b4 2799 const char *str, *end;
f337b084
TH
2800 I32 l, field_len;
2801 U8 bits;
2802 bool utf8_source;
2803 U32 utf8_flags;
a6ec74c1 2804
fc241834 2805 fromstr = NEXTFROM;
b83604b4 2806 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2807 end = str + fromlen;
2808 if (DO_UTF8(fromstr)) {
2809 utf8_source = TRUE;
041457d9 2810 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2811 } else {
2812 utf8_source = FALSE;
2813 utf8_flags = 0; /* Unused, but keep compilers happy */
2814 }
2815 if (howlen == e_star) len = fromlen;
2816 field_len = (len+7)/8;
2817 GROWING(utf8, cat, start, cur, field_len);
2818 if (len > (I32)fromlen) len = fromlen;
2819 bits = 0;
2820 l = 0;
2821 if (datumtype == 'B')
2822 while (l++ < len) {
2823 if (utf8_source) {
95b63a38 2824 UV val = 0;
f337b084
TH
2825 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2826 bits |= val & 1;
2827 } else bits |= *str++ & 1;
2828 if (l & 7) bits <<= 1;
fc241834 2829 else {
f337b084
TH
2830 PUSH_BYTE(utf8, cur, bits);
2831 bits = 0;
a6ec74c1
JH
2832 }
2833 }
f337b084
TH
2834 else
2835 /* datumtype == 'b' */
2836 while (l++ < len) {
2837 if (utf8_source) {
95b63a38 2838 UV val = 0;
f337b084
TH
2839 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2840 if (val & 1) bits |= 0x80;
2841 } else if (*str++ & 1)
2842 bits |= 0x80;
2843 if (l & 7) bits >>= 1;
fc241834 2844 else {
f337b084
TH
2845 PUSH_BYTE(utf8, cur, bits);
2846 bits = 0;
a6ec74c1
JH
2847 }
2848 }
f337b084
TH
2849 l--;
2850 if (l & 7) {
fc241834 2851 if (datumtype == 'B')
f337b084 2852 bits <<= 7 - (l & 7);
fc241834 2853 else
f337b084
TH
2854 bits >>= 7 - (l & 7);
2855 PUSH_BYTE(utf8, cur, bits);
2856 l += 7;
a6ec74c1 2857 }
f337b084
TH
2858 /* Determine how many chars are left in the requested field */
2859 l /= 8;
2860 if (howlen == e_star) field_len = 0;
2861 else field_len -= l;
2862 Zero(cur, field_len, char);
2863 cur += field_len;
a6ec74c1 2864 break;
f337b084 2865 }
a6ec74c1 2866 case 'H':
f337b084 2867 case 'h': {
10516c54 2868 const char *str, *end;
f337b084
TH
2869 I32 l, field_len;
2870 U8 bits;
2871 bool utf8_source;
2872 U32 utf8_flags;
a6ec74c1 2873
fc241834 2874 fromstr = NEXTFROM;
10516c54 2875 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2876 end = str + fromlen;
2877 if (DO_UTF8(fromstr)) {
2878 utf8_source = TRUE;
041457d9 2879 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2880 } else {
2881 utf8_source = FALSE;
2882 utf8_flags = 0; /* Unused, but keep compilers happy */
2883 }
2884 if (howlen == e_star) len = fromlen;
2885 field_len = (len+1)/2;
2886 GROWING(utf8, cat, start, cur, field_len);
2887 if (!utf8 && len > (I32)fromlen) len = fromlen;
2888 bits = 0;
2889 l = 0;
2890 if (datumtype == 'H')
2891 while (l++ < len) {
2892 if (utf8_source) {
95b63a38 2893 UV val = 0;
f337b084
TH
2894 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2895 if (val < 256 && isALPHA(val))
2896 bits |= (val + 9) & 0xf;
a6ec74c1 2897 else
f337b084
TH
2898 bits |= val & 0xf;
2899 } else if (isALPHA(*str))
2900 bits |= (*str++ + 9) & 0xf;
2901 else
2902 bits |= *str++ & 0xf;
2903 if (l & 1) bits <<= 4;
fc241834 2904 else {
f337b084
TH
2905 PUSH_BYTE(utf8, cur, bits);
2906 bits = 0;
a6ec74c1
JH
2907 }
2908 }
f337b084
TH
2909 else
2910 while (l++ < len) {
2911 if (utf8_source) {
95b63a38 2912 UV val = 0;
f337b084
TH
2913 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2914 if (val < 256 && isALPHA(val))
2915 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2916 else
f337b084
TH
2917 bits |= (val & 0xf) << 4;
2918 } else if (isALPHA(*str))
2919 bits |= ((*str++ + 9) & 0xf) << 4;
2920 else
2921 bits |= (*str++ & 0xf) << 4;
2922 if (l & 1) bits >>= 4;
fc241834 2923 else {
f337b084
TH
2924 PUSH_BYTE(utf8, cur, bits);
2925 bits = 0;
a6ec74c1 2926 }
fc241834 2927 }
f337b084
TH
2928 l--;
2929 if (l & 1) {
2930 PUSH_BYTE(utf8, cur, bits);
2931 l++;
2932 }
2933 /* Determine how many chars are left in the requested field */
2934 l /= 2;
2935 if (howlen == e_star) field_len = 0;
2936 else field_len -= l;
2937 Zero(cur, field_len, char);
2938 cur += field_len;
2939 break;
fc241834
RGS
2940 }
2941 case 'c':
f337b084
TH
2942 while (len-- > 0) {
2943 IV aiv;
2944 fromstr = NEXTFROM;
2945 aiv = SvIV(fromstr);
2946 if ((-128 > aiv || aiv > 127) &&
2947 ckWARN(WARN_PACK))
2948 Perl_warner(aTHX_ packWARN(WARN_PACK),
2949 "Character in 'c' format wrapped in pack");
585ec06d 2950 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2951 }
2952 break;
2953 case 'C':
f337b084
TH
2954 if (len == 0) {
2955 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2956 break;
2957 }
2958 GROWING(0, cat, start, cur, len);
a6ec74c1 2959 while (len-- > 0) {
f337b084 2960 IV aiv;
a6ec74c1 2961 fromstr = NEXTFROM;
f337b084
TH
2962 aiv = SvIV(fromstr);
2963 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2964 ckWARN(WARN_PACK))
2965 Perl_warner(aTHX_ packWARN(WARN_PACK),
2966 "Character in 'C' format wrapped in pack");
585ec06d 2967 *cur++ = (char)(aiv & 0xff);
f337b084 2968 }
fc241834
RGS
2969 break;
2970 case 'W': {
2971 char *end;
2972 U8 in_bytes = IN_BYTES;
2973
2974 end = start+SvLEN(cat)-1;
2975 if (utf8) end -= UTF8_MAXLEN-1;
2976 while (len-- > 0) {
2977 UV auv;
2978 fromstr = NEXTFROM;
2979 auv = SvUV(fromstr);
2980 if (in_bytes) auv = auv % 0x100;
2981 if (utf8) {
2982 W_utf8:
2983 if (cur > end) {
2984 *cur = '\0';
b162af07 2985 SvCUR_set(cat, cur - start);
fc241834
RGS
2986
2987 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2988 end = start+SvLEN(cat)-UTF8_MAXLEN;
2989 }
230e1fce
NC
2990 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2991 NATIVE_TO_UNI(auv),
041457d9 2992 warn_utf8 ?
230e1fce 2993 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2994 } else {
2995 if (auv >= 0x100) {
2996 if (!SvUTF8(cat)) {
2997 *cur = '\0';
b162af07 2998 SvCUR_set(cat, cur - start);
fc241834
RGS
2999 marked_upgrade(aTHX_ cat, symptr);
3000 lookahead.flags |= FLAG_DO_UTF8;
3001 lookahead.strbeg = symptr->strbeg;
3002 utf8 = 1;
3003 start = SvPVX(cat);
3004 cur = start + SvCUR(cat);
3005 end = start+SvLEN(cat)-UTF8_MAXLEN;
3006 goto W_utf8;
3007 }
3008 if (ckWARN(WARN_PACK))
3009 Perl_warner(aTHX_ packWARN(WARN_PACK),
3010 "Character in 'W' format wrapped in pack");
3011 auv &= 0xff;
3012 }
3013 if (cur >= end) {
3014 *cur = '\0';
b162af07 3015 SvCUR_set(cat, cur - start);
fc241834
RGS
3016 GROWING(0, cat, start, cur, len+1);
3017 end = start+SvLEN(cat)-1;
3018 }
fe2774ed 3019 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3020 }
3021 }
3022 break;
fc241834
RGS
3023 }
3024 case 'U': {
3025 char *end;
3026
3027 if (len == 0) {
3028 if (!(symptr->flags & FLAG_DO_UTF8)) {
3029 marked_upgrade(aTHX_ cat, symptr);
3030 lookahead.flags |= FLAG_DO_UTF8;
3031 lookahead.strbeg = symptr->strbeg;
3032 }
3033 utf8 = 0;
3034 goto no_change;
3035 }
3036
3037 end = start+SvLEN(cat);
3038 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3039 while (len-- > 0) {
fc241834 3040 UV auv;
a6ec74c1 3041 fromstr = NEXTFROM;
fc241834
RGS
3042 auv = SvUV(fromstr);
3043 if (utf8) {
230e1fce 3044 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3045 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3046 warn_utf8 ?
fc241834
RGS
3047 0 : UNICODE_ALLOW_ANY);
3048 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3049 *cur = '\0';
b162af07 3050 SvCUR_set(cat, cur - start);
fc241834
RGS
3051 GROWING(0, cat, start, cur,
3052 len+(endb-buffer)*UTF8_EXPAND);
3053 end = start+SvLEN(cat);
3054 }
3055 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3056 } else {
3057 if (cur >= end) {
3058 *cur = '\0';
b162af07 3059 SvCUR_set(cat, cur - start);
fc241834
RGS
3060 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3061 end = start+SvLEN(cat)-UTF8_MAXLEN;
3062 }
230e1fce 3063 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3064 warn_utf8 ?
230e1fce 3065 0 : UNICODE_ALLOW_ANY);
fc241834 3066 }
a6ec74c1 3067 }
a6ec74c1 3068 break;
fc241834 3069 }
a6ec74c1
JH
3070 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3071 case 'f':
a6ec74c1 3072 while (len-- > 0) {
f337b084
TH
3073 float afloat;
3074 NV anv;
a6ec74c1 3075 fromstr = NEXTFROM;
f337b084 3076 anv = SvNV(fromstr);
5cdb9e01 3077#ifdef __VOS__
f337b084 3078 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3079 during conversion from double to float into infinity, so we
3080 do it by hand. This code should either be generalized for
3081 any OS that needs it, or removed if and when VOS implements
3082 posix-976 (suggestion to support mapping to infinity).
3083 Paul.Green@stratus.com 02-04-02. */
f337b084 3084 if (anv > FLT_MAX)
fc241834 3085 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3086 else if (anv < -FLT_MAX)
fc241834 3087 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3088 else afloat = (float) anv;
3089#else /* __VOS__ */
baf3cf9c 3090# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3091 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3092 * on Alpha; fake it if we don't have them.
3093 */
f337b084 3094 if (anv > FLT_MAX)
fc241834 3095 afloat = FLT_MAX;
f337b084 3096 else if (anv < -FLT_MAX)
fc241834 3097 afloat = -FLT_MAX;
f337b084 3098 else afloat = (float)anv;
baf3cf9c 3099# else
f337b084 3100 afloat = (float)anv;
baf3cf9c 3101# endif
f337b084 3102#endif /* __VOS__ */
1109a392 3103 DO_BO_PACK_N(afloat, float);
f337b084 3104 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3105 }
3106 break;
3107 case 'd':
a6ec74c1 3108 while (len-- > 0) {
f337b084
TH
3109 double adouble;
3110 NV anv;
a6ec74c1 3111 fromstr = NEXTFROM;
f337b084 3112 anv = SvNV(fromstr);
5cdb9e01 3113#ifdef __VOS__
f337b084 3114 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3115 during conversion from long double to double into infinity,
3116 so we do it by hand. This code should either be generalized
3117 for any OS that needs it, or removed if and when VOS
3118 implements posix-976 (suggestion to support mapping to
3119 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3120 if (anv > DBL_MAX)
fc241834 3121 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3122 else if (anv < -DBL_MAX)
fc241834 3123 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3124 else adouble = (double) anv;
3125#else /* __VOS__ */
baf3cf9c 3126# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3127 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3128 * on Alpha; fake it if we don't have them.
3129 */
f337b084 3130 if (anv > DBL_MAX)
fc241834 3131 adouble = DBL_MAX;
f337b084 3132 else if (anv < -DBL_MAX)
fc241834 3133 adouble = -DBL_MAX;
f337b084 3134 else adouble = (double)anv;
baf3cf9c 3135# else
f337b084 3136 adouble = (double)anv;
baf3cf9c 3137# endif
f337b084 3138#endif /* __VOS__ */
1109a392 3139 DO_BO_PACK_N(adouble, double);
f337b084 3140 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3141 }
3142 break;
fc241834
RGS
3143 case 'F': {
3144 NV anv;
1109a392 3145 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3146 while (len-- > 0) {
3147 fromstr = NEXTFROM;
3148 anv = SvNV(fromstr);
1109a392 3149 DO_BO_PACK_N(anv, NV);
fc241834 3150 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3151 }
3152 break;
fc241834 3153 }
92d41999 3154#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3155 case 'D': {
3156 long double aldouble;
1109a392
MHM
3157 /* long doubles can have unused bits, which may be nonzero */
3158 Zero(&aldouble, 1, long double);
92d41999
JH
3159 while (len-- > 0) {
3160 fromstr = NEXTFROM;
3161 aldouble = (long double)SvNV(fromstr);
1109a392 3162 DO_BO_PACK_N(aldouble, long double);
fc241834 3163 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3164 }
3165 break;
fc241834 3166 }
92d41999 3167#endif
7212898e 3168#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3169 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3170#endif
a6ec74c1
JH
3171 case 'n':
3172 while (len-- > 0) {
f337b084 3173 I16 ai16;
a6ec74c1 3174 fromstr = NEXTFROM;
ef108786 3175 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3176#ifdef HAS_HTONS
ef108786 3177 ai16 = PerlSock_htons(ai16);
a6ec74c1 3178#endif
f337b084 3179 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3180 }
3181 break;
7212898e 3182#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3183 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3184#endif
a6ec74c1
JH
3185 case 'v':
3186 while (len-- > 0) {
f337b084 3187 I16 ai16;
a6ec74c1 3188 fromstr = NEXTFROM;
ef108786 3189 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3190#ifdef HAS_HTOVS
ef108786 3191 ai16 = htovs(ai16);
a6ec74c1 3192#endif
f337b084 3193 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3194 }
3195 break;
49704364 3196 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3197#if SHORTSIZE != SIZE16
fc241834 3198 while (len-- > 0) {
f337b084 3199 unsigned short aushort;
fc241834
RGS
3200 fromstr = NEXTFROM;
3201 aushort = SvUV(fromstr);
3202 DO_BO_PACK(aushort, s);
f337b084 3203 PUSH_VAR(utf8, cur, aushort);
fc241834 3204 }
49704364
WL
3205 break;
3206#else
3207 /* Fall through! */
a6ec74c1 3208#endif
49704364 3209 case 'S':
fc241834 3210 while (len-- > 0) {
f337b084 3211 U16 au16;
fc241834
RGS
3212 fromstr = NEXTFROM;
3213 au16 = (U16)SvUV(fromstr);
3214 DO_BO_PACK(au16, 16);
f337b084 3215 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3216 }
3217 break;
49704364 3218 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3219#if SHORTSIZE != SIZE16
fc241834 3220 while (len-- > 0) {
f337b084 3221 short ashort;
fc241834
RGS
3222 fromstr = NEXTFROM;
3223 ashort = SvIV(fromstr);
3224 DO_BO_PACK(ashort, s);
f337b084 3225 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3226 }
49704364
WL
3227 break;
3228#else
3229 /* Fall through! */
a6ec74c1 3230#endif
49704364
WL
3231 case 's':
3232 while (len-- > 0) {
f337b084 3233 I16 ai16;
49704364 3234 fromstr = NEXTFROM;
ef108786
MHM
3235 ai16 = (I16)SvIV(fromstr);
3236 DO_BO_PACK(ai16, 16);
f337b084 3237 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3238 }
3239 break;
3240 case 'I':
49704364 3241 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3242 while (len-- > 0) {
f337b084 3243 unsigned int auint;
a6ec74c1
JH
3244 fromstr = NEXTFROM;
3245 auint = SvUV(fromstr);
1109a392 3246 DO_BO_PACK(auint, i);
f337b084 3247 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3248 }
3249 break;
92d41999
JH
3250 case 'j':
3251 while (len-- > 0) {
f337b084 3252 IV aiv;
92d41999
JH
3253 fromstr = NEXTFROM;
3254 aiv = SvIV(fromstr);
1109a392
MHM
3255#if IVSIZE == INTSIZE
3256 DO_BO_PACK(aiv, i);
3257#elif IVSIZE == LONGSIZE
3258 DO_BO_PACK(aiv, l);
3259#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3260 DO_BO_PACK(aiv, 64);
f337b084
TH
3261#else
3262 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3263#endif
f337b084 3264 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3265 }
3266 break;
3267 case 'J':
3268 while (len-- > 0) {
f337b084 3269 UV auv;
92d41999
JH
3270 fromstr = NEXTFROM;
3271 auv = SvUV(fromstr);
1109a392
MHM
3272#if UVSIZE == INTSIZE
3273 DO_BO_PACK(auv, i);
3274#elif UVSIZE == LONGSIZE
3275 DO_BO_PACK(auv, l);
3276#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3277 DO_BO_PACK(auv, 64);
f337b084
TH
3278#else
3279 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3280#endif
f337b084 3281 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3282 }
3283 break;
a6ec74c1
JH
3284 case 'w':
3285 while (len-- > 0) {
f337b084 3286 NV anv;
a6ec74c1 3287 fromstr = NEXTFROM;
15e9f109 3288 anv = SvNV(fromstr);
a6ec74c1 3289
f337b084
TH
3290 if (anv < 0) {
3291 *cur = '\0';
b162af07 3292 SvCUR_set(cat, cur - start);
49704364 3293 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3294 }
a6ec74c1 3295
196b62db
NC
3296 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3297 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3298 any negative IVs will have already been got by the croak()
3299 above. IOK is untrue for fractions, so we test them
3300 against UV_MAX_P1. */
f337b084
TH
3301 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3302 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3303 char *in = buf + sizeof(buf);
196b62db 3304 UV auv = SvUV(fromstr);
a6ec74c1
JH
3305
3306 do {
eb160463 3307 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3308 auv >>= 7;
3309 } while (auv);
3310 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3311 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3312 in, (buf + sizeof(buf)) - in);
3313 } else if (SvPOKp(fromstr))
3314 goto w_string;
a6ec74c1 3315 else if (SvNOKp(fromstr)) {
0258719b
NC
3316 /* 10**NV_MAX_10_EXP is the largest power of 10
3317 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3318 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3319 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3320 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3321 Some C compilers are strict about integral constant
3322 expressions so we conservatively divide by a slightly
3323 smaller integer instead of multiplying by the exact
3324 floating-point value.
0258719b
NC
3325 */
3326#ifdef NV_MAX_10_EXP
f337b084 3327 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3328 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3329#else
f337b084 3330 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3331 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3332#endif
a6ec74c1
JH
3333 char *in = buf + sizeof(buf);
3334
8b6e33c7 3335 anv = Perl_floor(anv);
a6ec74c1 3336 do {
8b6e33c7 3337 const NV next = Perl_floor(anv / 128);
a6ec74c1 3338 if (in <= buf) /* this cannot happen ;-) */
49704364 3339 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3340 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3341 anv = next;
3342 } while (anv > 0);
a6ec74c1 3343 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3344 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3345 in, (buf + sizeof(buf)) - in);
3346 } else {
8b6e33c7
AL
3347 const char *from;
3348 char *result, *in;
735b914b
JH
3349 SV *norm;
3350 STRLEN len;
3351 bool done;
3352
f337b084 3353 w_string:
735b914b 3354 /* Copy string and check for compliance */
349d4f2f 3355 from = SvPV_const(fromstr, len);
735b914b 3356 if ((norm = is_an_int(from, len)) == NULL)
49704364 3357 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3358
a02a5408 3359 Newx(result, len, char);
735b914b
JH
3360 in = result + len;
3361 done = FALSE;
f337b084 3362 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3363 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3364 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3365 in, (result + len) - in);
735b914b
JH
3366 Safefree(result);
3367 SvREFCNT_dec(norm); /* free norm */
fc241834 3368 }
a6ec74c1
JH
3369 }
3370 break;
3371 case 'i':
49704364 3372 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3373 while (len-- > 0) {
f337b084 3374 int aint;
a6ec74c1
JH
3375 fromstr = NEXTFROM;
3376 aint = SvIV(fromstr);
1109a392 3377 DO_BO_PACK(aint, i);
f337b084 3378 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3379 }
3380 break;
7212898e 3381#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3382 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3383#endif
a6ec74c1
JH
3384 case 'N':
3385 while (len-- > 0) {
f337b084 3386 U32 au32;
a6ec74c1 3387 fromstr = NEXTFROM;
ef108786 3388 au32 = SvUV(fromstr);
a6ec74c1 3389#ifdef HAS_HTONL
ef108786 3390 au32 = PerlSock_htonl(au32);
a6ec74c1 3391#endif
f337b084 3392 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3393 }
3394 break;
7212898e 3395#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3396 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3397#endif
a6ec74c1
JH
3398 case 'V':
3399 while (len-- > 0) {
f337b084 3400 U32 au32;
a6ec74c1 3401 fromstr = NEXTFROM;
ef108786 3402 au32 = SvUV(fromstr);
a6ec74c1 3403#ifdef HAS_HTOVL
ef108786 3404 au32 = htovl(au32);
a6ec74c1 3405#endif
f337b084 3406 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3407 }
3408 break;
49704364 3409 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3410#if LONGSIZE != SIZE32
fc241834 3411 while (len-- > 0) {
f337b084 3412 unsigned long aulong;
fc241834
RGS
3413 fromstr = NEXTFROM;
3414 aulong = SvUV(fromstr);
3415 DO_BO_PACK(aulong, l);
f337b084 3416 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3417 }
49704364
WL
3418 break;
3419#else
3420 /* Fall though! */
a6ec74c1 3421#endif
49704364 3422 case 'L':
fc241834 3423 while (len-- > 0) {
f337b084 3424 U32 au32;
fc241834
RGS
3425 fromstr = NEXTFROM;
3426 au32 = SvUV(fromstr);
3427 DO_BO_PACK(au32, 32);
f337b084 3428 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3429 }
3430 break;
49704364 3431 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3432#if LONGSIZE != SIZE32
fc241834 3433 while (len-- > 0) {
f337b084 3434 long along;
fc241834
RGS
3435 fromstr = NEXTFROM;
3436 along = SvIV(fromstr);
3437 DO_BO_PACK(along, l);
f337b084 3438 PUSH_VAR(utf8, cur, along);
a6ec74c1 3439 }
49704364
WL
3440 break;
3441#else
3442 /* Fall though! */
a6ec74c1 3443#endif
49704364
WL
3444 case 'l':
3445 while (len-- > 0) {
f337b084 3446 I32 ai32;
49704364 3447 fromstr = NEXTFROM;
ef108786
MHM
3448 ai32 = SvIV(fromstr);
3449 DO_BO_PACK(ai32, 32);
f337b084 3450 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3451 }
3452 break;
3453#ifdef HAS_QUAD
3454 case 'Q':
3455 while (len-- > 0) {
f337b084 3456 Uquad_t auquad;
a6ec74c1 3457 fromstr = NEXTFROM;
f337b084 3458 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3459 DO_BO_PACK(auquad, 64);
f337b084 3460 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3461 }
3462 break;
3463 case 'q':
3464 while (len-- > 0) {
f337b084 3465 Quad_t aquad;
a6ec74c1
JH
3466 fromstr = NEXTFROM;
3467 aquad = (Quad_t)SvIV(fromstr);
1109a392 3468 DO_BO_PACK(aquad, 64);
f337b084 3469 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3470 }
3471 break;
f337b084 3472#endif /* HAS_QUAD */
a6ec74c1
JH
3473 case 'P':
3474 len = 1; /* assume SV is correct length */
f337b084 3475 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3476 /* Fall through! */
a6ec74c1
JH
3477 case 'p':
3478 while (len-- > 0) {
83003860 3479 const char *aptr;
f337b084 3480
a6ec74c1 3481 fromstr = NEXTFROM;
28a4f200
TH
3482 SvGETMAGIC(fromstr);
3483 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3484 else {
a6ec74c1
JH
3485 /* XXX better yet, could spirit away the string to
3486 * a safe spot and hang on to it until the result
3487 * of pack() (and all copies of the result) are
3488 * gone.
3489 */
041457d9
DM
3490 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3491 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3492 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3493 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3494 }
3495 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3496 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3497 else
2596d9fe 3498 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3499 }
07409e01 3500 DO_BO_PACK_PC(aptr);
f337b084 3501 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3502 }
3503 break;
fc241834 3504 case 'u': {
f7fe979e 3505 const char *aptr, *aend;
fc241834 3506 bool from_utf8;
f337b084 3507
a6ec74c1 3508 fromstr = NEXTFROM;
fc241834
RGS
3509 if (len <= 2) len = 45;
3510 else len = len / 3 * 3;
3511 if (len >= 64) {
0fdc08b6
RGS
3512 if (ckWARN(WARN_PACK))
3513 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3514 "Field too wide in 'u' format in pack");
3515 len = 63;
3516 }
83003860 3517 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3518 from_utf8 = DO_UTF8(fromstr);
3519 if (from_utf8) {
3520 aend = aptr + fromlen;
3521 fromlen = sv_len_utf8(fromstr);
3522 } else aend = NULL; /* Unused, but keep compilers happy */
3523 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3524 while (fromlen > 0) {
fc241834 3525 U8 *end;
a6ec74c1 3526 I32 todo;
fc241834 3527 U8 hunk[1+63/3*4+1];
a6ec74c1 3528
eb160463 3529 if ((I32)fromlen > len)
a6ec74c1
JH
3530 todo = len;
3531 else
3532 todo = fromlen;
fc241834
RGS
3533 if (from_utf8) {
3534 char buffer[64];
3535 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3536 'u' | TYPE_IS_PACK)) {
3537 *cur = '\0';
b162af07 3538 SvCUR_set(cat, cur - start);
fc241834
RGS
3539 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3540 }
3541 end = doencodes(hunk, buffer, todo);
3542 } else {
3543 end = doencodes(hunk, aptr, todo);
3544 aptr += todo;
3545 }
3546 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3547 fromlen -= todo;
3548 }
a6ec74c1
JH
3549 break;
3550 }
f337b084
TH
3551 }
3552 *cur = '\0';
b162af07 3553 SvCUR_set(cat, cur - start);
f337b084 3554 no_change:
49704364 3555 *symptr = lookahead;
a6ec74c1 3556 }
49704364 3557 return beglist;
18529408
IZ
3558}
3559#undef NEXTFROM
3560
3561
3562PP(pp_pack)
3563{
97aff369 3564 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3565 register SV *cat = TARG;
3566 STRLEN fromlen;
349d4f2f
NC
3567 SV *pat_sv = *++MARK;
3568 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3569 register const char *patend = pat + fromlen;
18529408
IZ
3570
3571 MARK++;
3572 sv_setpvn(cat, "", 0);
f337b084 3573 SvUTF8_off(cat);
18529408 3574
7accc089 3575 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3576
a6ec74c1
JH
3577 SvSETMAGIC(cat);
3578 SP = ORIGMARK;
3579 PUSHs(cat);
3580 RETURN;
3581}
a6ec74c1 3582
73cb7263
NC
3583/*
3584 * Local variables:
3585 * c-indentation-style: bsd
3586 * c-basic-offset: 4
3587 * indent-tabs-mode: t
3588 * End:
3589 *
37442d52
RGS
3590 * ex: set ts=8 sts=4 sw=4 noet:
3591 */