This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
I believe that it makes little sense for the simple user filters
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
acde74e1 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
WL
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 182 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
325# elif PTRSIZE == IVSIZE
326# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
330# else
331# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
333# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 335# endif
1109a392 336
7212898e 337# if defined(my_htolen) && defined(my_letohn) && \
1109a392 338 defined(my_htoben) && defined(my_betohn)
7212898e 339# define DO_BO_UNPACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347
7212898e 348# define DO_BO_PACK_N(var, type) \
1109a392 349 STMT_START { \
66c611c5 350 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
353 default: break; \
354 } \
355 } STMT_END
7212898e
NC
356# else
357# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
359# endif
360
f337b084 361#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 362
78d46eaa 363#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 364#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
365#define PACK_SIZE_MASK 0x3F
366
78d46eaa
NC
367/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
369
370#if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
372#endif
373
374typedef U8 packprops_t;
78d46eaa
NC
375#if 'J'-'I' == 1
376/* ASCII */
f43a9a31 377STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
378 /* normal */
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 385#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 386 /* D */ LONG_DOUBLESIZE,
80a13697 387#else
a7a3cfaa 388 0,
80a13697 389#endif
a7a3cfaa
TH
390 0,
391 /* F */ NVSIZE,
392 0, 0,
393 /* I */ sizeof(unsigned int),
394 /* J */ UVSIZE,
395 0,
396 /* L */ SIZE32,
397 0,
398 /* N */ SIZE32,
399 0, 0,
80a13697 400#if defined(HAS_QUAD)
a7a3cfaa 401 /* Q */ sizeof(Uquad_t),
80a13697 402#else
a7a3cfaa 403 0,
80a13697 404#endif
a7a3cfaa
TH
405 0,
406 /* S */ SIZE16,
407 0,
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* V */ SIZE32,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
414 0,
415 /* f */ sizeof(float),
416 0, 0,
417 /* i */ sizeof(int),
418 /* j */ IVSIZE,
419 0,
420 /* l */ SIZE32,
421 0,
422 /* n */ SIZE16,
423 0,
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* q */ sizeof(Quad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* s */ SIZE16,
432 0, 0,
433 /* v */ SIZE16,
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
444 /* shrieking */
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
451 0, 0,
452 /* L */ sizeof(unsigned long),
453 0,
7212898e 454#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 455 /* N */ SIZE32,
7212898e 456#else
a7a3cfaa 457 0,
7212898e 458#endif
a7a3cfaa
TH
459 0, 0, 0, 0,
460 /* S */ sizeof(unsigned short),
461 0, 0,
7212898e 462#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 463 /* V */ SIZE32,
7212898e 464#else
a7a3cfaa 465 0,
7212898e 466#endif
a7a3cfaa
TH
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0,
469 /* i */ sizeof(int),
470 0, 0,
471 /* l */ sizeof(long),
472 0,
7212898e 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 474 /* n */ SIZE16,
7212898e 475#else
a7a3cfaa 476 0,
7212898e 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0,
479 /* s */ sizeof(short),
480 0, 0,
7212898e 481#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 482 /* v */ SIZE16,
7212898e 483#else
a7a3cfaa 484 0,
7212898e 485#endif
a7a3cfaa
TH
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
495};
496#else
497/* EBCDIC (or bust) */
f43a9a31 498STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
499 /* normal */
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
511 0,
512 /* f */ sizeof(float),
513 0, 0,
514 /* i */ sizeof(int),
515 0, 0, 0, 0, 0, 0, 0,
516 /* j */ IVSIZE,
517 0,
518 /* l */ SIZE32,
519 0,
520 /* n */ SIZE16,
521 0,
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 523#if defined(HAS_QUAD)
a7a3cfaa 524 /* q */ sizeof(Quad_t),
80a13697 525#else
a7a3cfaa 526 0,
80a13697 527#endif
a7a3cfaa
TH
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* s */ SIZE16,
530 0, 0,
531 /* v */ SIZE16,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 536#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 537 /* D */ LONG_DOUBLESIZE,
80a13697 538#else
a7a3cfaa 539 0,
80a13697 540#endif
a7a3cfaa
TH
541 0,
542 /* F */ NVSIZE,
543 0, 0,
544 /* I */ sizeof(unsigned int),
545 0, 0, 0, 0, 0, 0, 0,
546 /* J */ UVSIZE,
547 0,
548 /* L */ SIZE32,
549 0,
550 /* N */ SIZE32,
551 0, 0,
80a13697 552#if defined(HAS_QUAD)
a7a3cfaa 553 /* Q */ sizeof(Uquad_t),
80a13697 554#else
a7a3cfaa 555 0,
80a13697 556#endif
a7a3cfaa
TH
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 /* S */ SIZE16,
559 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* V */ SIZE32,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 /* shrieking */
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* i */ sizeof(int),
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
578 0,
7212898e 579#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 580 /* n */ SIZE16,
7212898e 581#else
a7a3cfaa 582 0,
7212898e 583#endif
a7a3cfaa
TH
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
586 0, 0,
7212898e 587#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 588 /* v */ SIZE16,
7212898e 589#else
a7a3cfaa 590 0,
7212898e 591#endif
a7a3cfaa
TH
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
598 0,
7212898e 599#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 600 /* N */ SIZE32,
7212898e 601#else
a7a3cfaa 602 0,
7212898e 603#endif
a7a3cfaa
TH
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
606 0, 0,
7212898e 607#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 608 /* V */ SIZE32,
7212898e 609#else
a7a3cfaa 610 0,
7212898e 611#endif
a7a3cfaa
TH
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
614};
615#endif
616
08ca2aa3 617STATIC U8
f7fe979e 618uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 619{
08ca2aa3 620 STRLEN retlen;
0bcc34c2 621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 629 if (val >= 0x100) {
f337b084 630 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
f337b084 633 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
634 val &= 0xff;
635 }
636 *s += retlen;
fe2774ed 637 return (U8)val;
08ca2aa3
TH
638}
639
f337b084
TH
640#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
642 *(U8 *)(s)++)
643
644STATIC bool
f7fe979e 645uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
646{
647 UV val;
648 STRLEN retlen;
f7fe979e 649 const char *from = *s;
08ca2aa3 650 int bad = 0;
f7fe979e 651 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
f337b084 655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
658 bad |= 1;
659 } else from += retlen;
660 if (val >= 0x100) {
661 bad |= 2;
662 val &= 0xff;
663 }
fe2774ed 664 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
665 }
666 /* We have enough characters for the buffer. Did we have problems ? */
667 if (bad) {
668 if (bad & 1) {
669 /* Rewalk the string fragment while warning */
f7fe979e 670 const char *ptr;
9e27e96a 671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
f337b084 674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
675 }
676 if (from > end) from = end;
677 }
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
fc241834 682 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
684 }
685 *s = from;
686 return TRUE;
687}
688
689STATIC bool
f7fe979e 690next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 691{
97aff369 692 dVAR;
08ca2aa3 693 STRLEN retlen;
0bcc34c2 694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
697 *out = 0;
698 return FALSE;
699 }
700 *out = PL_uudmap[val] & 077;
f337b084 701 *s += retlen;
08ca2aa3
TH
702 return TRUE;
703}
78d46eaa 704
f337b084 705STATIC void
f7fe979e 706bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
f337b084 707 U8 buffer[UTF8_MAXLEN];
0bcc34c2 708 const U8 * const end = start + len;
f337b084
TH
709 char *d = *dest;
710 while (start < end) {
f7fe979e 711 const int length =
f337b084
TH
712 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
713 switch(length) {
714 case 1:
715 *d++ = buffer[0];
716 break;
717 case 2:
718 *d++ = buffer[0];
719 *d++ = buffer[1];
720 break;
721 default:
722 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
723 *start, length);
724 }
725 start++;
726 }
727 *dest = d;
728}
729
230e1fce
NC
730#define PUSH_BYTES(utf8, cur, buf, len) \
731STMT_START { \
732 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
733 else { \
734 Copy(buf, cur, len, char); \
735 (cur) += (len); \
736 } \
f337b084
TH
737} STMT_END
738
739#define GROWING(utf8, cat, start, cur, in_len) \
740STMT_START { \
741 STRLEN glen = (in_len); \
3473cf63 742 if (utf8) glen *= UTF8_EXPAND; \
f337b084 743 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 744 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
745 (cur) = (start) + SvCUR(cat); \
746 } \
747} STMT_END
748
749#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
750STMT_START { \
f7fe979e 751 const STRLEN glen = (in_len); \
f337b084 752 STRLEN gl = glen; \
3473cf63 753 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
754 if ((cur) + gl >= (start) + SvLEN(cat)) { \
755 *cur = '\0'; \
b162af07 756 SvCUR_set((cat), (cur) - (start)); \
0bd48802 757 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
758 (cur) = (start) + SvCUR(cat); \
759 } \
760 PUSH_BYTES(utf8, cur, buf, glen); \
761} STMT_END
762
763#define PUSH_BYTE(utf8, s, byte) \
764STMT_START { \
765 if (utf8) { \
f7fe979e 766 const U8 au8 = (byte); \
f337b084
TH
767 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
768 } else *(U8 *)(s)++ = (byte); \
769} STMT_END
770
771/* Only to be used inside a loop (see the break) */
772#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
773STMT_START { \
774 STRLEN retlen; \
775 if (str >= end) break; \
776 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
777 if (retlen == (STRLEN) -1 || retlen == 0) { \
778 *cur = '\0'; \
779 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
780 } \
781 str += retlen; \
782} STMT_END
783
f7fe979e
AL
784static const char *_action( const tempsym_t* symptr )
785{
786 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
787}
788
206947d2 789/* Returns the sizeof() struct described by pat */
028d1f6d 790STATIC I32
f337b084 791S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 792{
f337b084 793 I32 total = 0;
206947d2 794
49704364 795 while (next_symbol(symptr)) {
f337b084 796 I32 len;
f7fe979e 797 int size;
f337b084
TH
798
799 switch (symptr->howlen) {
fc241834 800 case e_star:
49704364 801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 802 _action( symptr ) );
49704364 803 break;
f337b084
TH
804 default:
805 /* e_no_len and e_number */
806 len = symptr->length;
807 break;
49704364
WL
808 }
809
a7a3cfaa 810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 811 if (!size) {
f7fe979e 812 int star;
80a13697
NC
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
815 default:
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 818 _action( symptr ) );
28be1210
TH
819#ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
822#endif
80a13697 823 case '@':
28be1210 824 case '.':
80a13697
NC
825 case '/':
826 case 'U': /* XXXX Is it correct? */
827 case 'w':
828 case 'u':
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 830 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 831 _action( symptr ) );
80a13697
NC
832 case '%':
833 size = 0;
834 break;
835 case '(':
fc241834
RGS
836 {
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
845 *symptr = savsym;
846 break;
847 }
80a13697
NC
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
850 */
851 if (!len) /* Avoid division by 0 */
852 len = 1;
853 len = total % len; /* Assumed: the start is aligned. */
854 /* FALL THROUGH */
855 case 'X':
856 size = -1;
857 if (total < len)
f7fe979e 858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
859 break;
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
862 len = 1;
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
865 len = len - star;
866 else
867 len = 0;
868 /* FALL THROUGH */
869 case 'x':
870 case 'A':
871 case 'Z':
872 case 'a':
80a13697
NC
873 size = 1;
874 break;
875 case 'B':
876 case 'b':
877 len = (len + 7)/8;
878 size = 1;
879 break;
880 case 'H':
881 case 'h':
882 len = (len + 1)/2;
883 size = 1;
884 break;
78d46eaa 885
80a13697
NC
886 case 'P':
887 len = 1;
888 size = sizeof(char*);
78d46eaa
NC
889 break;
890 }
206947d2
IZ
891 }
892 total += len * size;
893 }
894 return total;
895}
896
49704364
WL
897
898/* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
900 */
f7fe979e
AL
901STATIC const char *
902S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 903{
49704364 904 while (patptr < patend) {
f7fe979e 905 const char c = *patptr++;
49704364
WL
906
907 if (isSPACE(c))
908 continue;
909 else if (c == ender)
910 return patptr-1;
911 else if (c == '#') {
912 while (patptr < patend && *patptr != '\n')
913 patptr++;
914 continue;
915 } else if (c == '(')
916 patptr = group_end(patptr, patend, ')') + 1;
917 else if (c == '[')
918 patptr = group_end(patptr, patend, ']') + 1;
18529408 919 }
49704364
WL
920 Perl_croak(aTHX_ "No group ending character '%c' found in template",
921 ender);
922 return 0;
18529408
IZ
923}
924
49704364
WL
925
926/* Convert unsigned decimal number to binary.
927 * Expects a pointer to the first digit and address of length variable
928 * Advances char pointer to 1st non-digit char and returns number
fc241834 929 */
f7fe979e
AL
930STATIC const char *
931S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
932{
933 I32 len = *patptr++ - '0';
934 while (isDIGIT(*patptr)) {
935 if (len >= 0x7FFFFFFF/10)
936 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
937 len = (len * 10) + (*patptr++ - '0');
938 }
939 *lenptr = len;
940 return patptr;
941}
942
943/* The marvellous template parsing routine: Using state stored in *symptr,
944 * locates next template code and count
945 */
946STATIC bool
f337b084 947S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 948{
f7fe979e 949 const char* patptr = symptr->patptr;
0bcc34c2 950 const char* const patend = symptr->patend;
49704364
WL
951
952 symptr->flags &= ~FLAG_SLASH;
953
954 while (patptr < patend) {
955 if (isSPACE(*patptr))
956 patptr++;
957 else if (*patptr == '#') {
958 patptr++;
959 while (patptr < patend && *patptr != '\n')
960 patptr++;
961 if (patptr < patend)
962 patptr++;
963 } else {
fc241834 964 /* We should have found a template code */
49704364 965 I32 code = *patptr++ & 0xFF;
66c611c5 966 U32 inherited_modifiers = 0;
49704364
WL
967
968 if (code == ','){ /* grandfather in commas but with a warning */
969 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
970 symptr->flags |= FLAG_COMMA;
971 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 972 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
973 }
974 continue;
975 }
fc241834 976
49704364 977 /* for '(', skip to ')' */
fc241834 978 if (code == '(') {
49704364
WL
979 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
980 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 981 _action( symptr ) );
49704364
WL
982 symptr->grpbeg = patptr;
983 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
984 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
985 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 986 _action( symptr ) );
49704364
WL
987 }
988
66c611c5
MHM
989 /* look for group modifiers to inherit */
990 if (TYPE_ENDIANNESS(symptr->flags)) {
991 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
992 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
993 }
994
1109a392
MHM
995 /* look for modifiers */
996 while (patptr < patend) {
b7787f18
AL
997 const char *allowed;
998 I32 modifier;
1109a392
MHM
999 switch (*patptr) {
1000 case '!':
1001 modifier = TYPE_IS_SHRIEKING;
7212898e 1002 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1003 break;
7212898e 1004#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1005 case '>':
1006 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1007 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1008 break;
1009 case '<':
1010 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1011 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1012 break;
f337b084 1013#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1014 default:
b7787f18
AL
1015 allowed = "";
1016 modifier = 0;
1109a392
MHM
1017 break;
1018 }
66c611c5 1019
1109a392
MHM
1020 if (modifier == 0)
1021 break;
66c611c5 1022
1109a392
MHM
1023 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1024 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1025 allowed, _action( symptr ) );
66c611c5
MHM
1026
1027 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1028 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1029 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1030 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1031 TYPE_ENDIANNESS_MASK)
1032 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1033 *patptr, _action( symptr ) );
66c611c5 1034
041457d9 1035 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1036 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1037 "Duplicate modifier '%c' after '%c' in %s",
1038 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1039 _action( symptr ) );
1109a392 1040 }
66c611c5 1041
1109a392
MHM
1042 code |= modifier;
1043 patptr++;
49704364
WL
1044 }
1045
66c611c5
MHM
1046 /* inherit modifiers */
1047 code |= inherited_modifiers;
1048
fc241834 1049 /* look for count and/or / */
49704364
WL
1050 if (patptr < patend) {
1051 if (isDIGIT(*patptr)) {
1052 patptr = get_num( patptr, &symptr->length );
1053 symptr->howlen = e_number;
1054
1055 } else if (*patptr == '*') {
1056 patptr++;
1057 symptr->howlen = e_star;
1058
1059 } else if (*patptr == '[') {
f7fe979e 1060 const char* lenptr = ++patptr;
49704364
WL
1061 symptr->howlen = e_number;
1062 patptr = group_end( patptr, patend, ']' ) + 1;
1063 /* what kind of [] is it? */
1064 if (isDIGIT(*lenptr)) {
1065 lenptr = get_num( lenptr, &symptr->length );
1066 if( *lenptr != ']' )
1067 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1068 _action( symptr ) );
49704364
WL
1069 } else {
1070 tempsym_t savsym = *symptr;
1071 symptr->patend = patptr-1;
1072 symptr->patptr = lenptr;
1073 savsym.length = measure_struct(symptr);
1074 *symptr = savsym;
1075 }
1076 } else {
1077 symptr->howlen = e_no_len;
1078 symptr->length = 1;
1079 }
1080
1081 /* try to find / */
1082 while (patptr < patend) {
1083 if (isSPACE(*patptr))
1084 patptr++;
1085 else if (*patptr == '#') {
1086 patptr++;
1087 while (patptr < patend && *patptr != '\n')
1088 patptr++;
1089 if (patptr < patend)
1090 patptr++;
1091 } else {
66c611c5 1092 if (*patptr == '/') {
49704364
WL
1093 symptr->flags |= FLAG_SLASH;
1094 patptr++;
66c611c5
MHM
1095 if (patptr < patend &&
1096 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1097 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1098 _action( symptr ) );
49704364
WL
1099 }
1100 break;
1101 }
18529408 1102 }
49704364
WL
1103 } else {
1104 /* at end - no count, no / */
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 symptr->code = code;
fc241834 1110 symptr->patptr = patptr;
49704364 1111 return TRUE;
18529408 1112 }
49704364 1113 }
fc241834 1114 symptr->patptr = patptr;
49704364 1115 return FALSE;
18529408
IZ
1116}
1117
18529408 1118/*
fc241834 1119 There is no way to cleanly handle the case where we should process the
08ca2aa3 1120 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1121 (e.g. estimates like strend-s as an upper bound for the number of
1122 characters left wouldn't work). So if we foresee the need of this
1123 (pattern starts with U or contains U0), we want to work on the encoded
1124 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1125 themselves if they need to do a lot of unpacks like this on it
1126*/
fc241834 1127STATIC bool
08ca2aa3
TH
1128need_utf8(const char *pat, const char *patend)
1129{
1130 bool first = TRUE;
1131 while (pat < patend) {
1132 if (pat[0] == '#') {
1133 pat++;
f7fe979e 1134 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1135 if (!pat) return FALSE;
1136 } else if (pat[0] == 'U') {
1137 if (first || pat[1] == '0') return TRUE;
1138 } else first = FALSE;
1139 pat++;
1140 }
1141 return FALSE;
1142}
1143
1144STATIC char
1145first_symbol(const char *pat, const char *patend) {
1146 while (pat < patend) {
1147 if (pat[0] != '#') return pat[0];
1148 pat++;
f7fe979e 1149 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1150 if (!pat) return 0;
1151 pat++;
1152 }
1153 return 0;
1154}
1155
1156/*
18529408
IZ
1157=for apidoc unpack_str
1158
7accc089
JH
1159The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1160and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1161
1162=cut */
1163
1164I32
f7fe979e 1165Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1166{
f7fe979e 1167 tempsym_t sym;
8772537c
AL
1168 PERL_UNUSED_ARG(strbeg);
1169 PERL_UNUSED_ARG(new_s);
1170 PERL_UNUSED_ARG(ocnt);
08ca2aa3 1171
f337b084 1172 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1173 else if (need_utf8(pat, patend)) {
1174 /* We probably should try to avoid this in case a scalar context call
1175 wouldn't get to the "U0" */
1176 STRLEN len = strend - s;
230e1fce 1177 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1178 SAVEFREEPV(s);
1179 strend = s + len;
f337b084 1180 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1181 }
1182
f337b084
TH
1183 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1184 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1185
f7fe979e 1186 TEMPSYM_INIT(&sym, pat, patend, flags);
49704364
WL
1187
1188 return unpack_rec(&sym, s, s, strend, NULL );
1189}
1190
7accc089
JH
1191/*
1192=for apidoc unpackstring
1193
608d3aed
WL
1194The engine implementing unpack() Perl function. C<unpackstring> puts the
1195extracted list items on the stack and returns the number of elements.
1196Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1197
1198=cut */
1199
1200I32
f7fe979e 1201Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1202{
f7fe979e 1203 tempsym_t sym;
08ca2aa3 1204
f337b084 1205 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1206 else if (need_utf8(pat, patend)) {
1207 /* We probably should try to avoid this in case a scalar context call
1208 wouldn't get to the "U0" */
1209 STRLEN len = strend - s;
230e1fce 1210 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1211 SAVEFREEPV(s);
1212 strend = s + len;
f337b084 1213 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1214 }
1215
f337b084
TH
1216 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1217 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1218
f7fe979e 1219 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1220
1221 return unpack_rec(&sym, s, s, strend, NULL );
1222}
1223
49704364
WL
1224STATIC
1225I32
f7fe979e 1226S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1227{
27da23d5 1228 dVAR; dSP;
18529408 1229 SV *sv;
f7fe979e 1230 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1231 howlen_t howlen;
a6ec74c1 1232
a6ec74c1 1233 I32 checksum = 0;
92d41999 1234 UV cuv = 0;
a6ec74c1 1235 NV cdouble = 0.0;
f337b084 1236 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1237 bool beyond = FALSE;
21c16052 1238 bool explicit_length;
9e27e96a 1239 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1240 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1241 symptr->strbeg = s - strbeg;
49704364 1242
49704364 1243 while (next_symbol(symptr)) {
a7a3cfaa 1244 packprops_t props;
9e27e96a 1245 I32 len;
f337b084 1246 I32 datumtype = symptr->code;
206947d2 1247 /* do first one only unless in list context
08ca2aa3 1248 / is implemented by unpacking the count, then popping it from the
206947d2 1249 stack, so must check that we're not in the middle of a / */
49704364 1250 if ( unpack_only_one
206947d2 1251 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1252 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1253 break;
49704364 1254
f337b084 1255 switch (howlen = symptr->howlen) {
fc241834
RGS
1256 case e_star:
1257 len = strend - strbeg; /* long enough */
49704364 1258 break;
f337b084
TH
1259 default:
1260 /* e_no_len and e_number */
1261 len = symptr->length;
1262 break;
49704364 1263 }
18529408 1264
21c16052 1265 explicit_length = TRUE;
a6ec74c1 1266 redo_switch:
49704364 1267 beyond = s >= strend;
a7a3cfaa
TH
1268
1269 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1270 if (props) {
1271 /* props nonzero means we can process this letter. */
9e27e96a
AL
1272 const long size = props & PACK_SIZE_MASK;
1273 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1274 if (len > howmany)
1275 len = howmany;
1276
1277 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1278 if (len && unpack_only_one) len = 1;
1279 EXTEND(SP, len);
1280 EXTEND_MORTAL(len);
78d46eaa
NC
1281 }
1282 }
a7a3cfaa 1283
1109a392 1284 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1285 default:
1109a392 1286 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1287
a6ec74c1 1288 case '%':
49704364 1289 if (howlen == e_no_len)
18529408 1290 len = 16; /* len is not specified */
a6ec74c1 1291 checksum = len;
92d41999 1292 cuv = 0;
a6ec74c1 1293 cdouble = 0;
18529408 1294 continue;
a6ec74c1 1295 break;
18529408
IZ
1296 case '(':
1297 {
49704364 1298 tempsym_t savsym = *symptr;
9e27e96a 1299 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1300 symptr->flags |= group_modifiers;
49704364 1301 symptr->patend = savsym.grpend;
28be1210 1302 symptr->previous = &savsym;
49704364 1303 symptr->level++;
18529408
IZ
1304 PUTBACK;
1305 while (len--) {
49704364 1306 symptr->patptr = savsym.grpbeg;
f337b084
TH
1307 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1308 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1309 unpack_rec(symptr, s, strbeg, strend, &s);
1310 if (s == strend && savsym.howlen == e_star)
49704364 1311 break; /* No way to continue */
18529408
IZ
1312 }
1313 SPAGAIN;
28be1210 1314 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1315 *symptr = savsym;
18529408
IZ
1316 break;
1317 }
28be1210
TH
1318#ifdef PERL_PACK_CAN_SHRIEKSIGN
1319 case '.' | TYPE_IS_SHRIEKING:
1320#endif
1321 case '.': {
9e27e96a 1322 const char *from;
28be1210
TH
1323 SV *sv;
1324#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1325 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1326#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1327 const bool u8 = utf8;
28be1210
TH
1328#endif
1329 if (howlen == e_star) from = strbeg;
1330 else if (len <= 0) from = s;
1331 else {
1332 tempsym_t *group = symptr;
1333
1334 while (--len && group) group = group->previous;
1335 from = group ? strbeg + group->strbeg : strbeg;
1336 }
1337 sv = from <= s ?
00646304
CB
1338 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1339 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1340 XPUSHs(sv_2mortal(sv));
1341 break;
1342 }
1343#ifdef PERL_PACK_CAN_SHRIEKSIGN
1344 case '@' | TYPE_IS_SHRIEKING:
1345#endif
a6ec74c1 1346 case '@':
28be1210
TH
1347 s = strbeg + symptr->strbeg;
1348#ifdef PERL_PACK_CAN_SHRIEKSIGN
1349 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1350#else /* PERL_PACK_CAN_SHRIEKSIGN */
1351 if (utf8)
1352#endif
1353 {
08ca2aa3
TH
1354 while (len > 0) {
1355 if (s >= strend)
1356 Perl_croak(aTHX_ "'@' outside of string in unpack");
1357 s += UTF8SKIP(s);
1358 len--;
1359 }
1360 if (s > strend)
1361 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1362 } else {
28be1210 1363 if (strend-s < len)
fc241834 1364 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1365 s += len;
08ca2aa3 1366 }
a6ec74c1 1367 break;
62f95557
IZ
1368 case 'X' | TYPE_IS_SHRIEKING:
1369 if (!len) /* Avoid division by 0 */
1370 len = 1;
08ca2aa3 1371 if (utf8) {
f7fe979e 1372 const char *hop, *last;
f337b084
TH
1373 I32 l = len;
1374 hop = last = strbeg;
1375 while (hop < s) {
1376 hop += UTF8SKIP(hop);
1377 if (--l == 0) {
08ca2aa3 1378 last = hop;
f337b084
TH
1379 l = len;
1380 }
fc241834 1381 }
f337b084
TH
1382 if (last > s)
1383 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1384 s = last;
1385 break;
f337b084
TH
1386 }
1387 len = (s - strbeg) % len;
62f95557 1388 /* FALL THROUGH */
a6ec74c1 1389 case 'X':
08ca2aa3
TH
1390 if (utf8) {
1391 while (len > 0) {
1392 if (s <= strbeg)
1393 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1394 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1395 if (s <= strbeg)
1396 Perl_croak(aTHX_ "'X' outside of string in unpack");
1397 }
1398 len--;
1399 }
1400 } else {
fc241834
RGS
1401 if (len > s - strbeg)
1402 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1403 s -= len;
08ca2aa3 1404 }
a6ec74c1 1405 break;
9e27e96a
AL
1406 case 'x' | TYPE_IS_SHRIEKING: {
1407 I32 ai32;
62f95557
IZ
1408 if (!len) /* Avoid division by 0 */
1409 len = 1;
230e1fce
NC
1410 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1411 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1412 if (ai32 == 0) break;
1413 len -= ai32;
9e27e96a 1414 }
62f95557 1415 /* FALL THROUGH */
a6ec74c1 1416 case 'x':
08ca2aa3
TH
1417 if (utf8) {
1418 while (len>0) {
1419 if (s >= strend)
1420 Perl_croak(aTHX_ "'x' outside of string in unpack");
1421 s += UTF8SKIP(s);
1422 len--;
1423 }
1424 } else {
fc241834
RGS
1425 if (len > strend - s)
1426 Perl_croak(aTHX_ "'x' outside of string in unpack");
1427 s += len;
f337b084 1428 }
a6ec74c1
JH
1429 break;
1430 case '/':
49704364
WL
1431 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1432 break;
a6ec74c1
JH
1433 case 'A':
1434 case 'Z':
1435 case 'a':
08ca2aa3
TH
1436 if (checksum) {
1437 /* Preliminary length estimate is assumed done in 'W' */
1438 if (len > strend - s) len = strend - s;
1439 goto W_checksum;
1440 }
1441 if (utf8) {
1442 I32 l;
f7fe979e 1443 const char *hop;
08ca2aa3
TH
1444 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1445 if (hop >= strend) {
1446 if (hop > strend)
1447 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1448 break;
fc241834 1449 }
a6ec74c1 1450 }
08ca2aa3
TH
1451 if (hop > strend)
1452 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1453 len = hop - s;
1454 } else if (len > strend - s)
1455 len = strend - s;
1456
1457 if (datumtype == 'Z') {
1458 /* 'Z' strips stuff after first null */
f7fe979e 1459 const char *ptr, *end;
f337b084
TH
1460 end = s + len;
1461 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1462 sv = newSVpvn(s, ptr-s);
1463 if (howlen == e_star) /* exact for 'Z*' */
1464 len = ptr-s + (ptr != strend ? 1 : 0);
1465 } else if (datumtype == 'A') {
1466 /* 'A' strips both nulls and spaces */
f7fe979e 1467 const char *ptr;
18bdf90a
TH
1468 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1469 for (ptr = s+len-1; ptr >= s; ptr--)
1470 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1471 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1472 if (ptr >= s) ptr += UTF8SKIP(ptr);
1473 else ptr++;
28be1210 1474 if (ptr > s+len)
18bdf90a
TH
1475 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1476 } else {
1477 for (ptr = s+len-1; ptr >= s; ptr--)
1478 if (*ptr != 0 && !isSPACE(*ptr)) break;
1479 ptr++;
1480 }
08ca2aa3
TH
1481 sv = newSVpvn(s, ptr-s);
1482 } else sv = newSVpvn(s, len);
1483
1484 if (utf8) {
1485 SvUTF8_on(sv);
1486 /* Undo any upgrade done due to need_utf8() */
f337b084 1487 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1488 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1489 }
1490 XPUSHs(sv_2mortal(sv));
08ca2aa3 1491 s += len;
a6ec74c1
JH
1492 break;
1493 case 'B':
08ca2aa3
TH
1494 case 'b': {
1495 char *str;
49704364 1496 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1497 len = (strend - s) * 8;
1498 if (checksum) {
1499 if (!PL_bitcount) {
08ca2aa3 1500 int bits;
a02a5408 1501 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1502 for (bits = 1; bits < 256; bits++) {
1503 if (bits & 1) PL_bitcount[bits]++;
1504 if (bits & 2) PL_bitcount[bits]++;
1505 if (bits & 4) PL_bitcount[bits]++;
1506 if (bits & 8) PL_bitcount[bits]++;
1507 if (bits & 16) PL_bitcount[bits]++;
1508 if (bits & 32) PL_bitcount[bits]++;
1509 if (bits & 64) PL_bitcount[bits]++;
1510 if (bits & 128) PL_bitcount[bits]++;
1511 }
1512 }
f337b084 1513 if (utf8)
08ca2aa3 1514 while (len >= 8 && s < strend) {
f337b084 1515 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1516 len -= 8;
1517 }
f337b084 1518 else
fc241834 1519 while (len >= 8) {
08ca2aa3 1520 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1521 len -= 8;
1522 }
08ca2aa3
TH
1523 if (len && s < strend) {
1524 U8 bits;
f337b084
TH
1525 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1526 if (datumtype == 'b')
a6ec74c1 1527 while (len-- > 0) {
92d41999 1528 if (bits & 1) cuv++;
a6ec74c1
JH
1529 bits >>= 1;
1530 }
f337b084 1531 else
a6ec74c1 1532 while (len-- > 0) {
08ca2aa3 1533 if (bits & 0x80) cuv++;
a6ec74c1
JH
1534 bits <<= 1;
1535 }
fc241834 1536 }
a6ec74c1
JH
1537 break;
1538 }
08ca2aa3 1539
561b68a9 1540 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1541 SvPOK_on(sv);
1542 str = SvPVX(sv);
1543 if (datumtype == 'b') {
f337b084 1544 U8 bits = 0;
f7fe979e 1545 const I32 ai32 = len;
08ca2aa3
TH
1546 for (len = 0; len < ai32; len++) {
1547 if (len & 7) bits >>= 1;
1548 else if (utf8) {
1549 if (s >= strend) break;
f337b084 1550 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1551 } else bits = *(U8 *) s++;
1552 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1553 }
08ca2aa3 1554 } else {
f337b084 1555 U8 bits = 0;
f7fe979e 1556 const I32 ai32 = len;
08ca2aa3
TH
1557 for (len = 0; len < ai32; len++) {
1558 if (len & 7) bits <<= 1;
1559 else if (utf8) {
1560 if (s >= strend) break;
f337b084 1561 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1562 } else bits = *(U8 *) s++;
1563 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1564 }
1565 }
1566 *str = '\0';
aa07b2f6 1567 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1568 XPUSHs(sv);
a6ec74c1 1569 break;
08ca2aa3 1570 }
a6ec74c1 1571 case 'H':
08ca2aa3
TH
1572 case 'h': {
1573 char *str;
fc241834 1574 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1575 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1576 len = (strend - s) * 2;
561b68a9 1577 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1578 SvPOK_on(sv);
1579 str = SvPVX(sv);
1580 if (datumtype == 'h') {
f337b084 1581 U8 bits = 0;
9e27e96a 1582 I32 ai32 = len;
fc241834
RGS
1583 for (len = 0; len < ai32; len++) {
1584 if (len & 1) bits >>= 4;
1585 else if (utf8) {
1586 if (s >= strend) break;
f337b084 1587 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1588 } else bits = * (U8 *) s++;
a6ec74c1
JH
1589 *str++ = PL_hexdigit[bits & 15];
1590 }
08ca2aa3 1591 } else {
f337b084 1592 U8 bits = 0;
f7fe979e 1593 const I32 ai32 = len;
08ca2aa3
TH
1594 for (len = 0; len < ai32; len++) {
1595 if (len & 1) bits <<= 4;
1596 else if (utf8) {
1597 if (s >= strend) break;
f337b084 1598 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1599 } else bits = *(U8 *) s++;
a6ec74c1
JH
1600 *str++ = PL_hexdigit[(bits >> 4) & 15];
1601 }
1602 }
1603 *str = '\0';
aa07b2f6 1604 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1605 XPUSHs(sv);
a6ec74c1 1606 break;
08ca2aa3 1607 }
a6ec74c1 1608 case 'c':
73cb7263 1609 while (len-- > 0) {
f337b084 1610 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1611 if (aint >= 128) /* fake up signed chars */
1612 aint -= 256;
08ca2aa3 1613 if (!checksum)
ac7f3b1b 1614 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1615 else if (checksum > bits_in_uv)
1616 cdouble += (NV)aint;
1617 else
1618 cuv += aint;
a6ec74c1
JH
1619 }
1620 break;
1621 case 'C':
08ca2aa3
TH
1622 case 'W':
1623 W_checksum:
35bcd338 1624 if (len == 0) {
fc241834 1625 if (explicit_length && datumtype == 'C')
08ca2aa3 1626 /* Switch to "character" mode */
f337b084 1627 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1628 break;
1629 }
fc241834 1630 if (datumtype == 'C' ?
f337b084
TH
1631 (symptr->flags & FLAG_DO_UTF8) &&
1632 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1633 while (len-- > 0 && s < strend) {
08ca2aa3 1634 STRLEN retlen;
f7fe979e 1635 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1636 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1637 if (retlen == (STRLEN) -1 || retlen == 0)
1638 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1639 s += retlen;
1640 if (!checksum)
1641 PUSHs(sv_2mortal(newSVuv((UV) val)));
1642 else if (checksum > bits_in_uv)
1643 cdouble += (NV) val;
d6d3e8bd 1644 else
08ca2aa3 1645 cuv += val;
fc241834 1646 }
08ca2aa3 1647 } else if (!checksum)
a6ec74c1 1648 while (len-- > 0) {
f7fe979e 1649 const U8 ch = *(U8 *) s++;
08ca2aa3 1650 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1651 }
08ca2aa3
TH
1652 else if (checksum > bits_in_uv)
1653 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1654 else
1655 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1656 break;
1657 case 'U':
35bcd338 1658 if (len == 0) {
08ca2aa3
TH
1659 if (explicit_length) {
1660 /* Switch to "bytes in UTF-8" mode */
f337b084 1661 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1662 else
1663 /* Should be impossible due to the need_utf8() test */
1664 Perl_croak(aTHX_ "U0 mode on a byte string");
1665 }
35bcd338
JH
1666 break;
1667 }
08ca2aa3 1668 if (len > strend - s) len = strend - s;
fc241834 1669 if (!checksum) {
08ca2aa3
TH
1670 if (len && unpack_only_one) len = 1;
1671 EXTEND(SP, len);
1672 EXTEND_MORTAL(len);
fc241834 1673 }
08ca2aa3
TH
1674 while (len-- > 0 && s < strend) {
1675 STRLEN retlen;
1676 UV auv;
1677 if (utf8) {
1678 U8 result[UTF8_MAXLEN];
f7fe979e 1679 const char *ptr = s;
08ca2aa3 1680 STRLEN len;
08ca2aa3
TH
1681 /* Bug: warns about bad utf8 even if we are short on bytes
1682 and will break out of the loop */
230e1fce
NC
1683 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1684 'U'))
08ca2aa3
TH
1685 break;
1686 len = UTF8SKIP(result);
fc241834 1687 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1688 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1689 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1690 s = ptr;
1691 } else {
1692 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1693 if (retlen == (STRLEN) -1 || retlen == 0)
1694 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1695 s += retlen;
1696 }
1697 if (!checksum)
1698 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1699 else if (checksum > bits_in_uv)
08ca2aa3 1700 cdouble += (NV) auv;
73cb7263 1701 else
08ca2aa3 1702 cuv += auv;
a6ec74c1
JH
1703 }
1704 break;
49704364
WL
1705 case 's' | TYPE_IS_SHRIEKING:
1706#if SHORTSIZE != SIZE16
73cb7263 1707 while (len-- > 0) {
08ca2aa3 1708 short ashort;
f337b084
TH
1709 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1710 DO_BO_UNPACK(ashort, s);
08ca2aa3 1711 if (!checksum)
ac7f3b1b 1712 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1713 else if (checksum > bits_in_uv)
1714 cdouble += (NV)ashort;
1715 else
1716 cuv += ashort;
49704364
WL
1717 }
1718 break;
1719#else
1720 /* Fallthrough! */
a6ec74c1 1721#endif
49704364 1722 case 's':
73cb7263 1723 while (len-- > 0) {
08ca2aa3
TH
1724 I16 ai16;
1725
1726#if U16SIZE > SIZE16
1727 ai16 = 0;
1728#endif
f337b084 1729 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1730 DO_BO_UNPACK(ai16, 16);
1109a392 1731#if U16SIZE > SIZE16
73cb7263
NC
1732 if (ai16 > 32767)
1733 ai16 -= 65536;
a6ec74c1 1734#endif
08ca2aa3 1735 if (!checksum)
ac7f3b1b 1736 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1737 else if (checksum > bits_in_uv)
1738 cdouble += (NV)ai16;
1739 else
1740 cuv += ai16;
a6ec74c1
JH
1741 }
1742 break;
49704364
WL
1743 case 'S' | TYPE_IS_SHRIEKING:
1744#if SHORTSIZE != SIZE16
73cb7263 1745 while (len-- > 0) {
08ca2aa3 1746 unsigned short aushort;
f337b084
TH
1747 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1748 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1749 if (!checksum)
1750 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1751 else if (checksum > bits_in_uv)
1752 cdouble += (NV)aushort;
1753 else
1754 cuv += aushort;
49704364
WL
1755 }
1756 break;
1757#else
1758 /* Fallhrough! */
1759#endif
a6ec74c1
JH
1760 case 'v':
1761 case 'n':
1762 case 'S':
73cb7263 1763 while (len-- > 0) {
08ca2aa3
TH
1764 U16 au16;
1765#if U16SIZE > SIZE16
1766 au16 = 0;
1767#endif
f337b084 1768 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1769 DO_BO_UNPACK(au16, 16);
a6ec74c1 1770#ifdef HAS_NTOHS
73cb7263
NC
1771 if (datumtype == 'n')
1772 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1773#endif
1774#ifdef HAS_VTOHS
73cb7263
NC
1775 if (datumtype == 'v')
1776 au16 = vtohs(au16);
a6ec74c1 1777#endif
08ca2aa3
TH
1778 if (!checksum)
1779 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1780 else if (checksum > bits_in_uv)
f337b084 1781 cdouble += (NV) au16;
73cb7263
NC
1782 else
1783 cuv += au16;
a6ec74c1
JH
1784 }
1785 break;
7212898e 1786#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1787 case 'v' | TYPE_IS_SHRIEKING:
1788 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1789 while (len-- > 0) {
08ca2aa3
TH
1790 I16 ai16;
1791# if U16SIZE > SIZE16
1792 ai16 = 0;
1793# endif
f337b084 1794 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1795# ifdef HAS_NTOHS
73cb7263 1796 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1797 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1798# endif /* HAS_NTOHS */
1799# ifdef HAS_VTOHS
73cb7263 1800 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1801 ai16 = (I16) vtohs((U16) ai16);
1802# endif /* HAS_VTOHS */
1803 if (!checksum)
ac7f3b1b 1804 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1805 else if (checksum > bits_in_uv)
08ca2aa3 1806 cdouble += (NV) ai16;
73cb7263
NC
1807 else
1808 cuv += ai16;
068bd2e7
MHM
1809 }
1810 break;
08ca2aa3 1811#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1812 case 'i':
49704364 1813 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1814 while (len-- > 0) {
08ca2aa3 1815 int aint;
f337b084
TH
1816 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1817 DO_BO_UNPACK(aint, i);
08ca2aa3 1818 if (!checksum)
ac7f3b1b 1819 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1820 else if (checksum > bits_in_uv)
1821 cdouble += (NV)aint;
1822 else
1823 cuv += aint;
a6ec74c1
JH
1824 }
1825 break;
1826 case 'I':
49704364 1827 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1828 while (len-- > 0) {
08ca2aa3 1829 unsigned int auint;
f337b084
TH
1830 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1831 DO_BO_UNPACK(auint, i);
08ca2aa3 1832 if (!checksum)
ac7f3b1b 1833 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1834 else if (checksum > bits_in_uv)
1835 cdouble += (NV)auint;
1836 else
1837 cuv += auint;
a6ec74c1
JH
1838 }
1839 break;
92d41999 1840 case 'j':
73cb7263 1841 while (len-- > 0) {
08ca2aa3 1842 IV aiv;
f337b084 1843 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1844#if IVSIZE == INTSIZE
f337b084 1845 DO_BO_UNPACK(aiv, i);
1109a392 1846#elif IVSIZE == LONGSIZE
f337b084 1847 DO_BO_UNPACK(aiv, l);
1109a392 1848#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1849 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1850#else
1851 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1852#endif
08ca2aa3 1853 if (!checksum)
ac7f3b1b 1854 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1855 else if (checksum > bits_in_uv)
1856 cdouble += (NV)aiv;
1857 else
1858 cuv += aiv;
92d41999
JH
1859 }
1860 break;
1861 case 'J':
73cb7263 1862 while (len-- > 0) {
08ca2aa3 1863 UV auv;
f337b084 1864 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1865#if IVSIZE == INTSIZE
f337b084 1866 DO_BO_UNPACK(auv, i);
08ca2aa3 1867#elif IVSIZE == LONGSIZE
f337b084 1868 DO_BO_UNPACK(auv, l);
08ca2aa3 1869#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1870 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1871#else
1872 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1873#endif
08ca2aa3 1874 if (!checksum)
ac7f3b1b 1875 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1876 else if (checksum > bits_in_uv)
1877 cdouble += (NV)auv;
1878 else
1879 cuv += auv;
92d41999
JH
1880 }
1881 break;
49704364
WL
1882 case 'l' | TYPE_IS_SHRIEKING:
1883#if LONGSIZE != SIZE32
73cb7263 1884 while (len-- > 0) {
08ca2aa3 1885 long along;
f337b084
TH
1886 SHIFT_VAR(utf8, s, strend, along, datumtype);
1887 DO_BO_UNPACK(along, l);
08ca2aa3 1888 if (!checksum)
ac7f3b1b 1889 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1890 else if (checksum > bits_in_uv)
1891 cdouble += (NV)along;
1892 else
1893 cuv += along;
49704364
WL
1894 }
1895 break;
1896#else
1897 /* Fallthrough! */
a6ec74c1 1898#endif
49704364 1899 case 'l':
73cb7263 1900 while (len-- > 0) {
08ca2aa3
TH
1901 I32 ai32;
1902#if U32SIZE > SIZE32
1903 ai32 = 0;
1904#endif
f337b084 1905 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1906 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1907#if U32SIZE > SIZE32
08ca2aa3 1908 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1909#endif
08ca2aa3 1910 if (!checksum)
ac7f3b1b 1911 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1912 else if (checksum > bits_in_uv)
1913 cdouble += (NV)ai32;
1914 else
1915 cuv += ai32;
a6ec74c1
JH
1916 }
1917 break;
49704364
WL
1918 case 'L' | TYPE_IS_SHRIEKING:
1919#if LONGSIZE != SIZE32
73cb7263 1920 while (len-- > 0) {
08ca2aa3 1921 unsigned long aulong;
f337b084
TH
1922 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1923 DO_BO_UNPACK(aulong, l);
08ca2aa3 1924 if (!checksum)
ac7f3b1b 1925 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1926 else if (checksum > bits_in_uv)
1927 cdouble += (NV)aulong;
1928 else
1929 cuv += aulong;
49704364
WL
1930 }
1931 break;
1932#else
1933 /* Fall through! */
1934#endif
a6ec74c1
JH
1935 case 'V':
1936 case 'N':
1937 case 'L':
73cb7263 1938 while (len-- > 0) {
08ca2aa3
TH
1939 U32 au32;
1940#if U32SIZE > SIZE32
1941 au32 = 0;
1942#endif
f337b084 1943 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1944 DO_BO_UNPACK(au32, 32);
a6ec74c1 1945#ifdef HAS_NTOHL
73cb7263
NC
1946 if (datumtype == 'N')
1947 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1948#endif
1949#ifdef HAS_VTOHL
73cb7263
NC
1950 if (datumtype == 'V')
1951 au32 = vtohl(au32);
a6ec74c1 1952#endif
08ca2aa3 1953 if (!checksum)
fc241834
RGS
1954 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1955 else if (checksum > bits_in_uv)
1956 cdouble += (NV)au32;
1957 else
1958 cuv += au32;
a6ec74c1
JH
1959 }
1960 break;
7212898e 1961#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1962 case 'V' | TYPE_IS_SHRIEKING:
1963 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1964 while (len-- > 0) {
08ca2aa3
TH
1965 I32 ai32;
1966# if U32SIZE > SIZE32
1967 ai32 = 0;
1968# endif
f337b084 1969 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1970# ifdef HAS_NTOHL
73cb7263
NC
1971 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1972 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1973# endif
1974# ifdef HAS_VTOHL
73cb7263
NC
1975 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1976 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1977# endif
1978 if (!checksum)
ac7f3b1b 1979 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1980 else if (checksum > bits_in_uv)
1981 cdouble += (NV)ai32;
1982 else
1983 cuv += ai32;
068bd2e7
MHM
1984 }
1985 break;
08ca2aa3 1986#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1987 case 'p':
a6ec74c1 1988 while (len-- > 0) {
f7fe979e 1989 const char *aptr;
f337b084 1990 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1991 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1992 /* newSVpv generates undef if aptr is NULL */
1993 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1994 }
1995 break;
1996 case 'w':
a6ec74c1
JH
1997 {
1998 UV auv = 0;
1999 U32 bytes = 0;
fc241834 2000
08ca2aa3
TH
2001 while (len > 0 && s < strend) {
2002 U8 ch;
f337b084 2003 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 2004 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2005 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2006 if (ch < 0x80) {
a6ec74c1 2007 bytes = 0;
ac7f3b1b 2008 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
2009 len--;
2010 auv = 0;
08ca2aa3 2011 continue;
a6ec74c1 2012 }
08ca2aa3 2013 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2014 const char *t;
a6ec74c1
JH
2015
2016 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2017 while (s < strend) {
f337b084 2018 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2019 sv = mul128(sv, (U8)(ch & 0x7f));
2020 if (!(ch & 0x80)) {
a6ec74c1
JH
2021 bytes = 0;
2022 break;
2023 }
2024 }
10516c54 2025 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2026 while (*t == '0')
2027 t++;
2028 sv_chop(sv, t);
2029 PUSHs(sv_2mortal(sv));
2030 len--;
2031 auv = 0;
2032 }
2033 }
2034 if ((s >= strend) && bytes)
49704364 2035 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2036 }
2037 break;
2038 case 'P':
49704364
WL
2039 if (symptr->howlen == e_star)
2040 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2041 EXTEND(SP, 1);
2d3e0934 2042 if (s + sizeof(char*) <= strend) {
08ca2aa3 2043 char *aptr;
f337b084 2044 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2045 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2046 /* newSVpvn generates undef if aptr is NULL */
2047 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2048 }
a6ec74c1
JH
2049 break;
2050#ifdef HAS_QUAD
2051 case 'q':
73cb7263 2052 while (len-- > 0) {
08ca2aa3 2053 Quad_t aquad;
f337b084
TH
2054 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2055 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2056 if (!checksum)
2057 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2058 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2059 else if (checksum > bits_in_uv)
2060 cdouble += (NV)aquad;
2061 else
2062 cuv += aquad;
2063 }
a6ec74c1
JH
2064 break;
2065 case 'Q':
73cb7263 2066 while (len-- > 0) {
08ca2aa3 2067 Uquad_t auquad;
f337b084
TH
2068 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2069 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2070 if (!checksum)
2071 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2072 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2073 else if (checksum > bits_in_uv)
2074 cdouble += (NV)auquad;
2075 else
2076 cuv += auquad;
a6ec74c1
JH
2077 }
2078 break;
08ca2aa3 2079#endif /* HAS_QUAD */
a6ec74c1
JH
2080 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2081 case 'f':
73cb7263 2082 while (len-- > 0) {
08ca2aa3 2083 float afloat;
f337b084 2084 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2085 DO_BO_UNPACK_N(afloat, float);
2086 if (!checksum)
2087 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2088 else
73cb7263 2089 cdouble += afloat;
fc241834 2090 }
a6ec74c1
JH
2091 break;
2092 case 'd':
73cb7263 2093 while (len-- > 0) {
08ca2aa3 2094 double adouble;
f337b084 2095 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2096 DO_BO_UNPACK_N(adouble, double);
2097 if (!checksum)
2098 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2099 else
73cb7263 2100 cdouble += adouble;
fc241834 2101 }
a6ec74c1 2102 break;
92d41999 2103 case 'F':
73cb7263 2104 while (len-- > 0) {
08ca2aa3 2105 NV anv;
f337b084 2106 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2107 DO_BO_UNPACK_N(anv, NV);
2108 if (!checksum)
2109 PUSHs(sv_2mortal(newSVnv(anv)));
2110 else
73cb7263 2111 cdouble += anv;
fc241834 2112 }
92d41999
JH
2113 break;
2114#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2115 case 'D':
73cb7263 2116 while (len-- > 0) {
08ca2aa3 2117 long double aldouble;
f337b084 2118 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2119 DO_BO_UNPACK_N(aldouble, long double);
2120 if (!checksum)
2121 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2122 else
2123 cdouble += aldouble;
92d41999
JH
2124 }
2125 break;
2126#endif
a6ec74c1
JH
2127 case 'u':
2128 /* MKS:
2129 * Initialise the decode mapping. By using a table driven
2130 * algorithm, the code will be character-set independent
2131 * (and just as fast as doing character arithmetic)
2132 */
2133 if (PL_uudmap['M'] == 0) {
2d3e0934 2134 size_t i;
a6ec74c1 2135
2d3e0934 2136 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2137 PL_uudmap[(U8)PL_uuemap[i]] = i;
2138 /*
2139 * Because ' ' and '`' map to the same value,
2140 * we need to decode them both the same.
2141 */
2142 PL_uudmap[' '] = 0;
2143 }
08ca2aa3 2144 {
f7fe979e 2145 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2146 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2147 if (l) SvPOK_on(sv);
2148 }
2149 if (utf8) {
2150 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2151 I32 a, b, c, d;
2152 char hunk[4];
2153
2154 hunk[3] = '\0';
2155 while (len > 0) {
2156 next_uni_uu(aTHX_ &s, strend, &a);
2157 next_uni_uu(aTHX_ &s, strend, &b);
2158 next_uni_uu(aTHX_ &s, strend, &c);
2159 next_uni_uu(aTHX_ &s, strend, &d);
2160 hunk[0] = (char)((a << 2) | (b >> 4));
2161 hunk[1] = (char)((b << 4) | (c >> 2));
2162 hunk[2] = (char)((c << 6) | d);
2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2164 len -= 3;
2165 }
2166 if (s < strend) {
f7fe979e
AL
2167 if (*s == '\n') {
2168 s++;
2169 }
08ca2aa3
TH
2170 else {
2171 /* possible checksum byte */
f7fe979e
AL
2172 const char *skip = s+UTF8SKIP(s);
2173 if (skip < strend && *skip == '\n')
2174 s = skip+1;
08ca2aa3
TH
2175 }
2176 }
2177 }
2178 } else {
fc241834
RGS
2179 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2180 I32 a, b, c, d;
2181 char hunk[4];
a6ec74c1 2182
fc241834
RGS
2183 hunk[3] = '\0';
2184 len = PL_uudmap[*(U8*)s++] & 077;
2185 while (len > 0) {
2186 if (s < strend && ISUUCHAR(*s))
2187 a = PL_uudmap[*(U8*)s++] & 077;
2188 else
2189 a = 0;
2190 if (s < strend && ISUUCHAR(*s))
2191 b = PL_uudmap[*(U8*)s++] & 077;
2192 else
2193 b = 0;
2194 if (s < strend && ISUUCHAR(*s))
2195 c = PL_uudmap[*(U8*)s++] & 077;
2196 else
2197 c = 0;
2198 if (s < strend && ISUUCHAR(*s))
2199 d = PL_uudmap[*(U8*)s++] & 077;
2200 else
2201 d = 0;
2202 hunk[0] = (char)((a << 2) | (b >> 4));
2203 hunk[1] = (char)((b << 4) | (c >> 2));
2204 hunk[2] = (char)((c << 6) | d);
2205 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2206 len -= 3;
2207 }
2208 if (*s == '\n')
2209 s++;
2210 else /* possible checksum byte */
2211 if (s + 1 < strend && s[1] == '\n')
2212 s += 2;
a6ec74c1 2213 }
08ca2aa3
TH
2214 }
2215 XPUSHs(sv);
a6ec74c1
JH
2216 break;
2217 }
49704364 2218
a6ec74c1 2219 if (checksum) {
1109a392 2220 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2221 (checksum > bits_in_uv &&
08ca2aa3
TH
2222 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2223 NV trouble, anv;
a6ec74c1 2224
08ca2aa3 2225 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2226 while (checksum >= 16) {
2227 checksum -= 16;
08ca2aa3 2228 anv *= 65536.0;
a6ec74c1 2229 }
a6ec74c1 2230 while (cdouble < 0.0)
08ca2aa3
TH
2231 cdouble += anv;
2232 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2233 sv = newSVnv(cdouble);
a6ec74c1
JH
2234 }
2235 else {
fa8ec7c1
NC
2236 if (checksum < bits_in_uv) {
2237 UV mask = ((UV)1 << checksum) - 1;
92d41999 2238 cuv &= mask;
a6ec74c1 2239 }
c4c5f44a 2240 sv = newSVuv(cuv);
a6ec74c1
JH
2241 }
2242 XPUSHs(sv_2mortal(sv));
2243 checksum = 0;
2244 }
fc241834 2245
49704364
WL
2246 if (symptr->flags & FLAG_SLASH){
2247 if (SP - PL_stack_base - start_sp_offset <= 0)
2248 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2249 if( next_symbol(symptr) ){
2250 if( symptr->howlen == e_number )
2251 Perl_croak(aTHX_ "Count after length/code in unpack" );
2252 if( beyond ){
2253 /* ...end of char buffer then no decent length available */
2254 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2255 } else {
2256 /* take top of stack (hope it's numeric) */
2257 len = POPi;
2258 if( len < 0 )
2259 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2260 }
2261 } else {
2262 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2263 }
2264 datumtype = symptr->code;
21c16052 2265 explicit_length = FALSE;
49704364
WL
2266 goto redo_switch;
2267 }
a6ec74c1 2268 }
49704364 2269
18529408
IZ
2270 if (new_s)
2271 *new_s = s;
2272 PUTBACK;
2273 return SP - PL_stack_base - start_sp_offset;
2274}
2275
2276PP(pp_unpack)
2277{
97aff369 2278 dVAR;
18529408 2279 dSP;
bab9c0ac 2280 dPOPPOPssrl;
18529408
IZ
2281 I32 gimme = GIMME_V;
2282 STRLEN llen;
2283 STRLEN rlen;
5c144d81
NC
2284 const char *pat = SvPV_const(left, llen);
2285 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2286 const char *strend = s + rlen;
2287 const char *patend = pat + llen;
08ca2aa3 2288 I32 cnt;
18529408
IZ
2289
2290 PUTBACK;
7accc089 2291 cnt = unpackstring(pat, patend, s, strend,
49704364 2292 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2293 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2294
18529408
IZ
2295 SPAGAIN;
2296 if ( !cnt && gimme == G_SCALAR )
2297 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2298 RETURN;
2299}
2300
f337b084 2301STATIC U8 *
f7fe979e 2302doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2303{
f337b084 2304 *h++ = PL_uuemap[len];
a6ec74c1 2305 while (len > 2) {
f337b084
TH
2306 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2307 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2308 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2309 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2310 s += 3;
2311 len -= 3;
2312 }
2313 if (len > 0) {
f7fe979e 2314 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2315 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2316 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2317 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2318 *h++ = PL_uuemap[0];
a6ec74c1 2319 }
f337b084
TH
2320 *h++ = '\n';
2321 return h;
a6ec74c1
JH
2322}
2323
2324STATIC SV *
f7fe979e 2325S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2326{
8b6e33c7
AL
2327 SV *result = newSVpvn(s, l);
2328 char *const result_c = SvPV_nolen(result); /* convenience */
2329 char *out = result_c;
2330 bool skip = 1;
2331 bool ignore = 0;
a6ec74c1
JH
2332
2333 while (*s) {
2334 switch (*s) {
2335 case ' ':
2336 break;
2337 case '+':
2338 if (!skip) {
2339 SvREFCNT_dec(result);
2340 return (NULL);
2341 }
2342 break;
2343 case '0':
2344 case '1':
2345 case '2':
2346 case '3':
2347 case '4':
2348 case '5':
2349 case '6':
2350 case '7':
2351 case '8':
2352 case '9':
2353 skip = 0;
2354 if (!ignore) {
2355 *(out++) = *s;
2356 }
2357 break;
2358 case '.':
2359 ignore = 1;
2360 break;
2361 default:
2362 SvREFCNT_dec(result);
2363 return (NULL);
2364 }
2365 s++;
2366 }
2367 *(out++) = '\0';
2368 SvCUR_set(result, out - result_c);
2369 return (result);
2370}
2371
2372/* pnum must be '\0' terminated */
2373STATIC int
2374S_div128(pTHX_ SV *pnum, bool *done)
2375{
8b6e33c7
AL
2376 STRLEN len;
2377 char * const s = SvPV(pnum, len);
2378 char *t = s;
2379 int m = 0;
2380
2381 *done = 1;
2382 while (*t) {
2383 const int i = m * 10 + (*t - '0');
2384 const int r = (i >> 7); /* r < 10 */
2385 m = i & 0x7F;
2386 if (r) {
2387 *done = 0;
2388 }
2389 *(t++) = '0' + r;
a6ec74c1 2390 }
8b6e33c7
AL
2391 *(t++) = '\0';
2392 SvCUR_set(pnum, (STRLEN) (t - s));
2393 return (m);
a6ec74c1
JH
2394}
2395
18529408
IZ
2396/*
2397=for apidoc pack_cat
2398
7accc089
JH
2399The engine implementing pack() Perl function. Note: parameters next_in_list and
2400flags are not used. This call should not be used; use packlist instead.
18529408 2401
bfce84ec
AL
2402=cut
2403*/
49704364 2404
18529408 2405void
f7fe979e 2406Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2407{
aadb217d 2408 tempsym_t sym;
8772537c
AL
2409 PERL_UNUSED_ARG(next_in_list);
2410 PERL_UNUSED_ARG(flags);
7accc089 2411
f7fe979e 2412 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
aadb217d 2413
7accc089
JH
2414 (void)pack_rec( cat, &sym, beglist, endlist );
2415}
2416
2417
2418/*
2419=for apidoc packlist
2420
2421The engine implementing pack() Perl function.
2422
bfce84ec
AL
2423=cut
2424*/
7accc089
JH
2425
2426void
f7fe979e 2427Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2428{
97aff369 2429 dVAR;
f337b084 2430 STRLEN no_len;
aadb217d
JH
2431 tempsym_t sym;
2432
f7fe979e 2433 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2434
f337b084
TH
2435 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2436 Also make sure any UTF8 flag is loaded */
2437 SvPV_force(cat, no_len);
bfce84ec
AL
2438 if (DO_UTF8(cat))
2439 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2440
49704364
WL
2441 (void)pack_rec( cat, &sym, beglist, endlist );
2442}
2443
f337b084
TH
2444/* like sv_utf8_upgrade, but also repoint the group start markers */
2445STATIC void
2446marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2447 STRLEN len;
2448 tempsym_t *group;
f7fe979e
AL
2449 const char *from_ptr, *from_start, *from_end, **marks, **m;
2450 char *to_start, *to_ptr;
f337b084
TH
2451
2452 if (SvUTF8(sv)) return;
2453
aa07b2f6 2454 from_start = SvPVX_const(sv);
f337b084
TH
2455 from_end = from_start + SvCUR(sv);
2456 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2457 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2458 if (from_ptr == from_end) {
2459 /* Simple case: no character needs to be changed */
2460 SvUTF8_on(sv);
2461 return;
2462 }
2463
3473cf63 2464 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2465 Newx(to_start, len, char);
f337b084
TH
2466 Copy(from_start, to_start, from_ptr-from_start, char);
2467 to_ptr = to_start + (from_ptr-from_start);
2468
a02a5408 2469 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2470 for (group=sym_ptr; group; group = group->previous)
2471 marks[group->level] = from_start + group->strbeg;
2472 marks[sym_ptr->level+1] = from_end+1;
2473 for (m = marks; *m < from_ptr; m++)
2474 *m = to_start + (*m-from_start);
2475
2476 for (;from_ptr < from_end; from_ptr++) {
2477 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2478 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2479 }
2480 *to_ptr = 0;
2481
2482 while (*m == from_ptr) *m++ = to_ptr;
2483 if (m != marks + sym_ptr->level+1) {
2484 Safefree(marks);
2485 Safefree(to_start);
2486 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2487 }
2488 for (group=sym_ptr; group; group = group->previous)
2489 group->strbeg = marks[group->level] - to_start;
2490 Safefree(marks);
2491
2492 if (SvOOK(sv)) {
2493 if (SvIVX(sv)) {
b162af07 2494 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2495 from_start -= SvIVX(sv);
2496 SvIV_set(sv, 0);
2497 }
2498 SvFLAGS(sv) &= ~SVf_OOK;
2499 }
2500 if (SvLEN(sv) != 0)
2501 Safefree(from_start);
f880fe2f 2502 SvPV_set(sv, to_start);
b162af07
SP
2503 SvCUR_set(sv, to_ptr - to_start);
2504 SvLEN_set(sv, len);
f337b084
TH
2505 SvUTF8_on(sv);
2506}
2507
2508/* Exponential string grower. Makes string extension effectively O(n)
2509 needed says how many extra bytes we need (not counting the final '\0')
2510 Only grows the string if there is an actual lack of space
2511*/
2512STATIC char *
0bd48802 2513S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2514 const STRLEN cur = SvCUR(sv);
2515 const STRLEN len = SvLEN(sv);
f337b084
TH
2516 STRLEN extend;
2517 if (len - cur > needed) return SvPVX(sv);
2518 extend = needed > len ? needed : len;
2519 return SvGROW(sv, len+extend+1);
2520}
49704364
WL
2521
2522STATIC
2523SV **
f337b084 2524S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2525{
97aff369 2526 dVAR;
49704364 2527 tempsym_t lookahead;
f337b084
TH
2528 I32 items = endlist - beglist;
2529 bool found = next_symbol(symptr);
2530 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2531 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2532
2533 if (symptr->level == 0 && found && symptr->code == 'U') {
2534 marked_upgrade(aTHX_ cat, symptr);
2535 symptr->flags |= FLAG_DO_UTF8;
2536 utf8 = 0;
49704364 2537 }
f337b084 2538 symptr->strbeg = SvCUR(cat);
49704364
WL
2539
2540 while (found) {
f337b084
TH
2541 SV *fromstr;
2542 STRLEN fromlen;
2543 I32 len;
a0714e2c 2544 SV *lengthcode = NULL;
49704364 2545 I32 datumtype = symptr->code;
f337b084
TH
2546 howlen_t howlen = symptr->howlen;
2547 char *start = SvPVX(cat);
2548 char *cur = start + SvCUR(cat);
49704364 2549
f337b084
TH
2550#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2551
2552 switch (howlen) {
fc241834 2553 case e_star:
f337b084
TH
2554 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2555 0 : items;
2556 break;
2557 default:
2558 /* e_no_len and e_number */
2559 len = symptr->length;
49704364
WL
2560 break;
2561 }
2562
f337b084 2563 if (len) {
a7a3cfaa 2564 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2565
a7a3cfaa
TH
2566 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2567 /* We can process this letter. */
2568 STRLEN size = props & PACK_SIZE_MASK;
2569 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2570 }
f337b084
TH
2571 }
2572
49704364
WL
2573 /* Look ahead for next symbol. Do we have code/code? */
2574 lookahead = *symptr;
2575 found = next_symbol(&lookahead);
246f24af
TH
2576 if (symptr->flags & FLAG_SLASH) {
2577 IV count;
f337b084 2578 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2579 if (strchr("aAZ", lookahead.code)) {
2580 if (lookahead.howlen == e_number) count = lookahead.length;
2581 else {
2582 if (items > 0)
2583 count = DO_UTF8(*beglist) ?
2584 sv_len_utf8(*beglist) : sv_len(*beglist);
2585 else count = 0;
2586 if (lookahead.code == 'Z') count++;
2587 }
2588 } else {
2589 if (lookahead.howlen == e_number && lookahead.length < items)
2590 count = lookahead.length;
2591 else count = items;
2592 }
2593 lookahead.howlen = e_number;
2594 lookahead.length = count;
2595 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2596 }
49704364 2597
fc241834
RGS
2598 /* Code inside the switch must take care to properly update
2599 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2600 doesn't simply leave using break */
1109a392 2601 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2602 default:
f337b084
TH
2603 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2604 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2605 case '%':
49704364 2606 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2607 {
2608 char *from;
2609#ifdef PERL_PACK_CAN_SHRIEKSIGN
2610 case '.' | TYPE_IS_SHRIEKING:
2611#endif
2612 case '.':
2613 if (howlen == e_star) from = start;
2614 else if (len == 0) from = cur;
2615 else {
2616 tempsym_t *group = symptr;
2617
2618 while (--len && group) group = group->previous;
2619 from = group ? start + group->strbeg : start;
2620 }
2621 fromstr = NEXTFROM;
2622 len = SvIV(fromstr);
2623 goto resize;
2624#ifdef PERL_PACK_CAN_SHRIEKSIGN
2625 case '@' | TYPE_IS_SHRIEKING:
2626#endif
a6ec74c1 2627 case '@':
28be1210
TH
2628 from = start + symptr->strbeg;
2629 resize:
2630#ifdef PERL_PACK_CAN_SHRIEKSIGN
2631 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2632#else /* PERL_PACK_CAN_SHRIEKSIGN */
2633 if (utf8)
2634#endif
2635 if (len >= 0) {
2636 while (len && from < cur) {
2637 from += UTF8SKIP(from);
2638 len--;
2639 }
2640 if (from > cur)
2641 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2642 if (len) {
2643 /* Here we know from == cur */
2644 grow:
2645 GROWING(0, cat, start, cur, len);
2646 Zero(cur, len, char);
2647 cur += len;
2648 } else if (from < cur) {
2649 len = cur - from;
2650 goto shrink;
2651 } else goto no_change;
2652 } else {
2653 cur = from;
2654 len = -len;
2655 goto utf8_shrink;
f337b084 2656 }
28be1210
TH
2657 else {
2658 len -= cur - from;
f337b084 2659 if (len > 0) goto grow;
28be1210 2660 if (len == 0) goto no_change;
fc241834 2661 len = -len;
28be1210 2662 goto shrink;
f337b084 2663 }
a6ec74c1 2664 break;
28be1210 2665 }
fc241834 2666 case '(': {
49704364 2667 tempsym_t savsym = *symptr;
66c611c5
MHM
2668 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2669 symptr->flags |= group_modifiers;
49704364
WL
2670 symptr->patend = savsym.grpend;
2671 symptr->level++;
f337b084 2672 symptr->previous = &lookahead;
18529408 2673 while (len--) {
f337b084
TH
2674 U32 was_utf8;
2675 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2676 else symptr->flags &= ~FLAG_PARSE_UTF8;
2677 was_utf8 = SvUTF8(cat);
49704364 2678 symptr->patptr = savsym.grpbeg;
f337b084
TH
2679 beglist = pack_rec(cat, symptr, beglist, endlist);
2680 if (SvUTF8(cat) != was_utf8)
2681 /* This had better be an upgrade while in utf8==0 mode */
2682 utf8 = 1;
2683
49704364 2684 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2685 break; /* No way to continue */
2686 }
f337b084
TH
2687 lookahead.flags = symptr->flags & ~group_modifiers;
2688 goto no_change;
18529408 2689 }
62f95557
IZ
2690 case 'X' | TYPE_IS_SHRIEKING:
2691 if (!len) /* Avoid division by 0 */
2692 len = 1;
f337b084
TH
2693 if (utf8) {
2694 char *hop, *last;
2695 I32 l = len;
2696 hop = last = start;
2697 while (hop < cur) {
2698 hop += UTF8SKIP(hop);
2699 if (--l == 0) {
2700 last = hop;
2701 l = len;
2702 }
2703 }
2704 if (last > cur)
2705 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2706 cur = last;
2707 break;
2708 }
2709 len = (cur-start) % len;
62f95557 2710 /* FALL THROUGH */
a6ec74c1 2711 case 'X':
f337b084
TH
2712 if (utf8) {
2713 if (len < 1) goto no_change;
28be1210 2714 utf8_shrink:
f337b084
TH
2715 while (len > 0) {
2716 if (cur <= start)
28be1210
TH
2717 Perl_croak(aTHX_ "'%c' outside of string in pack",
2718 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2719 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2720 if (cur <= start)
28be1210
TH
2721 Perl_croak(aTHX_ "'%c' outside of string in pack",
2722 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2723 }
2724 len--;
2725 }
2726 } else {
fc241834 2727 shrink:
f337b084 2728 if (cur - start < len)
28be1210
TH
2729 Perl_croak(aTHX_ "'%c' outside of string in pack",
2730 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2731 cur -= len;
2732 }
2733 if (cur < start+symptr->strbeg) {
2734 /* Make sure group starts don't point into the void */
2735 tempsym_t *group;
9e27e96a 2736 const STRLEN length = cur-start;
f337b084
TH
2737 for (group = symptr;
2738 group && length < group->strbeg;
2739 group = group->previous) group->strbeg = length;
2740 lookahead.strbeg = length;
2741 }
a6ec74c1 2742 break;
fc241834
RGS
2743 case 'x' | TYPE_IS_SHRIEKING: {
2744 I32 ai32;
62f95557
IZ
2745 if (!len) /* Avoid division by 0 */
2746 len = 1;
230e1fce 2747 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2748 else ai32 = (cur - start) % len;
2749 if (ai32 == 0) goto no_change;
2750 len -= ai32;
2751 }
2752 /* FALL THROUGH */
a6ec74c1 2753 case 'x':
f337b084 2754 goto grow;
a6ec74c1
JH
2755 case 'A':
2756 case 'Z':
f337b084 2757 case 'a': {
f7fe979e 2758 const char *aptr;
f337b084 2759
a6ec74c1 2760 fromstr = NEXTFROM;
e62f0680 2761 aptr = SvPV_const(fromstr, fromlen);
f337b084 2762 if (DO_UTF8(fromstr)) {
f7fe979e 2763 const char *end, *s;
f337b084
TH
2764
2765 if (!utf8 && !SvUTF8(cat)) {
2766 marked_upgrade(aTHX_ cat, symptr);
2767 lookahead.flags |= FLAG_DO_UTF8;
2768 lookahead.strbeg = symptr->strbeg;
2769 utf8 = 1;
2770 start = SvPVX(cat);
2771 cur = start + SvCUR(cat);
2772 }
fc241834 2773 if (howlen == e_star) {
f337b084
TH
2774 if (utf8) goto string_copy;
2775 len = fromlen+1;
2776 }
2777 s = aptr;
2778 end = aptr + fromlen;
2779 fromlen = datumtype == 'Z' ? len-1 : len;
2780 while ((I32) fromlen > 0 && s < end) {
2781 s += UTF8SKIP(s);
2782 fromlen--;
2783 }
2784 if (s > end)
2785 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2786 if (utf8) {
fc241834 2787 len = fromlen;
f337b084
TH
2788 if (datumtype == 'Z') len++;
2789 fromlen = s-aptr;
2790 len += fromlen;
fc241834 2791
f337b084 2792 goto string_copy;
fc241834 2793 }
f337b084
TH
2794 fromlen = len - fromlen;
2795 if (datumtype == 'Z') fromlen--;
2796 if (howlen == e_star) {
2797 len = fromlen;
2798 if (datumtype == 'Z') len++;
fc241834 2799 }
f337b084 2800 GROWING(0, cat, start, cur, len);
fc241834 2801 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2802 datumtype | TYPE_IS_PACK))
2803 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2804 cur += fromlen;
a6ec74c1 2805 len -= fromlen;
f337b084
TH
2806 } else if (utf8) {
2807 if (howlen == e_star) {
2808 len = fromlen;
2809 if (datumtype == 'Z') len++;
a6ec74c1 2810 }
f337b084
TH
2811 if (len <= (I32) fromlen) {
2812 fromlen = len;
2813 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2814 }
fc241834 2815 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2816 upgrade, so:
2817 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2818 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2819 len -= fromlen;
2820 while (fromlen > 0) {
230e1fce 2821 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2822 aptr++;
2823 fromlen--;
fc241834 2824 }
f337b084
TH
2825 } else {
2826 string_copy:
2827 if (howlen == e_star) {
2828 len = fromlen;
2829 if (datumtype == 'Z') len++;
2830 }
2831 if (len <= (I32) fromlen) {
2832 fromlen = len;
2833 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2834 }
f337b084
TH
2835 GROWING(0, cat, start, cur, len);
2836 Copy(aptr, cur, fromlen, char);
2837 cur += fromlen;
2838 len -= fromlen;
a6ec74c1 2839 }
f337b084
TH
2840 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2841 cur += len;
a6ec74c1 2842 break;
f337b084 2843 }
a6ec74c1 2844 case 'B':
f337b084 2845 case 'b': {
b83604b4 2846 const char *str, *end;
f337b084
TH
2847 I32 l, field_len;
2848 U8 bits;
2849 bool utf8_source;
2850 U32 utf8_flags;
a6ec74c1 2851
fc241834 2852 fromstr = NEXTFROM;
b83604b4 2853 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2854 end = str + fromlen;
2855 if (DO_UTF8(fromstr)) {
2856 utf8_source = TRUE;
041457d9 2857 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2858 } else {
2859 utf8_source = FALSE;
2860 utf8_flags = 0; /* Unused, but keep compilers happy */
2861 }
2862 if (howlen == e_star) len = fromlen;
2863 field_len = (len+7)/8;
2864 GROWING(utf8, cat, start, cur, field_len);
2865 if (len > (I32)fromlen) len = fromlen;
2866 bits = 0;
2867 l = 0;
2868 if (datumtype == 'B')
2869 while (l++ < len) {
2870 if (utf8_source) {
2871 UV val;
2872 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2873 bits |= val & 1;
2874 } else bits |= *str++ & 1;
2875 if (l & 7) bits <<= 1;
fc241834 2876 else {
f337b084
TH
2877 PUSH_BYTE(utf8, cur, bits);
2878 bits = 0;
a6ec74c1
JH
2879 }
2880 }
f337b084
TH
2881 else
2882 /* datumtype == 'b' */
2883 while (l++ < len) {
2884 if (utf8_source) {
2885 UV val;
2886 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2887 if (val & 1) bits |= 0x80;
2888 } else if (*str++ & 1)
2889 bits |= 0x80;
2890 if (l & 7) bits >>= 1;
fc241834 2891 else {
f337b084
TH
2892 PUSH_BYTE(utf8, cur, bits);
2893 bits = 0;
a6ec74c1
JH
2894 }
2895 }
f337b084
TH
2896 l--;
2897 if (l & 7) {
fc241834 2898 if (datumtype == 'B')
f337b084 2899 bits <<= 7 - (l & 7);
fc241834 2900 else
f337b084
TH
2901 bits >>= 7 - (l & 7);
2902 PUSH_BYTE(utf8, cur, bits);
2903 l += 7;
a6ec74c1 2904 }
f337b084
TH
2905 /* Determine how many chars are left in the requested field */
2906 l /= 8;
2907 if (howlen == e_star) field_len = 0;
2908 else field_len -= l;
2909 Zero(cur, field_len, char);
2910 cur += field_len;
a6ec74c1 2911 break;
f337b084 2912 }
a6ec74c1 2913 case 'H':
f337b084 2914 case 'h': {
10516c54 2915 const char *str, *end;
f337b084
TH
2916 I32 l, field_len;
2917 U8 bits;
2918 bool utf8_source;
2919 U32 utf8_flags;
a6ec74c1 2920
fc241834 2921 fromstr = NEXTFROM;
10516c54 2922 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2923 end = str + fromlen;
2924 if (DO_UTF8(fromstr)) {
2925 utf8_source = TRUE;
041457d9 2926 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2927 } else {
2928 utf8_source = FALSE;
2929 utf8_flags = 0; /* Unused, but keep compilers happy */
2930 }
2931 if (howlen == e_star) len = fromlen;
2932 field_len = (len+1)/2;
2933 GROWING(utf8, cat, start, cur, field_len);
2934 if (!utf8 && len > (I32)fromlen) len = fromlen;
2935 bits = 0;
2936 l = 0;
2937 if (datumtype == 'H')
2938 while (l++ < len) {
2939 if (utf8_source) {
2940 UV val;
2941 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2942 if (val < 256 && isALPHA(val))
2943 bits |= (val + 9) & 0xf;
a6ec74c1 2944 else
f337b084
TH
2945 bits |= val & 0xf;
2946 } else if (isALPHA(*str))
2947 bits |= (*str++ + 9) & 0xf;
2948 else
2949 bits |= *str++ & 0xf;
2950 if (l & 1) bits <<= 4;
fc241834 2951 else {
f337b084
TH
2952 PUSH_BYTE(utf8, cur, bits);
2953 bits = 0;
a6ec74c1
JH
2954 }
2955 }
f337b084
TH
2956 else
2957 while (l++ < len) {
2958 if (utf8_source) {
2959 UV val;
2960 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2961 if (val < 256 && isALPHA(val))
2962 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2963 else
f337b084
TH
2964 bits |= (val & 0xf) << 4;
2965 } else if (isALPHA(*str))
2966 bits |= ((*str++ + 9) & 0xf) << 4;
2967 else
2968 bits |= (*str++ & 0xf) << 4;
2969 if (l & 1) bits >>= 4;
fc241834 2970 else {
f337b084
TH
2971 PUSH_BYTE(utf8, cur, bits);
2972 bits = 0;
a6ec74c1 2973 }
fc241834 2974 }
f337b084
TH
2975 l--;
2976 if (l & 1) {
2977 PUSH_BYTE(utf8, cur, bits);
2978 l++;
2979 }
2980 /* Determine how many chars are left in the requested field */
2981 l /= 2;
2982 if (howlen == e_star) field_len = 0;
2983 else field_len -= l;
2984 Zero(cur, field_len, char);
2985 cur += field_len;
2986 break;
fc241834
RGS
2987 }
2988 case 'c':
f337b084
TH
2989 while (len-- > 0) {
2990 IV aiv;
2991 fromstr = NEXTFROM;
2992 aiv = SvIV(fromstr);
2993 if ((-128 > aiv || aiv > 127) &&
2994 ckWARN(WARN_PACK))
2995 Perl_warner(aTHX_ packWARN(WARN_PACK),
2996 "Character in 'c' format wrapped in pack");
585ec06d 2997 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2998 }
2999 break;
3000 case 'C':
f337b084
TH
3001 if (len == 0) {
3002 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
3003 break;
3004 }
3005 GROWING(0, cat, start, cur, len);
a6ec74c1 3006 while (len-- > 0) {
f337b084 3007 IV aiv;
a6ec74c1 3008 fromstr = NEXTFROM;
f337b084
TH
3009 aiv = SvIV(fromstr);
3010 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
3011 ckWARN(WARN_PACK))
3012 Perl_warner(aTHX_ packWARN(WARN_PACK),
3013 "Character in 'C' format wrapped in pack");
585ec06d 3014 *cur++ = (char)(aiv & 0xff);
f337b084 3015 }
fc241834
RGS
3016 break;
3017 case 'W': {
3018 char *end;
3019 U8 in_bytes = IN_BYTES;
3020
3021 end = start+SvLEN(cat)-1;
3022 if (utf8) end -= UTF8_MAXLEN-1;
3023 while (len-- > 0) {
3024 UV auv;
3025 fromstr = NEXTFROM;
3026 auv = SvUV(fromstr);
3027 if (in_bytes) auv = auv % 0x100;
3028 if (utf8) {
3029 W_utf8:
3030 if (cur > end) {
3031 *cur = '\0';
b162af07 3032 SvCUR_set(cat, cur - start);
fc241834
RGS
3033
3034 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3035 end = start+SvLEN(cat)-UTF8_MAXLEN;
3036 }
230e1fce
NC
3037 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3038 NATIVE_TO_UNI(auv),
041457d9 3039 warn_utf8 ?
230e1fce 3040 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3041 } else {
3042 if (auv >= 0x100) {
3043 if (!SvUTF8(cat)) {
3044 *cur = '\0';
b162af07 3045 SvCUR_set(cat, cur - start);
fc241834
RGS
3046 marked_upgrade(aTHX_ cat, symptr);
3047 lookahead.flags |= FLAG_DO_UTF8;
3048 lookahead.strbeg = symptr->strbeg;
3049 utf8 = 1;
3050 start = SvPVX(cat);
3051 cur = start + SvCUR(cat);
3052 end = start+SvLEN(cat)-UTF8_MAXLEN;
3053 goto W_utf8;
3054 }
3055 if (ckWARN(WARN_PACK))
3056 Perl_warner(aTHX_ packWARN(WARN_PACK),
3057 "Character in 'W' format wrapped in pack");
3058 auv &= 0xff;
3059 }
3060 if (cur >= end) {
3061 *cur = '\0';
b162af07 3062 SvCUR_set(cat, cur - start);
fc241834
RGS
3063 GROWING(0, cat, start, cur, len+1);
3064 end = start+SvLEN(cat)-1;
3065 }
fe2774ed 3066 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3067 }
3068 }
3069 break;
fc241834
RGS
3070 }
3071 case 'U': {
3072 char *end;
3073
3074 if (len == 0) {
3075 if (!(symptr->flags & FLAG_DO_UTF8)) {
3076 marked_upgrade(aTHX_ cat, symptr);
3077 lookahead.flags |= FLAG_DO_UTF8;
3078 lookahead.strbeg = symptr->strbeg;
3079 }
3080 utf8 = 0;
3081 goto no_change;
3082 }
3083
3084 end = start+SvLEN(cat);
3085 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3086 while (len-- > 0) {
fc241834 3087 UV auv;
a6ec74c1 3088 fromstr = NEXTFROM;
fc241834
RGS
3089 auv = SvUV(fromstr);
3090 if (utf8) {
230e1fce 3091 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3092 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3093 warn_utf8 ?
fc241834
RGS
3094 0 : UNICODE_ALLOW_ANY);
3095 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3096 *cur = '\0';
b162af07 3097 SvCUR_set(cat, cur - start);
fc241834
RGS
3098 GROWING(0, cat, start, cur,
3099 len+(endb-buffer)*UTF8_EXPAND);
3100 end = start+SvLEN(cat);
3101 }
3102 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3103 } else {
3104 if (cur >= end) {
3105 *cur = '\0';
b162af07 3106 SvCUR_set(cat, cur - start);
fc241834
RGS
3107 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3108 end = start+SvLEN(cat)-UTF8_MAXLEN;
3109 }
230e1fce 3110 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3111 warn_utf8 ?
230e1fce 3112 0 : UNICODE_ALLOW_ANY);
fc241834 3113 }
a6ec74c1 3114 }
a6ec74c1 3115 break;
fc241834 3116 }
a6ec74c1
JH
3117 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3118 case 'f':
a6ec74c1 3119 while (len-- > 0) {
f337b084
TH
3120 float afloat;
3121 NV anv;
a6ec74c1 3122 fromstr = NEXTFROM;
f337b084 3123 anv = SvNV(fromstr);
5cdb9e01 3124#ifdef __VOS__
f337b084 3125 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3126 during conversion from double to float into infinity, so we
3127 do it by hand. This code should either be generalized for
3128 any OS that needs it, or removed if and when VOS implements
3129 posix-976 (suggestion to support mapping to infinity).
3130 Paul.Green@stratus.com 02-04-02. */
f337b084 3131 if (anv > FLT_MAX)
fc241834 3132 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3133 else if (anv < -FLT_MAX)
fc241834 3134 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3135 else afloat = (float) anv;
3136#else /* __VOS__ */
baf3cf9c 3137# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3138 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3139 * on Alpha; fake it if we don't have them.
3140 */
f337b084 3141 if (anv > FLT_MAX)
fc241834 3142 afloat = FLT_MAX;
f337b084 3143 else if (anv < -FLT_MAX)
fc241834 3144 afloat = -FLT_MAX;
f337b084 3145 else afloat = (float)anv;
baf3cf9c 3146# else
f337b084 3147 afloat = (float)anv;
baf3cf9c 3148# endif
f337b084 3149#endif /* __VOS__ */
1109a392 3150 DO_BO_PACK_N(afloat, float);
f337b084 3151 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3152 }
3153 break;
3154 case 'd':
a6ec74c1 3155 while (len-- > 0) {
f337b084
TH
3156 double adouble;
3157 NV anv;
a6ec74c1 3158 fromstr = NEXTFROM;
f337b084 3159 anv = SvNV(fromstr);
5cdb9e01 3160#ifdef __VOS__
f337b084 3161 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3162 during conversion from long double to double into infinity,
3163 so we do it by hand. This code should either be generalized
3164 for any OS that needs it, or removed if and when VOS
3165 implements posix-976 (suggestion to support mapping to
3166 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3167 if (anv > DBL_MAX)
fc241834 3168 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3169 else if (anv < -DBL_MAX)
fc241834 3170 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3171 else adouble = (double) anv;
3172#else /* __VOS__ */
baf3cf9c 3173# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3174 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3175 * on Alpha; fake it if we don't have them.
3176 */
f337b084 3177 if (anv > DBL_MAX)
fc241834 3178 adouble = DBL_MAX;
f337b084 3179 else if (anv < -DBL_MAX)
fc241834 3180 adouble = -DBL_MAX;
f337b084 3181 else adouble = (double)anv;
baf3cf9c 3182# else
f337b084 3183 adouble = (double)anv;
baf3cf9c 3184# endif
f337b084 3185#endif /* __VOS__ */
1109a392 3186 DO_BO_PACK_N(adouble, double);
f337b084 3187 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3188 }
3189 break;
fc241834
RGS
3190 case 'F': {
3191 NV anv;
1109a392 3192 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3193 while (len-- > 0) {
3194 fromstr = NEXTFROM;
3195 anv = SvNV(fromstr);
1109a392 3196 DO_BO_PACK_N(anv, NV);
fc241834 3197 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3198 }
3199 break;
fc241834 3200 }
92d41999 3201#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3202 case 'D': {
3203 long double aldouble;
1109a392
MHM
3204 /* long doubles can have unused bits, which may be nonzero */
3205 Zero(&aldouble, 1, long double);
92d41999
JH
3206 while (len-- > 0) {
3207 fromstr = NEXTFROM;
3208 aldouble = (long double)SvNV(fromstr);
1109a392 3209 DO_BO_PACK_N(aldouble, long double);
fc241834 3210 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3211 }
3212 break;
fc241834 3213 }
92d41999 3214#endif
7212898e 3215#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3216 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3217#endif
a6ec74c1
JH
3218 case 'n':
3219 while (len-- > 0) {
f337b084 3220 I16 ai16;
a6ec74c1 3221 fromstr = NEXTFROM;
ef108786 3222 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3223#ifdef HAS_HTONS
ef108786 3224 ai16 = PerlSock_htons(ai16);
a6ec74c1 3225#endif
f337b084 3226 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3227 }
3228 break;
7212898e 3229#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3230 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3231#endif
a6ec74c1
JH
3232 case 'v':
3233 while (len-- > 0) {
f337b084 3234 I16 ai16;
a6ec74c1 3235 fromstr = NEXTFROM;
ef108786 3236 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3237#ifdef HAS_HTOVS
ef108786 3238 ai16 = htovs(ai16);
a6ec74c1 3239#endif
f337b084 3240 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3241 }
3242 break;
49704364 3243 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3244#if SHORTSIZE != SIZE16
fc241834 3245 while (len-- > 0) {
f337b084 3246 unsigned short aushort;
fc241834
RGS
3247 fromstr = NEXTFROM;
3248 aushort = SvUV(fromstr);
3249 DO_BO_PACK(aushort, s);
f337b084 3250 PUSH_VAR(utf8, cur, aushort);
fc241834 3251 }
49704364
WL
3252 break;
3253#else
3254 /* Fall through! */
a6ec74c1 3255#endif
49704364 3256 case 'S':
fc241834 3257 while (len-- > 0) {
f337b084 3258 U16 au16;
fc241834
RGS
3259 fromstr = NEXTFROM;
3260 au16 = (U16)SvUV(fromstr);
3261 DO_BO_PACK(au16, 16);
f337b084 3262 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3263 }
3264 break;
49704364 3265 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3266#if SHORTSIZE != SIZE16
fc241834 3267 while (len-- > 0) {
f337b084 3268 short ashort;
fc241834
RGS
3269 fromstr = NEXTFROM;
3270 ashort = SvIV(fromstr);
3271 DO_BO_PACK(ashort, s);
f337b084 3272 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3273 }
49704364
WL
3274 break;
3275#else
3276 /* Fall through! */
a6ec74c1 3277#endif
49704364
WL
3278 case 's':
3279 while (len-- > 0) {
f337b084 3280 I16 ai16;
49704364 3281 fromstr = NEXTFROM;
ef108786
MHM
3282 ai16 = (I16)SvIV(fromstr);
3283 DO_BO_PACK(ai16, 16);
f337b084 3284 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3285 }
3286 break;
3287 case 'I':
49704364 3288 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3289 while (len-- > 0) {
f337b084 3290 unsigned int auint;
a6ec74c1
JH
3291 fromstr = NEXTFROM;
3292 auint = SvUV(fromstr);
1109a392 3293 DO_BO_PACK(auint, i);
f337b084 3294 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3295 }
3296 break;
92d41999
JH
3297 case 'j':
3298 while (len-- > 0) {
f337b084 3299 IV aiv;
92d41999
JH
3300 fromstr = NEXTFROM;
3301 aiv = SvIV(fromstr);
1109a392
MHM
3302#if IVSIZE == INTSIZE
3303 DO_BO_PACK(aiv, i);
3304#elif IVSIZE == LONGSIZE
3305 DO_BO_PACK(aiv, l);
3306#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3307 DO_BO_PACK(aiv, 64);
f337b084
TH
3308#else
3309 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3310#endif
f337b084 3311 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3312 }
3313 break;
3314 case 'J':
3315 while (len-- > 0) {
f337b084 3316 UV auv;
92d41999
JH
3317 fromstr = NEXTFROM;
3318 auv = SvUV(fromstr);
1109a392
MHM
3319#if UVSIZE == INTSIZE
3320 DO_BO_PACK(auv, i);
3321#elif UVSIZE == LONGSIZE
3322 DO_BO_PACK(auv, l);
3323#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3324 DO_BO_PACK(auv, 64);
f337b084
TH
3325#else
3326 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3327#endif
f337b084 3328 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3329 }
3330 break;
a6ec74c1
JH
3331 case 'w':
3332 while (len-- > 0) {
f337b084 3333 NV anv;
a6ec74c1 3334 fromstr = NEXTFROM;
15e9f109 3335 anv = SvNV(fromstr);
a6ec74c1 3336
f337b084
TH
3337 if (anv < 0) {
3338 *cur = '\0';
b162af07 3339 SvCUR_set(cat, cur - start);
49704364 3340 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3341 }
a6ec74c1 3342
196b62db
NC
3343 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3344 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3345 any negative IVs will have already been got by the croak()
3346 above. IOK is untrue for fractions, so we test them
3347 against UV_MAX_P1. */
f337b084
TH
3348 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3349 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3350 char *in = buf + sizeof(buf);
196b62db 3351 UV auv = SvUV(fromstr);
a6ec74c1
JH
3352
3353 do {
eb160463 3354 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3355 auv >>= 7;
3356 } while (auv);
3357 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3358 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3359 in, (buf + sizeof(buf)) - in);
3360 } else if (SvPOKp(fromstr))
3361 goto w_string;
a6ec74c1 3362 else if (SvNOKp(fromstr)) {
0258719b
NC
3363 /* 10**NV_MAX_10_EXP is the largest power of 10
3364 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3365 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3366 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3367 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3368 Some C compilers are strict about integral constant
3369 expressions so we conservatively divide by a slightly
3370 smaller integer instead of multiplying by the exact
3371 floating-point value.
0258719b
NC
3372 */
3373#ifdef NV_MAX_10_EXP
f337b084 3374 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3375 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3376#else
f337b084 3377 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3378 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3379#endif
a6ec74c1
JH
3380 char *in = buf + sizeof(buf);
3381
8b6e33c7 3382 anv = Perl_floor(anv);
a6ec74c1 3383 do {
8b6e33c7 3384 const NV next = Perl_floor(anv / 128);
a6ec74c1 3385 if (in <= buf) /* this cannot happen ;-) */
49704364 3386 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3387 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3388 anv = next;
3389 } while (anv > 0);
a6ec74c1 3390 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3391 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3392 in, (buf + sizeof(buf)) - in);
3393 } else {
8b6e33c7
AL
3394 const char *from;
3395 char *result, *in;
735b914b
JH
3396 SV *norm;
3397 STRLEN len;
3398 bool done;
3399
f337b084 3400 w_string:
735b914b 3401 /* Copy string and check for compliance */
349d4f2f 3402 from = SvPV_const(fromstr, len);
735b914b 3403 if ((norm = is_an_int(from, len)) == NULL)
49704364 3404 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3405
a02a5408 3406 Newx(result, len, char);
735b914b
JH
3407 in = result + len;
3408 done = FALSE;
f337b084 3409 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3410 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3411 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3412 in, (result + len) - in);
735b914b
JH
3413 Safefree(result);
3414 SvREFCNT_dec(norm); /* free norm */
fc241834 3415 }
a6ec74c1
JH
3416 }
3417 break;
3418 case 'i':
49704364 3419 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3420 while (len-- > 0) {
f337b084 3421 int aint;
a6ec74c1
JH
3422 fromstr = NEXTFROM;
3423 aint = SvIV(fromstr);
1109a392 3424 DO_BO_PACK(aint, i);
f337b084 3425 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3426 }
3427 break;
7212898e 3428#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3429 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3430#endif
a6ec74c1
JH
3431 case 'N':
3432 while (len-- > 0) {
f337b084 3433 U32 au32;
a6ec74c1 3434 fromstr = NEXTFROM;
ef108786 3435 au32 = SvUV(fromstr);
a6ec74c1 3436#ifdef HAS_HTONL
ef108786 3437 au32 = PerlSock_htonl(au32);
a6ec74c1 3438#endif
f337b084 3439 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3440 }
3441 break;
7212898e 3442#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3443 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3444#endif
a6ec74c1
JH
3445 case 'V':
3446 while (len-- > 0) {
f337b084 3447 U32 au32;
a6ec74c1 3448 fromstr = NEXTFROM;
ef108786 3449 au32 = SvUV(fromstr);
a6ec74c1 3450#ifdef HAS_HTOVL
ef108786 3451 au32 = htovl(au32);
a6ec74c1 3452#endif
f337b084 3453 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3454 }
3455 break;
49704364 3456 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3457#if LONGSIZE != SIZE32
fc241834 3458 while (len-- > 0) {
f337b084 3459 unsigned long aulong;
fc241834
RGS
3460 fromstr = NEXTFROM;
3461 aulong = SvUV(fromstr);
3462 DO_BO_PACK(aulong, l);
f337b084 3463 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3464 }
49704364
WL
3465 break;
3466#else
3467 /* Fall though! */
a6ec74c1 3468#endif
49704364 3469 case 'L':
fc241834 3470 while (len-- > 0) {
f337b084 3471 U32 au32;
fc241834
RGS
3472 fromstr = NEXTFROM;
3473 au32 = SvUV(fromstr);
3474 DO_BO_PACK(au32, 32);
f337b084 3475 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3476 }
3477 break;
49704364 3478 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3479#if LONGSIZE != SIZE32
fc241834 3480 while (len-- > 0) {
f337b084 3481 long along;
fc241834
RGS
3482 fromstr = NEXTFROM;
3483 along = SvIV(fromstr);
3484 DO_BO_PACK(along, l);
f337b084 3485 PUSH_VAR(utf8, cur, along);
a6ec74c1 3486 }
49704364
WL
3487 break;
3488#else
3489 /* Fall though! */
a6ec74c1 3490#endif
49704364
WL
3491 case 'l':
3492 while (len-- > 0) {
f337b084 3493 I32 ai32;
49704364 3494 fromstr = NEXTFROM;
ef108786
MHM
3495 ai32 = SvIV(fromstr);
3496 DO_BO_PACK(ai32, 32);
f337b084 3497 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3498 }
3499 break;
3500#ifdef HAS_QUAD
3501 case 'Q':
3502 while (len-- > 0) {
f337b084 3503 Uquad_t auquad;
a6ec74c1 3504 fromstr = NEXTFROM;
f337b084 3505 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3506 DO_BO_PACK(auquad, 64);
f337b084 3507 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3508 }
3509 break;
3510 case 'q':
3511 while (len-- > 0) {
f337b084 3512 Quad_t aquad;
a6ec74c1
JH
3513 fromstr = NEXTFROM;
3514 aquad = (Quad_t)SvIV(fromstr);
1109a392 3515 DO_BO_PACK(aquad, 64);
f337b084 3516 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3517 }
3518 break;
f337b084 3519#endif /* HAS_QUAD */
a6ec74c1
JH
3520 case 'P':
3521 len = 1; /* assume SV is correct length */
f337b084 3522 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3523 /* Fall through! */
a6ec74c1
JH
3524 case 'p':
3525 while (len-- > 0) {
83003860 3526 const char *aptr;
f337b084 3527
a6ec74c1 3528 fromstr = NEXTFROM;
28a4f200
TH
3529 SvGETMAGIC(fromstr);
3530 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3531 else {
a6ec74c1
JH
3532 /* XXX better yet, could spirit away the string to
3533 * a safe spot and hang on to it until the result
3534 * of pack() (and all copies of the result) are
3535 * gone.
3536 */
041457d9
DM
3537 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3538 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3539 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3540 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3541 }
3542 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3543 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3544 else
2596d9fe 3545 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3546 }
07409e01 3547 DO_BO_PACK_PC(aptr);
f337b084 3548 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3549 }
3550 break;
fc241834 3551 case 'u': {
f7fe979e 3552 const char *aptr, *aend;
fc241834 3553 bool from_utf8;
f337b084 3554
a6ec74c1 3555 fromstr = NEXTFROM;
fc241834
RGS
3556 if (len <= 2) len = 45;
3557 else len = len / 3 * 3;
3558 if (len >= 64) {
0fdc08b6
RGS
3559 if (ckWARN(WARN_PACK))
3560 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3561 "Field too wide in 'u' format in pack");
3562 len = 63;
3563 }
83003860 3564 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3565 from_utf8 = DO_UTF8(fromstr);
3566 if (from_utf8) {
3567 aend = aptr + fromlen;
3568 fromlen = sv_len_utf8(fromstr);
3569 } else aend = NULL; /* Unused, but keep compilers happy */
3570 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3571 while (fromlen > 0) {
fc241834 3572 U8 *end;
a6ec74c1 3573 I32 todo;
fc241834 3574 U8 hunk[1+63/3*4+1];
a6ec74c1 3575
eb160463 3576 if ((I32)fromlen > len)
a6ec74c1
JH
3577 todo = len;
3578 else
3579 todo = fromlen;
fc241834
RGS
3580 if (from_utf8) {
3581 char buffer[64];
3582 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3583 'u' | TYPE_IS_PACK)) {
3584 *cur = '\0';
b162af07 3585 SvCUR_set(cat, cur - start);
fc241834
RGS
3586 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3587 }
3588 end = doencodes(hunk, buffer, todo);
3589 } else {
3590 end = doencodes(hunk, aptr, todo);
3591 aptr += todo;
3592 }
3593 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3594 fromlen -= todo;
3595 }
a6ec74c1
JH
3596 break;
3597 }
f337b084
TH
3598 }
3599 *cur = '\0';
b162af07 3600 SvCUR_set(cat, cur - start);
f337b084 3601 no_change:
49704364 3602 *symptr = lookahead;
a6ec74c1 3603 }
49704364 3604 return beglist;
18529408
IZ
3605}
3606#undef NEXTFROM
3607
3608
3609PP(pp_pack)
3610{
97aff369 3611 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3612 register SV *cat = TARG;
3613 STRLEN fromlen;
349d4f2f
NC
3614 SV *pat_sv = *++MARK;
3615 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3616 register const char *patend = pat + fromlen;
18529408
IZ
3617
3618 MARK++;
3619 sv_setpvn(cat, "", 0);
f337b084 3620 SvUTF8_off(cat);
18529408 3621
7accc089 3622 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3623
a6ec74c1
JH
3624 SvSETMAGIC(cat);
3625 SP = ORIGMARK;
3626 PUSHs(cat);
3627 RETURN;
3628}
a6ec74c1 3629
73cb7263
NC
3630/*
3631 * Local variables:
3632 * c-indentation-style: bsd
3633 * c-basic-offset: 4
3634 * indent-tabs-mode: t
3635 * End:
3636 *
37442d52
RGS
3637 * ex: set ts=8 sts=4 sw=4 noet:
3638 */