This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
use NOOP macro
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
acde74e1 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
WL
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 182 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
325# elif PTRSIZE == IVSIZE
326# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
330# else
331# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
333# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 335# endif
1109a392 336
7212898e 337# if defined(my_htolen) && defined(my_letohn) && \
1109a392 338 defined(my_htoben) && defined(my_betohn)
7212898e 339# define DO_BO_UNPACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347
7212898e 348# define DO_BO_PACK_N(var, type) \
1109a392 349 STMT_START { \
66c611c5 350 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
353 default: break; \
354 } \
355 } STMT_END
7212898e
NC
356# else
357# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
359# endif
360
f337b084 361#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 362
78d46eaa 363#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 364#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
365#define PACK_SIZE_MASK 0x3F
366
78d46eaa
NC
367/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
369
370#if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
372#endif
373
374typedef U8 packprops_t;
78d46eaa
NC
375#if 'J'-'I' == 1
376/* ASCII */
f43a9a31 377STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
378 /* normal */
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 385#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 386 /* D */ LONG_DOUBLESIZE,
80a13697 387#else
a7a3cfaa 388 0,
80a13697 389#endif
a7a3cfaa
TH
390 0,
391 /* F */ NVSIZE,
392 0, 0,
393 /* I */ sizeof(unsigned int),
394 /* J */ UVSIZE,
395 0,
396 /* L */ SIZE32,
397 0,
398 /* N */ SIZE32,
399 0, 0,
80a13697 400#if defined(HAS_QUAD)
a7a3cfaa 401 /* Q */ sizeof(Uquad_t),
80a13697 402#else
a7a3cfaa 403 0,
80a13697 404#endif
a7a3cfaa
TH
405 0,
406 /* S */ SIZE16,
407 0,
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* V */ SIZE32,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
414 0,
415 /* f */ sizeof(float),
416 0, 0,
417 /* i */ sizeof(int),
418 /* j */ IVSIZE,
419 0,
420 /* l */ SIZE32,
421 0,
422 /* n */ SIZE16,
423 0,
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* q */ sizeof(Quad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* s */ SIZE16,
432 0, 0,
433 /* v */ SIZE16,
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
444 /* shrieking */
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
451 0, 0,
452 /* L */ sizeof(unsigned long),
453 0,
7212898e 454#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 455 /* N */ SIZE32,
7212898e 456#else
a7a3cfaa 457 0,
7212898e 458#endif
a7a3cfaa
TH
459 0, 0, 0, 0,
460 /* S */ sizeof(unsigned short),
461 0, 0,
7212898e 462#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 463 /* V */ SIZE32,
7212898e 464#else
a7a3cfaa 465 0,
7212898e 466#endif
a7a3cfaa
TH
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0,
469 /* i */ sizeof(int),
470 0, 0,
471 /* l */ sizeof(long),
472 0,
7212898e 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 474 /* n */ SIZE16,
7212898e 475#else
a7a3cfaa 476 0,
7212898e 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0,
479 /* s */ sizeof(short),
480 0, 0,
7212898e 481#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 482 /* v */ SIZE16,
7212898e 483#else
a7a3cfaa 484 0,
7212898e 485#endif
a7a3cfaa
TH
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
495};
496#else
497/* EBCDIC (or bust) */
f43a9a31 498STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
499 /* normal */
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
511 0,
512 /* f */ sizeof(float),
513 0, 0,
514 /* i */ sizeof(int),
515 0, 0, 0, 0, 0, 0, 0,
516 /* j */ IVSIZE,
517 0,
518 /* l */ SIZE32,
519 0,
520 /* n */ SIZE16,
521 0,
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 523#if defined(HAS_QUAD)
a7a3cfaa 524 /* q */ sizeof(Quad_t),
80a13697 525#else
a7a3cfaa 526 0,
80a13697 527#endif
a7a3cfaa
TH
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* s */ SIZE16,
530 0, 0,
531 /* v */ SIZE16,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 536#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 537 /* D */ LONG_DOUBLESIZE,
80a13697 538#else
a7a3cfaa 539 0,
80a13697 540#endif
a7a3cfaa
TH
541 0,
542 /* F */ NVSIZE,
543 0, 0,
544 /* I */ sizeof(unsigned int),
545 0, 0, 0, 0, 0, 0, 0,
546 /* J */ UVSIZE,
547 0,
548 /* L */ SIZE32,
549 0,
550 /* N */ SIZE32,
551 0, 0,
80a13697 552#if defined(HAS_QUAD)
a7a3cfaa 553 /* Q */ sizeof(Uquad_t),
80a13697 554#else
a7a3cfaa 555 0,
80a13697 556#endif
a7a3cfaa
TH
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 /* S */ SIZE16,
559 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* V */ SIZE32,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 /* shrieking */
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* i */ sizeof(int),
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
578 0,
7212898e 579#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 580 /* n */ SIZE16,
7212898e 581#else
a7a3cfaa 582 0,
7212898e 583#endif
a7a3cfaa
TH
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
586 0, 0,
7212898e 587#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 588 /* v */ SIZE16,
7212898e 589#else
a7a3cfaa 590 0,
7212898e 591#endif
a7a3cfaa
TH
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
598 0,
7212898e 599#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 600 /* N */ SIZE32,
7212898e 601#else
a7a3cfaa 602 0,
7212898e 603#endif
a7a3cfaa
TH
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
606 0, 0,
7212898e 607#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 608 /* V */ SIZE32,
7212898e 609#else
a7a3cfaa 610 0,
7212898e 611#endif
a7a3cfaa
TH
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
614};
615#endif
616
08ca2aa3 617STATIC U8
f7fe979e 618uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 619{
08ca2aa3 620 STRLEN retlen;
0bcc34c2 621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 629 if (val >= 0x100) {
f337b084 630 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
f337b084 633 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
634 val &= 0xff;
635 }
636 *s += retlen;
fe2774ed 637 return (U8)val;
08ca2aa3
TH
638}
639
f337b084
TH
640#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
642 *(U8 *)(s)++)
643
644STATIC bool
f7fe979e 645uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
646{
647 UV val;
648 STRLEN retlen;
f7fe979e 649 const char *from = *s;
08ca2aa3 650 int bad = 0;
f7fe979e 651 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
f337b084 655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
658 bad |= 1;
659 } else from += retlen;
660 if (val >= 0x100) {
661 bad |= 2;
662 val &= 0xff;
663 }
fe2774ed 664 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
665 }
666 /* We have enough characters for the buffer. Did we have problems ? */
667 if (bad) {
668 if (bad & 1) {
669 /* Rewalk the string fragment while warning */
f7fe979e 670 const char *ptr;
9e27e96a 671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
f337b084 674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
675 }
676 if (from > end) from = end;
677 }
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
fc241834 682 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
684 }
685 *s = from;
686 return TRUE;
687}
688
689STATIC bool
f7fe979e 690next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 691{
97aff369 692 dVAR;
08ca2aa3 693 STRLEN retlen;
0bcc34c2 694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
697 *out = 0;
698 return FALSE;
699 }
700 *out = PL_uudmap[val] & 077;
f337b084 701 *s += retlen;
08ca2aa3
TH
702 return TRUE;
703}
78d46eaa 704
f337b084 705STATIC void
f7fe979e 706bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
f337b084 707 U8 buffer[UTF8_MAXLEN];
0bcc34c2 708 const U8 * const end = start + len;
f337b084
TH
709 char *d = *dest;
710 while (start < end) {
f7fe979e 711 const int length =
f337b084
TH
712 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
713 switch(length) {
714 case 1:
715 *d++ = buffer[0];
716 break;
717 case 2:
718 *d++ = buffer[0];
719 *d++ = buffer[1];
720 break;
721 default:
722 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
723 *start, length);
724 }
725 start++;
726 }
727 *dest = d;
728}
729
230e1fce
NC
730#define PUSH_BYTES(utf8, cur, buf, len) \
731STMT_START { \
732 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
733 else { \
734 Copy(buf, cur, len, char); \
735 (cur) += (len); \
736 } \
f337b084
TH
737} STMT_END
738
739#define GROWING(utf8, cat, start, cur, in_len) \
740STMT_START { \
741 STRLEN glen = (in_len); \
3473cf63 742 if (utf8) glen *= UTF8_EXPAND; \
f337b084 743 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 744 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
745 (cur) = (start) + SvCUR(cat); \
746 } \
747} STMT_END
748
749#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
750STMT_START { \
f7fe979e 751 const STRLEN glen = (in_len); \
f337b084 752 STRLEN gl = glen; \
3473cf63 753 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
754 if ((cur) + gl >= (start) + SvLEN(cat)) { \
755 *cur = '\0'; \
b162af07 756 SvCUR_set((cat), (cur) - (start)); \
0bd48802 757 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
758 (cur) = (start) + SvCUR(cat); \
759 } \
760 PUSH_BYTES(utf8, cur, buf, glen); \
761} STMT_END
762
763#define PUSH_BYTE(utf8, s, byte) \
764STMT_START { \
765 if (utf8) { \
f7fe979e 766 const U8 au8 = (byte); \
f337b084
TH
767 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
768 } else *(U8 *)(s)++ = (byte); \
769} STMT_END
770
771/* Only to be used inside a loop (see the break) */
772#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
773STMT_START { \
774 STRLEN retlen; \
775 if (str >= end) break; \
776 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
777 if (retlen == (STRLEN) -1 || retlen == 0) { \
778 *cur = '\0'; \
779 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
780 } \
781 str += retlen; \
782} STMT_END
783
f7fe979e
AL
784static const char *_action( const tempsym_t* symptr )
785{
786 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
787}
788
206947d2 789/* Returns the sizeof() struct described by pat */
028d1f6d 790STATIC I32
f337b084 791S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 792{
f337b084 793 I32 total = 0;
206947d2 794
49704364 795 while (next_symbol(symptr)) {
f337b084 796 I32 len;
f7fe979e 797 int size;
f337b084
TH
798
799 switch (symptr->howlen) {
fc241834 800 case e_star:
49704364 801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 802 _action( symptr ) );
49704364 803 break;
f337b084
TH
804 default:
805 /* e_no_len and e_number */
806 len = symptr->length;
807 break;
49704364
WL
808 }
809
a7a3cfaa 810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 811 if (!size) {
f7fe979e 812 int star;
80a13697
NC
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
815 default:
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 818 _action( symptr ) );
28be1210
TH
819#ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
822#endif
80a13697 823 case '@':
28be1210 824 case '.':
80a13697
NC
825 case '/':
826 case 'U': /* XXXX Is it correct? */
827 case 'w':
828 case 'u':
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 830 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 831 _action( symptr ) );
80a13697
NC
832 case '%':
833 size = 0;
834 break;
835 case '(':
fc241834
RGS
836 {
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
845 *symptr = savsym;
846 break;
847 }
80a13697
NC
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
850 */
851 if (!len) /* Avoid division by 0 */
852 len = 1;
853 len = total % len; /* Assumed: the start is aligned. */
854 /* FALL THROUGH */
855 case 'X':
856 size = -1;
857 if (total < len)
f7fe979e 858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
859 break;
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
862 len = 1;
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
865 len = len - star;
866 else
867 len = 0;
868 /* FALL THROUGH */
869 case 'x':
870 case 'A':
871 case 'Z':
872 case 'a':
80a13697
NC
873 size = 1;
874 break;
875 case 'B':
876 case 'b':
877 len = (len + 7)/8;
878 size = 1;
879 break;
880 case 'H':
881 case 'h':
882 len = (len + 1)/2;
883 size = 1;
884 break;
78d46eaa 885
80a13697
NC
886 case 'P':
887 len = 1;
888 size = sizeof(char*);
78d46eaa
NC
889 break;
890 }
206947d2
IZ
891 }
892 total += len * size;
893 }
894 return total;
895}
896
49704364
WL
897
898/* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
900 */
f7fe979e
AL
901STATIC const char *
902S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 903{
49704364 904 while (patptr < patend) {
f7fe979e 905 const char c = *patptr++;
49704364
WL
906
907 if (isSPACE(c))
908 continue;
909 else if (c == ender)
910 return patptr-1;
911 else if (c == '#') {
912 while (patptr < patend && *patptr != '\n')
913 patptr++;
914 continue;
915 } else if (c == '(')
916 patptr = group_end(patptr, patend, ')') + 1;
917 else if (c == '[')
918 patptr = group_end(patptr, patend, ']') + 1;
18529408 919 }
49704364
WL
920 Perl_croak(aTHX_ "No group ending character '%c' found in template",
921 ender);
922 return 0;
18529408
IZ
923}
924
49704364
WL
925
926/* Convert unsigned decimal number to binary.
927 * Expects a pointer to the first digit and address of length variable
928 * Advances char pointer to 1st non-digit char and returns number
fc241834 929 */
f7fe979e
AL
930STATIC const char *
931S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
932{
933 I32 len = *patptr++ - '0';
934 while (isDIGIT(*patptr)) {
935 if (len >= 0x7FFFFFFF/10)
936 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
937 len = (len * 10) + (*patptr++ - '0');
938 }
939 *lenptr = len;
940 return patptr;
941}
942
943/* The marvellous template parsing routine: Using state stored in *symptr,
944 * locates next template code and count
945 */
946STATIC bool
f337b084 947S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 948{
f7fe979e 949 const char* patptr = symptr->patptr;
0bcc34c2 950 const char* const patend = symptr->patend;
49704364
WL
951
952 symptr->flags &= ~FLAG_SLASH;
953
954 while (patptr < patend) {
955 if (isSPACE(*patptr))
956 patptr++;
957 else if (*patptr == '#') {
958 patptr++;
959 while (patptr < patend && *patptr != '\n')
960 patptr++;
961 if (patptr < patend)
962 patptr++;
963 } else {
fc241834 964 /* We should have found a template code */
49704364 965 I32 code = *patptr++ & 0xFF;
66c611c5 966 U32 inherited_modifiers = 0;
49704364
WL
967
968 if (code == ','){ /* grandfather in commas but with a warning */
969 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
970 symptr->flags |= FLAG_COMMA;
971 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 972 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
973 }
974 continue;
975 }
fc241834 976
49704364 977 /* for '(', skip to ')' */
fc241834 978 if (code == '(') {
49704364
WL
979 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
980 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 981 _action( symptr ) );
49704364
WL
982 symptr->grpbeg = patptr;
983 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
984 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
985 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 986 _action( symptr ) );
49704364
WL
987 }
988
66c611c5
MHM
989 /* look for group modifiers to inherit */
990 if (TYPE_ENDIANNESS(symptr->flags)) {
991 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
992 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
993 }
994
1109a392
MHM
995 /* look for modifiers */
996 while (patptr < patend) {
b7787f18
AL
997 const char *allowed;
998 I32 modifier;
1109a392
MHM
999 switch (*patptr) {
1000 case '!':
1001 modifier = TYPE_IS_SHRIEKING;
7212898e 1002 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1003 break;
7212898e 1004#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1005 case '>':
1006 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1007 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1008 break;
1009 case '<':
1010 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1011 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1012 break;
f337b084 1013#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1014 default:
b7787f18
AL
1015 allowed = "";
1016 modifier = 0;
1109a392
MHM
1017 break;
1018 }
66c611c5 1019
1109a392
MHM
1020 if (modifier == 0)
1021 break;
66c611c5 1022
1109a392
MHM
1023 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1024 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1025 allowed, _action( symptr ) );
66c611c5
MHM
1026
1027 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1028 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1029 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1030 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1031 TYPE_ENDIANNESS_MASK)
1032 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1033 *patptr, _action( symptr ) );
66c611c5 1034
041457d9 1035 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1036 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1037 "Duplicate modifier '%c' after '%c' in %s",
1038 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1039 _action( symptr ) );
1109a392 1040 }
66c611c5 1041
1109a392
MHM
1042 code |= modifier;
1043 patptr++;
49704364
WL
1044 }
1045
66c611c5
MHM
1046 /* inherit modifiers */
1047 code |= inherited_modifiers;
1048
fc241834 1049 /* look for count and/or / */
49704364
WL
1050 if (patptr < patend) {
1051 if (isDIGIT(*patptr)) {
1052 patptr = get_num( patptr, &symptr->length );
1053 symptr->howlen = e_number;
1054
1055 } else if (*patptr == '*') {
1056 patptr++;
1057 symptr->howlen = e_star;
1058
1059 } else if (*patptr == '[') {
f7fe979e 1060 const char* lenptr = ++patptr;
49704364
WL
1061 symptr->howlen = e_number;
1062 patptr = group_end( patptr, patend, ']' ) + 1;
1063 /* what kind of [] is it? */
1064 if (isDIGIT(*lenptr)) {
1065 lenptr = get_num( lenptr, &symptr->length );
1066 if( *lenptr != ']' )
1067 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1068 _action( symptr ) );
49704364
WL
1069 } else {
1070 tempsym_t savsym = *symptr;
1071 symptr->patend = patptr-1;
1072 symptr->patptr = lenptr;
1073 savsym.length = measure_struct(symptr);
1074 *symptr = savsym;
1075 }
1076 } else {
1077 symptr->howlen = e_no_len;
1078 symptr->length = 1;
1079 }
1080
1081 /* try to find / */
1082 while (patptr < patend) {
1083 if (isSPACE(*patptr))
1084 patptr++;
1085 else if (*patptr == '#') {
1086 patptr++;
1087 while (patptr < patend && *patptr != '\n')
1088 patptr++;
1089 if (patptr < patend)
1090 patptr++;
1091 } else {
66c611c5 1092 if (*patptr == '/') {
49704364
WL
1093 symptr->flags |= FLAG_SLASH;
1094 patptr++;
66c611c5
MHM
1095 if (patptr < patend &&
1096 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1097 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1098 _action( symptr ) );
49704364
WL
1099 }
1100 break;
1101 }
18529408 1102 }
49704364
WL
1103 } else {
1104 /* at end - no count, no / */
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 symptr->code = code;
fc241834 1110 symptr->patptr = patptr;
49704364 1111 return TRUE;
18529408 1112 }
49704364 1113 }
fc241834 1114 symptr->patptr = patptr;
49704364 1115 return FALSE;
18529408
IZ
1116}
1117
18529408 1118/*
fc241834 1119 There is no way to cleanly handle the case where we should process the
08ca2aa3 1120 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1121 (e.g. estimates like strend-s as an upper bound for the number of
1122 characters left wouldn't work). So if we foresee the need of this
1123 (pattern starts with U or contains U0), we want to work on the encoded
1124 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1125 themselves if they need to do a lot of unpacks like this on it
1126*/
fc241834 1127STATIC bool
08ca2aa3
TH
1128need_utf8(const char *pat, const char *patend)
1129{
1130 bool first = TRUE;
1131 while (pat < patend) {
1132 if (pat[0] == '#') {
1133 pat++;
f7fe979e 1134 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1135 if (!pat) return FALSE;
1136 } else if (pat[0] == 'U') {
1137 if (first || pat[1] == '0') return TRUE;
1138 } else first = FALSE;
1139 pat++;
1140 }
1141 return FALSE;
1142}
1143
1144STATIC char
1145first_symbol(const char *pat, const char *patend) {
1146 while (pat < patend) {
1147 if (pat[0] != '#') return pat[0];
1148 pat++;
f7fe979e 1149 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1150 if (!pat) return 0;
1151 pat++;
1152 }
1153 return 0;
1154}
1155
1156/*
7accc089
JH
1157=for apidoc unpackstring
1158
608d3aed
WL
1159The engine implementing unpack() Perl function. C<unpackstring> puts the
1160extracted list items on the stack and returns the number of elements.
1161Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1162
1163=cut */
1164
1165I32
f7fe979e 1166Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1167{
f7fe979e 1168 tempsym_t sym;
08ca2aa3 1169
f337b084 1170 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1171 else if (need_utf8(pat, patend)) {
1172 /* We probably should try to avoid this in case a scalar context call
1173 wouldn't get to the "U0" */
1174 STRLEN len = strend - s;
230e1fce 1175 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1176 SAVEFREEPV(s);
1177 strend = s + len;
f337b084 1178 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1179 }
1180
f337b084
TH
1181 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1182 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1183
f7fe979e 1184 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1185
1186 return unpack_rec(&sym, s, s, strend, NULL );
1187}
1188
49704364
WL
1189STATIC
1190I32
f7fe979e 1191S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1192{
27da23d5 1193 dVAR; dSP;
18529408 1194 SV *sv;
f7fe979e 1195 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1196 howlen_t howlen;
a6ec74c1 1197
a6ec74c1 1198 I32 checksum = 0;
92d41999 1199 UV cuv = 0;
a6ec74c1 1200 NV cdouble = 0.0;
f337b084 1201 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1202 bool beyond = FALSE;
21c16052 1203 bool explicit_length;
9e27e96a 1204 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1205 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1206 symptr->strbeg = s - strbeg;
49704364 1207
49704364 1208 while (next_symbol(symptr)) {
a7a3cfaa 1209 packprops_t props;
9e27e96a 1210 I32 len;
f337b084 1211 I32 datumtype = symptr->code;
206947d2 1212 /* do first one only unless in list context
08ca2aa3 1213 / is implemented by unpacking the count, then popping it from the
206947d2 1214 stack, so must check that we're not in the middle of a / */
49704364 1215 if ( unpack_only_one
206947d2 1216 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1217 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1218 break;
49704364 1219
f337b084 1220 switch (howlen = symptr->howlen) {
fc241834
RGS
1221 case e_star:
1222 len = strend - strbeg; /* long enough */
49704364 1223 break;
f337b084
TH
1224 default:
1225 /* e_no_len and e_number */
1226 len = symptr->length;
1227 break;
49704364 1228 }
18529408 1229
21c16052 1230 explicit_length = TRUE;
a6ec74c1 1231 redo_switch:
49704364 1232 beyond = s >= strend;
a7a3cfaa
TH
1233
1234 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1235 if (props) {
1236 /* props nonzero means we can process this letter. */
9e27e96a
AL
1237 const long size = props & PACK_SIZE_MASK;
1238 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1239 if (len > howmany)
1240 len = howmany;
1241
1242 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1243 if (len && unpack_only_one) len = 1;
1244 EXTEND(SP, len);
1245 EXTEND_MORTAL(len);
78d46eaa
NC
1246 }
1247 }
a7a3cfaa 1248
1109a392 1249 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1250 default:
1109a392 1251 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1252
a6ec74c1 1253 case '%':
49704364 1254 if (howlen == e_no_len)
18529408 1255 len = 16; /* len is not specified */
a6ec74c1 1256 checksum = len;
92d41999 1257 cuv = 0;
a6ec74c1 1258 cdouble = 0;
18529408 1259 continue;
a6ec74c1 1260 break;
18529408
IZ
1261 case '(':
1262 {
49704364 1263 tempsym_t savsym = *symptr;
9e27e96a 1264 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1265 symptr->flags |= group_modifiers;
49704364 1266 symptr->patend = savsym.grpend;
28be1210 1267 symptr->previous = &savsym;
49704364 1268 symptr->level++;
18529408
IZ
1269 PUTBACK;
1270 while (len--) {
49704364 1271 symptr->patptr = savsym.grpbeg;
f337b084
TH
1272 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1273 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1274 unpack_rec(symptr, s, strbeg, strend, &s);
1275 if (s == strend && savsym.howlen == e_star)
49704364 1276 break; /* No way to continue */
18529408
IZ
1277 }
1278 SPAGAIN;
28be1210 1279 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1280 *symptr = savsym;
18529408
IZ
1281 break;
1282 }
28be1210
TH
1283#ifdef PERL_PACK_CAN_SHRIEKSIGN
1284 case '.' | TYPE_IS_SHRIEKING:
1285#endif
1286 case '.': {
9e27e96a 1287 const char *from;
28be1210
TH
1288 SV *sv;
1289#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1290 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1291#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1292 const bool u8 = utf8;
28be1210
TH
1293#endif
1294 if (howlen == e_star) from = strbeg;
1295 else if (len <= 0) from = s;
1296 else {
1297 tempsym_t *group = symptr;
1298
1299 while (--len && group) group = group->previous;
1300 from = group ? strbeg + group->strbeg : strbeg;
1301 }
1302 sv = from <= s ?
00646304
CB
1303 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1304 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1305 XPUSHs(sv_2mortal(sv));
1306 break;
1307 }
1308#ifdef PERL_PACK_CAN_SHRIEKSIGN
1309 case '@' | TYPE_IS_SHRIEKING:
1310#endif
a6ec74c1 1311 case '@':
28be1210
TH
1312 s = strbeg + symptr->strbeg;
1313#ifdef PERL_PACK_CAN_SHRIEKSIGN
1314 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1315#else /* PERL_PACK_CAN_SHRIEKSIGN */
1316 if (utf8)
1317#endif
1318 {
08ca2aa3
TH
1319 while (len > 0) {
1320 if (s >= strend)
1321 Perl_croak(aTHX_ "'@' outside of string in unpack");
1322 s += UTF8SKIP(s);
1323 len--;
1324 }
1325 if (s > strend)
1326 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1327 } else {
28be1210 1328 if (strend-s < len)
fc241834 1329 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1330 s += len;
08ca2aa3 1331 }
a6ec74c1 1332 break;
62f95557
IZ
1333 case 'X' | TYPE_IS_SHRIEKING:
1334 if (!len) /* Avoid division by 0 */
1335 len = 1;
08ca2aa3 1336 if (utf8) {
f7fe979e 1337 const char *hop, *last;
f337b084
TH
1338 I32 l = len;
1339 hop = last = strbeg;
1340 while (hop < s) {
1341 hop += UTF8SKIP(hop);
1342 if (--l == 0) {
08ca2aa3 1343 last = hop;
f337b084
TH
1344 l = len;
1345 }
fc241834 1346 }
f337b084
TH
1347 if (last > s)
1348 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1349 s = last;
1350 break;
f337b084
TH
1351 }
1352 len = (s - strbeg) % len;
62f95557 1353 /* FALL THROUGH */
a6ec74c1 1354 case 'X':
08ca2aa3
TH
1355 if (utf8) {
1356 while (len > 0) {
1357 if (s <= strbeg)
1358 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1359 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1360 if (s <= strbeg)
1361 Perl_croak(aTHX_ "'X' outside of string in unpack");
1362 }
1363 len--;
1364 }
1365 } else {
fc241834
RGS
1366 if (len > s - strbeg)
1367 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1368 s -= len;
08ca2aa3 1369 }
a6ec74c1 1370 break;
9e27e96a
AL
1371 case 'x' | TYPE_IS_SHRIEKING: {
1372 I32 ai32;
62f95557
IZ
1373 if (!len) /* Avoid division by 0 */
1374 len = 1;
230e1fce
NC
1375 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1376 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1377 if (ai32 == 0) break;
1378 len -= ai32;
9e27e96a 1379 }
62f95557 1380 /* FALL THROUGH */
a6ec74c1 1381 case 'x':
08ca2aa3
TH
1382 if (utf8) {
1383 while (len>0) {
1384 if (s >= strend)
1385 Perl_croak(aTHX_ "'x' outside of string in unpack");
1386 s += UTF8SKIP(s);
1387 len--;
1388 }
1389 } else {
fc241834
RGS
1390 if (len > strend - s)
1391 Perl_croak(aTHX_ "'x' outside of string in unpack");
1392 s += len;
f337b084 1393 }
a6ec74c1
JH
1394 break;
1395 case '/':
49704364
WL
1396 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1397 break;
a6ec74c1
JH
1398 case 'A':
1399 case 'Z':
1400 case 'a':
08ca2aa3
TH
1401 if (checksum) {
1402 /* Preliminary length estimate is assumed done in 'W' */
1403 if (len > strend - s) len = strend - s;
1404 goto W_checksum;
1405 }
1406 if (utf8) {
1407 I32 l;
f7fe979e 1408 const char *hop;
08ca2aa3
TH
1409 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1410 if (hop >= strend) {
1411 if (hop > strend)
1412 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1413 break;
fc241834 1414 }
a6ec74c1 1415 }
08ca2aa3
TH
1416 if (hop > strend)
1417 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1418 len = hop - s;
1419 } else if (len > strend - s)
1420 len = strend - s;
1421
1422 if (datumtype == 'Z') {
1423 /* 'Z' strips stuff after first null */
f7fe979e 1424 const char *ptr, *end;
f337b084
TH
1425 end = s + len;
1426 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1427 sv = newSVpvn(s, ptr-s);
1428 if (howlen == e_star) /* exact for 'Z*' */
1429 len = ptr-s + (ptr != strend ? 1 : 0);
1430 } else if (datumtype == 'A') {
1431 /* 'A' strips both nulls and spaces */
f7fe979e 1432 const char *ptr;
18bdf90a
TH
1433 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1434 for (ptr = s+len-1; ptr >= s; ptr--)
1435 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1436 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1437 if (ptr >= s) ptr += UTF8SKIP(ptr);
1438 else ptr++;
28be1210 1439 if (ptr > s+len)
18bdf90a
TH
1440 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1441 } else {
1442 for (ptr = s+len-1; ptr >= s; ptr--)
1443 if (*ptr != 0 && !isSPACE(*ptr)) break;
1444 ptr++;
1445 }
08ca2aa3
TH
1446 sv = newSVpvn(s, ptr-s);
1447 } else sv = newSVpvn(s, len);
1448
1449 if (utf8) {
1450 SvUTF8_on(sv);
1451 /* Undo any upgrade done due to need_utf8() */
f337b084 1452 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1453 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1454 }
1455 XPUSHs(sv_2mortal(sv));
08ca2aa3 1456 s += len;
a6ec74c1
JH
1457 break;
1458 case 'B':
08ca2aa3
TH
1459 case 'b': {
1460 char *str;
49704364 1461 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1462 len = (strend - s) * 8;
1463 if (checksum) {
1464 if (!PL_bitcount) {
08ca2aa3 1465 int bits;
a02a5408 1466 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1467 for (bits = 1; bits < 256; bits++) {
1468 if (bits & 1) PL_bitcount[bits]++;
1469 if (bits & 2) PL_bitcount[bits]++;
1470 if (bits & 4) PL_bitcount[bits]++;
1471 if (bits & 8) PL_bitcount[bits]++;
1472 if (bits & 16) PL_bitcount[bits]++;
1473 if (bits & 32) PL_bitcount[bits]++;
1474 if (bits & 64) PL_bitcount[bits]++;
1475 if (bits & 128) PL_bitcount[bits]++;
1476 }
1477 }
f337b084 1478 if (utf8)
08ca2aa3 1479 while (len >= 8 && s < strend) {
f337b084 1480 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1481 len -= 8;
1482 }
f337b084 1483 else
fc241834 1484 while (len >= 8) {
08ca2aa3 1485 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1486 len -= 8;
1487 }
08ca2aa3
TH
1488 if (len && s < strend) {
1489 U8 bits;
f337b084
TH
1490 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1491 if (datumtype == 'b')
a6ec74c1 1492 while (len-- > 0) {
92d41999 1493 if (bits & 1) cuv++;
a6ec74c1
JH
1494 bits >>= 1;
1495 }
f337b084 1496 else
a6ec74c1 1497 while (len-- > 0) {
08ca2aa3 1498 if (bits & 0x80) cuv++;
a6ec74c1
JH
1499 bits <<= 1;
1500 }
fc241834 1501 }
a6ec74c1
JH
1502 break;
1503 }
08ca2aa3 1504
561b68a9 1505 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1506 SvPOK_on(sv);
1507 str = SvPVX(sv);
1508 if (datumtype == 'b') {
f337b084 1509 U8 bits = 0;
f7fe979e 1510 const I32 ai32 = len;
08ca2aa3
TH
1511 for (len = 0; len < ai32; len++) {
1512 if (len & 7) bits >>= 1;
1513 else if (utf8) {
1514 if (s >= strend) break;
f337b084 1515 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1516 } else bits = *(U8 *) s++;
1517 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1518 }
08ca2aa3 1519 } else {
f337b084 1520 U8 bits = 0;
f7fe979e 1521 const I32 ai32 = len;
08ca2aa3
TH
1522 for (len = 0; len < ai32; len++) {
1523 if (len & 7) bits <<= 1;
1524 else if (utf8) {
1525 if (s >= strend) break;
f337b084 1526 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1527 } else bits = *(U8 *) s++;
1528 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1529 }
1530 }
1531 *str = '\0';
aa07b2f6 1532 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1533 XPUSHs(sv);
a6ec74c1 1534 break;
08ca2aa3 1535 }
a6ec74c1 1536 case 'H':
08ca2aa3
TH
1537 case 'h': {
1538 char *str;
fc241834 1539 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1540 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1541 len = (strend - s) * 2;
561b68a9 1542 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1543 SvPOK_on(sv);
1544 str = SvPVX(sv);
1545 if (datumtype == 'h') {
f337b084 1546 U8 bits = 0;
9e27e96a 1547 I32 ai32 = len;
fc241834
RGS
1548 for (len = 0; len < ai32; len++) {
1549 if (len & 1) bits >>= 4;
1550 else if (utf8) {
1551 if (s >= strend) break;
f337b084 1552 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1553 } else bits = * (U8 *) s++;
a6ec74c1
JH
1554 *str++ = PL_hexdigit[bits & 15];
1555 }
08ca2aa3 1556 } else {
f337b084 1557 U8 bits = 0;
f7fe979e 1558 const I32 ai32 = len;
08ca2aa3
TH
1559 for (len = 0; len < ai32; len++) {
1560 if (len & 1) bits <<= 4;
1561 else if (utf8) {
1562 if (s >= strend) break;
f337b084 1563 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1564 } else bits = *(U8 *) s++;
a6ec74c1
JH
1565 *str++ = PL_hexdigit[(bits >> 4) & 15];
1566 }
1567 }
1568 *str = '\0';
aa07b2f6 1569 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1570 XPUSHs(sv);
a6ec74c1 1571 break;
08ca2aa3 1572 }
a6ec74c1 1573 case 'c':
73cb7263 1574 while (len-- > 0) {
f337b084 1575 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1576 if (aint >= 128) /* fake up signed chars */
1577 aint -= 256;
08ca2aa3 1578 if (!checksum)
ac7f3b1b 1579 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1580 else if (checksum > bits_in_uv)
1581 cdouble += (NV)aint;
1582 else
1583 cuv += aint;
a6ec74c1
JH
1584 }
1585 break;
1586 case 'C':
08ca2aa3
TH
1587 case 'W':
1588 W_checksum:
35bcd338 1589 if (len == 0) {
fc241834 1590 if (explicit_length && datumtype == 'C')
08ca2aa3 1591 /* Switch to "character" mode */
f337b084 1592 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1593 break;
1594 }
fc241834 1595 if (datumtype == 'C' ?
f337b084
TH
1596 (symptr->flags & FLAG_DO_UTF8) &&
1597 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1598 while (len-- > 0 && s < strend) {
08ca2aa3 1599 STRLEN retlen;
f7fe979e 1600 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1601 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1602 if (retlen == (STRLEN) -1 || retlen == 0)
1603 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1604 s += retlen;
1605 if (!checksum)
1606 PUSHs(sv_2mortal(newSVuv((UV) val)));
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV) val;
d6d3e8bd 1609 else
08ca2aa3 1610 cuv += val;
fc241834 1611 }
08ca2aa3 1612 } else if (!checksum)
a6ec74c1 1613 while (len-- > 0) {
f7fe979e 1614 const U8 ch = *(U8 *) s++;
08ca2aa3 1615 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1616 }
08ca2aa3
TH
1617 else if (checksum > bits_in_uv)
1618 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1619 else
1620 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1621 break;
1622 case 'U':
35bcd338 1623 if (len == 0) {
08ca2aa3
TH
1624 if (explicit_length) {
1625 /* Switch to "bytes in UTF-8" mode */
f337b084 1626 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1627 else
1628 /* Should be impossible due to the need_utf8() test */
1629 Perl_croak(aTHX_ "U0 mode on a byte string");
1630 }
35bcd338
JH
1631 break;
1632 }
08ca2aa3 1633 if (len > strend - s) len = strend - s;
fc241834 1634 if (!checksum) {
08ca2aa3
TH
1635 if (len && unpack_only_one) len = 1;
1636 EXTEND(SP, len);
1637 EXTEND_MORTAL(len);
fc241834 1638 }
08ca2aa3
TH
1639 while (len-- > 0 && s < strend) {
1640 STRLEN retlen;
1641 UV auv;
1642 if (utf8) {
1643 U8 result[UTF8_MAXLEN];
f7fe979e 1644 const char *ptr = s;
08ca2aa3 1645 STRLEN len;
08ca2aa3
TH
1646 /* Bug: warns about bad utf8 even if we are short on bytes
1647 and will break out of the loop */
230e1fce
NC
1648 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1649 'U'))
08ca2aa3
TH
1650 break;
1651 len = UTF8SKIP(result);
fc241834 1652 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1653 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1654 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1655 s = ptr;
1656 } else {
1657 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1658 if (retlen == (STRLEN) -1 || retlen == 0)
1659 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1660 s += retlen;
1661 }
1662 if (!checksum)
1663 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1664 else if (checksum > bits_in_uv)
08ca2aa3 1665 cdouble += (NV) auv;
73cb7263 1666 else
08ca2aa3 1667 cuv += auv;
a6ec74c1
JH
1668 }
1669 break;
49704364
WL
1670 case 's' | TYPE_IS_SHRIEKING:
1671#if SHORTSIZE != SIZE16
73cb7263 1672 while (len-- > 0) {
08ca2aa3 1673 short ashort;
f337b084
TH
1674 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1675 DO_BO_UNPACK(ashort, s);
08ca2aa3 1676 if (!checksum)
ac7f3b1b 1677 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1678 else if (checksum > bits_in_uv)
1679 cdouble += (NV)ashort;
1680 else
1681 cuv += ashort;
49704364
WL
1682 }
1683 break;
1684#else
1685 /* Fallthrough! */
a6ec74c1 1686#endif
49704364 1687 case 's':
73cb7263 1688 while (len-- > 0) {
08ca2aa3
TH
1689 I16 ai16;
1690
1691#if U16SIZE > SIZE16
1692 ai16 = 0;
1693#endif
f337b084 1694 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1695 DO_BO_UNPACK(ai16, 16);
1109a392 1696#if U16SIZE > SIZE16
73cb7263
NC
1697 if (ai16 > 32767)
1698 ai16 -= 65536;
a6ec74c1 1699#endif
08ca2aa3 1700 if (!checksum)
ac7f3b1b 1701 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1702 else if (checksum > bits_in_uv)
1703 cdouble += (NV)ai16;
1704 else
1705 cuv += ai16;
a6ec74c1
JH
1706 }
1707 break;
49704364
WL
1708 case 'S' | TYPE_IS_SHRIEKING:
1709#if SHORTSIZE != SIZE16
73cb7263 1710 while (len-- > 0) {
08ca2aa3 1711 unsigned short aushort;
f337b084
TH
1712 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1713 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1714 if (!checksum)
1715 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1716 else if (checksum > bits_in_uv)
1717 cdouble += (NV)aushort;
1718 else
1719 cuv += aushort;
49704364
WL
1720 }
1721 break;
1722#else
1723 /* Fallhrough! */
1724#endif
a6ec74c1
JH
1725 case 'v':
1726 case 'n':
1727 case 'S':
73cb7263 1728 while (len-- > 0) {
08ca2aa3
TH
1729 U16 au16;
1730#if U16SIZE > SIZE16
1731 au16 = 0;
1732#endif
f337b084 1733 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1734 DO_BO_UNPACK(au16, 16);
a6ec74c1 1735#ifdef HAS_NTOHS
73cb7263
NC
1736 if (datumtype == 'n')
1737 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1738#endif
1739#ifdef HAS_VTOHS
73cb7263
NC
1740 if (datumtype == 'v')
1741 au16 = vtohs(au16);
a6ec74c1 1742#endif
08ca2aa3
TH
1743 if (!checksum)
1744 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1745 else if (checksum > bits_in_uv)
f337b084 1746 cdouble += (NV) au16;
73cb7263
NC
1747 else
1748 cuv += au16;
a6ec74c1
JH
1749 }
1750 break;
7212898e 1751#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1752 case 'v' | TYPE_IS_SHRIEKING:
1753 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1754 while (len-- > 0) {
08ca2aa3
TH
1755 I16 ai16;
1756# if U16SIZE > SIZE16
1757 ai16 = 0;
1758# endif
f337b084 1759 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1760# ifdef HAS_NTOHS
73cb7263 1761 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1762 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1763# endif /* HAS_NTOHS */
1764# ifdef HAS_VTOHS
73cb7263 1765 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1766 ai16 = (I16) vtohs((U16) ai16);
1767# endif /* HAS_VTOHS */
1768 if (!checksum)
ac7f3b1b 1769 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1770 else if (checksum > bits_in_uv)
08ca2aa3 1771 cdouble += (NV) ai16;
73cb7263
NC
1772 else
1773 cuv += ai16;
068bd2e7
MHM
1774 }
1775 break;
08ca2aa3 1776#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1777 case 'i':
49704364 1778 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1779 while (len-- > 0) {
08ca2aa3 1780 int aint;
f337b084
TH
1781 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1782 DO_BO_UNPACK(aint, i);
08ca2aa3 1783 if (!checksum)
ac7f3b1b 1784 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1785 else if (checksum > bits_in_uv)
1786 cdouble += (NV)aint;
1787 else
1788 cuv += aint;
a6ec74c1
JH
1789 }
1790 break;
1791 case 'I':
49704364 1792 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1793 while (len-- > 0) {
08ca2aa3 1794 unsigned int auint;
f337b084
TH
1795 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1796 DO_BO_UNPACK(auint, i);
08ca2aa3 1797 if (!checksum)
ac7f3b1b 1798 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV)auint;
1801 else
1802 cuv += auint;
a6ec74c1
JH
1803 }
1804 break;
92d41999 1805 case 'j':
73cb7263 1806 while (len-- > 0) {
08ca2aa3 1807 IV aiv;
f337b084 1808 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1809#if IVSIZE == INTSIZE
f337b084 1810 DO_BO_UNPACK(aiv, i);
1109a392 1811#elif IVSIZE == LONGSIZE
f337b084 1812 DO_BO_UNPACK(aiv, l);
1109a392 1813#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1814 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1815#else
1816 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1817#endif
08ca2aa3 1818 if (!checksum)
ac7f3b1b 1819 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1820 else if (checksum > bits_in_uv)
1821 cdouble += (NV)aiv;
1822 else
1823 cuv += aiv;
92d41999
JH
1824 }
1825 break;
1826 case 'J':
73cb7263 1827 while (len-- > 0) {
08ca2aa3 1828 UV auv;
f337b084 1829 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1830#if IVSIZE == INTSIZE
f337b084 1831 DO_BO_UNPACK(auv, i);
08ca2aa3 1832#elif IVSIZE == LONGSIZE
f337b084 1833 DO_BO_UNPACK(auv, l);
08ca2aa3 1834#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1835 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1836#else
1837 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1838#endif
08ca2aa3 1839 if (!checksum)
ac7f3b1b 1840 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1841 else if (checksum > bits_in_uv)
1842 cdouble += (NV)auv;
1843 else
1844 cuv += auv;
92d41999
JH
1845 }
1846 break;
49704364
WL
1847 case 'l' | TYPE_IS_SHRIEKING:
1848#if LONGSIZE != SIZE32
73cb7263 1849 while (len-- > 0) {
08ca2aa3 1850 long along;
f337b084
TH
1851 SHIFT_VAR(utf8, s, strend, along, datumtype);
1852 DO_BO_UNPACK(along, l);
08ca2aa3 1853 if (!checksum)
ac7f3b1b 1854 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1855 else if (checksum > bits_in_uv)
1856 cdouble += (NV)along;
1857 else
1858 cuv += along;
49704364
WL
1859 }
1860 break;
1861#else
1862 /* Fallthrough! */
a6ec74c1 1863#endif
49704364 1864 case 'l':
73cb7263 1865 while (len-- > 0) {
08ca2aa3
TH
1866 I32 ai32;
1867#if U32SIZE > SIZE32
1868 ai32 = 0;
1869#endif
f337b084 1870 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1871 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1872#if U32SIZE > SIZE32
08ca2aa3 1873 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1874#endif
08ca2aa3 1875 if (!checksum)
ac7f3b1b 1876 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1877 else if (checksum > bits_in_uv)
1878 cdouble += (NV)ai32;
1879 else
1880 cuv += ai32;
a6ec74c1
JH
1881 }
1882 break;
49704364
WL
1883 case 'L' | TYPE_IS_SHRIEKING:
1884#if LONGSIZE != SIZE32
73cb7263 1885 while (len-- > 0) {
08ca2aa3 1886 unsigned long aulong;
f337b084
TH
1887 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1888 DO_BO_UNPACK(aulong, l);
08ca2aa3 1889 if (!checksum)
ac7f3b1b 1890 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1891 else if (checksum > bits_in_uv)
1892 cdouble += (NV)aulong;
1893 else
1894 cuv += aulong;
49704364
WL
1895 }
1896 break;
1897#else
1898 /* Fall through! */
1899#endif
a6ec74c1
JH
1900 case 'V':
1901 case 'N':
1902 case 'L':
73cb7263 1903 while (len-- > 0) {
08ca2aa3
TH
1904 U32 au32;
1905#if U32SIZE > SIZE32
1906 au32 = 0;
1907#endif
f337b084 1908 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1909 DO_BO_UNPACK(au32, 32);
a6ec74c1 1910#ifdef HAS_NTOHL
73cb7263
NC
1911 if (datumtype == 'N')
1912 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1913#endif
1914#ifdef HAS_VTOHL
73cb7263
NC
1915 if (datumtype == 'V')
1916 au32 = vtohl(au32);
a6ec74c1 1917#endif
08ca2aa3 1918 if (!checksum)
fc241834
RGS
1919 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1920 else if (checksum > bits_in_uv)
1921 cdouble += (NV)au32;
1922 else
1923 cuv += au32;
a6ec74c1
JH
1924 }
1925 break;
7212898e 1926#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1927 case 'V' | TYPE_IS_SHRIEKING:
1928 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1929 while (len-- > 0) {
08ca2aa3
TH
1930 I32 ai32;
1931# if U32SIZE > SIZE32
1932 ai32 = 0;
1933# endif
f337b084 1934 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1935# ifdef HAS_NTOHL
73cb7263
NC
1936 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1937 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1938# endif
1939# ifdef HAS_VTOHL
73cb7263
NC
1940 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1941 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1942# endif
1943 if (!checksum)
ac7f3b1b 1944 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1945 else if (checksum > bits_in_uv)
1946 cdouble += (NV)ai32;
1947 else
1948 cuv += ai32;
068bd2e7
MHM
1949 }
1950 break;
08ca2aa3 1951#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1952 case 'p':
a6ec74c1 1953 while (len-- > 0) {
f7fe979e 1954 const char *aptr;
f337b084 1955 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1956 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1957 /* newSVpv generates undef if aptr is NULL */
1958 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1959 }
1960 break;
1961 case 'w':
a6ec74c1
JH
1962 {
1963 UV auv = 0;
1964 U32 bytes = 0;
fc241834 1965
08ca2aa3
TH
1966 while (len > 0 && s < strend) {
1967 U8 ch;
f337b084 1968 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1969 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1970 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1971 if (ch < 0x80) {
a6ec74c1 1972 bytes = 0;
ac7f3b1b 1973 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1974 len--;
1975 auv = 0;
08ca2aa3 1976 continue;
a6ec74c1 1977 }
08ca2aa3 1978 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1979 const char *t;
a6ec74c1
JH
1980
1981 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1982 while (s < strend) {
f337b084 1983 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1984 sv = mul128(sv, (U8)(ch & 0x7f));
1985 if (!(ch & 0x80)) {
a6ec74c1
JH
1986 bytes = 0;
1987 break;
1988 }
1989 }
10516c54 1990 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1991 while (*t == '0')
1992 t++;
1993 sv_chop(sv, t);
1994 PUSHs(sv_2mortal(sv));
1995 len--;
1996 auv = 0;
1997 }
1998 }
1999 if ((s >= strend) && bytes)
49704364 2000 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2001 }
2002 break;
2003 case 'P':
49704364
WL
2004 if (symptr->howlen == e_star)
2005 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2006 EXTEND(SP, 1);
2d3e0934 2007 if (s + sizeof(char*) <= strend) {
08ca2aa3 2008 char *aptr;
f337b084 2009 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2010 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2011 /* newSVpvn generates undef if aptr is NULL */
2012 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2013 }
a6ec74c1
JH
2014 break;
2015#ifdef HAS_QUAD
2016 case 'q':
73cb7263 2017 while (len-- > 0) {
08ca2aa3 2018 Quad_t aquad;
f337b084
TH
2019 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2020 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2021 if (!checksum)
2022 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2023 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2024 else if (checksum > bits_in_uv)
2025 cdouble += (NV)aquad;
2026 else
2027 cuv += aquad;
2028 }
a6ec74c1
JH
2029 break;
2030 case 'Q':
73cb7263 2031 while (len-- > 0) {
08ca2aa3 2032 Uquad_t auquad;
f337b084
TH
2033 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2034 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2035 if (!checksum)
2036 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2037 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2038 else if (checksum > bits_in_uv)
2039 cdouble += (NV)auquad;
2040 else
2041 cuv += auquad;
a6ec74c1
JH
2042 }
2043 break;
08ca2aa3 2044#endif /* HAS_QUAD */
a6ec74c1
JH
2045 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2046 case 'f':
73cb7263 2047 while (len-- > 0) {
08ca2aa3 2048 float afloat;
f337b084 2049 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2050 DO_BO_UNPACK_N(afloat, float);
2051 if (!checksum)
2052 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2053 else
73cb7263 2054 cdouble += afloat;
fc241834 2055 }
a6ec74c1
JH
2056 break;
2057 case 'd':
73cb7263 2058 while (len-- > 0) {
08ca2aa3 2059 double adouble;
f337b084 2060 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2061 DO_BO_UNPACK_N(adouble, double);
2062 if (!checksum)
2063 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2064 else
73cb7263 2065 cdouble += adouble;
fc241834 2066 }
a6ec74c1 2067 break;
92d41999 2068 case 'F':
73cb7263 2069 while (len-- > 0) {
08ca2aa3 2070 NV anv;
f337b084 2071 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2072 DO_BO_UNPACK_N(anv, NV);
2073 if (!checksum)
2074 PUSHs(sv_2mortal(newSVnv(anv)));
2075 else
73cb7263 2076 cdouble += anv;
fc241834 2077 }
92d41999
JH
2078 break;
2079#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2080 case 'D':
73cb7263 2081 while (len-- > 0) {
08ca2aa3 2082 long double aldouble;
f337b084 2083 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2084 DO_BO_UNPACK_N(aldouble, long double);
2085 if (!checksum)
2086 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2087 else
2088 cdouble += aldouble;
92d41999
JH
2089 }
2090 break;
2091#endif
a6ec74c1
JH
2092 case 'u':
2093 /* MKS:
2094 * Initialise the decode mapping. By using a table driven
2095 * algorithm, the code will be character-set independent
2096 * (and just as fast as doing character arithmetic)
2097 */
2098 if (PL_uudmap['M'] == 0) {
2d3e0934 2099 size_t i;
a6ec74c1 2100
2d3e0934 2101 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2102 PL_uudmap[(U8)PL_uuemap[i]] = i;
2103 /*
2104 * Because ' ' and '`' map to the same value,
2105 * we need to decode them both the same.
2106 */
2107 PL_uudmap[' '] = 0;
2108 }
08ca2aa3 2109 {
f7fe979e 2110 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2111 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2112 if (l) SvPOK_on(sv);
2113 }
2114 if (utf8) {
2115 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2116 I32 a, b, c, d;
2117 char hunk[4];
2118
2119 hunk[3] = '\0';
2120 while (len > 0) {
2121 next_uni_uu(aTHX_ &s, strend, &a);
2122 next_uni_uu(aTHX_ &s, strend, &b);
2123 next_uni_uu(aTHX_ &s, strend, &c);
2124 next_uni_uu(aTHX_ &s, strend, &d);
2125 hunk[0] = (char)((a << 2) | (b >> 4));
2126 hunk[1] = (char)((b << 4) | (c >> 2));
2127 hunk[2] = (char)((c << 6) | d);
2128 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2129 len -= 3;
2130 }
2131 if (s < strend) {
f7fe979e
AL
2132 if (*s == '\n') {
2133 s++;
2134 }
08ca2aa3
TH
2135 else {
2136 /* possible checksum byte */
f7fe979e
AL
2137 const char *skip = s+UTF8SKIP(s);
2138 if (skip < strend && *skip == '\n')
2139 s = skip+1;
08ca2aa3
TH
2140 }
2141 }
2142 }
2143 } else {
fc241834
RGS
2144 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2145 I32 a, b, c, d;
2146 char hunk[4];
a6ec74c1 2147
fc241834
RGS
2148 hunk[3] = '\0';
2149 len = PL_uudmap[*(U8*)s++] & 077;
2150 while (len > 0) {
2151 if (s < strend && ISUUCHAR(*s))
2152 a = PL_uudmap[*(U8*)s++] & 077;
2153 else
2154 a = 0;
2155 if (s < strend && ISUUCHAR(*s))
2156 b = PL_uudmap[*(U8*)s++] & 077;
2157 else
2158 b = 0;
2159 if (s < strend && ISUUCHAR(*s))
2160 c = PL_uudmap[*(U8*)s++] & 077;
2161 else
2162 c = 0;
2163 if (s < strend && ISUUCHAR(*s))
2164 d = PL_uudmap[*(U8*)s++] & 077;
2165 else
2166 d = 0;
2167 hunk[0] = (char)((a << 2) | (b >> 4));
2168 hunk[1] = (char)((b << 4) | (c >> 2));
2169 hunk[2] = (char)((c << 6) | d);
2170 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2171 len -= 3;
2172 }
2173 if (*s == '\n')
2174 s++;
2175 else /* possible checksum byte */
2176 if (s + 1 < strend && s[1] == '\n')
2177 s += 2;
a6ec74c1 2178 }
08ca2aa3
TH
2179 }
2180 XPUSHs(sv);
a6ec74c1
JH
2181 break;
2182 }
49704364 2183
a6ec74c1 2184 if (checksum) {
1109a392 2185 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2186 (checksum > bits_in_uv &&
08ca2aa3
TH
2187 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2188 NV trouble, anv;
a6ec74c1 2189
08ca2aa3 2190 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2191 while (checksum >= 16) {
2192 checksum -= 16;
08ca2aa3 2193 anv *= 65536.0;
a6ec74c1 2194 }
a6ec74c1 2195 while (cdouble < 0.0)
08ca2aa3
TH
2196 cdouble += anv;
2197 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2198 sv = newSVnv(cdouble);
a6ec74c1
JH
2199 }
2200 else {
fa8ec7c1
NC
2201 if (checksum < bits_in_uv) {
2202 UV mask = ((UV)1 << checksum) - 1;
92d41999 2203 cuv &= mask;
a6ec74c1 2204 }
c4c5f44a 2205 sv = newSVuv(cuv);
a6ec74c1
JH
2206 }
2207 XPUSHs(sv_2mortal(sv));
2208 checksum = 0;
2209 }
fc241834 2210
49704364
WL
2211 if (symptr->flags & FLAG_SLASH){
2212 if (SP - PL_stack_base - start_sp_offset <= 0)
2213 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2214 if( next_symbol(symptr) ){
2215 if( symptr->howlen == e_number )
2216 Perl_croak(aTHX_ "Count after length/code in unpack" );
2217 if( beyond ){
2218 /* ...end of char buffer then no decent length available */
2219 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2220 } else {
2221 /* take top of stack (hope it's numeric) */
2222 len = POPi;
2223 if( len < 0 )
2224 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2225 }
2226 } else {
2227 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2228 }
2229 datumtype = symptr->code;
21c16052 2230 explicit_length = FALSE;
49704364
WL
2231 goto redo_switch;
2232 }
a6ec74c1 2233 }
49704364 2234
18529408
IZ
2235 if (new_s)
2236 *new_s = s;
2237 PUTBACK;
2238 return SP - PL_stack_base - start_sp_offset;
2239}
2240
2241PP(pp_unpack)
2242{
97aff369 2243 dVAR;
18529408 2244 dSP;
bab9c0ac 2245 dPOPPOPssrl;
18529408
IZ
2246 I32 gimme = GIMME_V;
2247 STRLEN llen;
2248 STRLEN rlen;
5c144d81
NC
2249 const char *pat = SvPV_const(left, llen);
2250 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2251 const char *strend = s + rlen;
2252 const char *patend = pat + llen;
08ca2aa3 2253 I32 cnt;
18529408
IZ
2254
2255 PUTBACK;
7accc089 2256 cnt = unpackstring(pat, patend, s, strend,
49704364 2257 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2258 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2259
18529408
IZ
2260 SPAGAIN;
2261 if ( !cnt && gimme == G_SCALAR )
2262 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2263 RETURN;
2264}
2265
f337b084 2266STATIC U8 *
f7fe979e 2267doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2268{
f337b084 2269 *h++ = PL_uuemap[len];
a6ec74c1 2270 while (len > 2) {
f337b084
TH
2271 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2272 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2273 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2274 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2275 s += 3;
2276 len -= 3;
2277 }
2278 if (len > 0) {
f7fe979e 2279 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2280 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2281 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2282 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2283 *h++ = PL_uuemap[0];
a6ec74c1 2284 }
f337b084
TH
2285 *h++ = '\n';
2286 return h;
a6ec74c1
JH
2287}
2288
2289STATIC SV *
f7fe979e 2290S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2291{
8b6e33c7
AL
2292 SV *result = newSVpvn(s, l);
2293 char *const result_c = SvPV_nolen(result); /* convenience */
2294 char *out = result_c;
2295 bool skip = 1;
2296 bool ignore = 0;
a6ec74c1
JH
2297
2298 while (*s) {
2299 switch (*s) {
2300 case ' ':
2301 break;
2302 case '+':
2303 if (!skip) {
2304 SvREFCNT_dec(result);
2305 return (NULL);
2306 }
2307 break;
2308 case '0':
2309 case '1':
2310 case '2':
2311 case '3':
2312 case '4':
2313 case '5':
2314 case '6':
2315 case '7':
2316 case '8':
2317 case '9':
2318 skip = 0;
2319 if (!ignore) {
2320 *(out++) = *s;
2321 }
2322 break;
2323 case '.':
2324 ignore = 1;
2325 break;
2326 default:
2327 SvREFCNT_dec(result);
2328 return (NULL);
2329 }
2330 s++;
2331 }
2332 *(out++) = '\0';
2333 SvCUR_set(result, out - result_c);
2334 return (result);
2335}
2336
2337/* pnum must be '\0' terminated */
2338STATIC int
2339S_div128(pTHX_ SV *pnum, bool *done)
2340{
8b6e33c7
AL
2341 STRLEN len;
2342 char * const s = SvPV(pnum, len);
2343 char *t = s;
2344 int m = 0;
2345
2346 *done = 1;
2347 while (*t) {
2348 const int i = m * 10 + (*t - '0');
2349 const int r = (i >> 7); /* r < 10 */
2350 m = i & 0x7F;
2351 if (r) {
2352 *done = 0;
2353 }
2354 *(t++) = '0' + r;
a6ec74c1 2355 }
8b6e33c7
AL
2356 *(t++) = '\0';
2357 SvCUR_set(pnum, (STRLEN) (t - s));
2358 return (m);
a6ec74c1
JH
2359}
2360
18529408
IZ
2361/*
2362=for apidoc pack_cat
2363
7accc089
JH
2364The engine implementing pack() Perl function. Note: parameters next_in_list and
2365flags are not used. This call should not be used; use packlist instead.
18529408 2366
bfce84ec
AL
2367=cut
2368*/
49704364 2369
18529408 2370void
f7fe979e 2371Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2372{
aadb217d 2373 tempsym_t sym;
8772537c
AL
2374 PERL_UNUSED_ARG(next_in_list);
2375 PERL_UNUSED_ARG(flags);
7accc089 2376
f7fe979e 2377 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
aadb217d 2378
7accc089
JH
2379 (void)pack_rec( cat, &sym, beglist, endlist );
2380}
2381
2382
2383/*
2384=for apidoc packlist
2385
2386The engine implementing pack() Perl function.
2387
bfce84ec
AL
2388=cut
2389*/
7accc089
JH
2390
2391void
f7fe979e 2392Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2393{
97aff369 2394 dVAR;
f337b084 2395 STRLEN no_len;
aadb217d
JH
2396 tempsym_t sym;
2397
f7fe979e 2398 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2399
f337b084
TH
2400 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2401 Also make sure any UTF8 flag is loaded */
2402 SvPV_force(cat, no_len);
bfce84ec
AL
2403 if (DO_UTF8(cat))
2404 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2405
49704364
WL
2406 (void)pack_rec( cat, &sym, beglist, endlist );
2407}
2408
f337b084
TH
2409/* like sv_utf8_upgrade, but also repoint the group start markers */
2410STATIC void
2411marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2412 STRLEN len;
2413 tempsym_t *group;
f7fe979e
AL
2414 const char *from_ptr, *from_start, *from_end, **marks, **m;
2415 char *to_start, *to_ptr;
f337b084
TH
2416
2417 if (SvUTF8(sv)) return;
2418
aa07b2f6 2419 from_start = SvPVX_const(sv);
f337b084
TH
2420 from_end = from_start + SvCUR(sv);
2421 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2422 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2423 if (from_ptr == from_end) {
2424 /* Simple case: no character needs to be changed */
2425 SvUTF8_on(sv);
2426 return;
2427 }
2428
3473cf63 2429 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2430 Newx(to_start, len, char);
f337b084
TH
2431 Copy(from_start, to_start, from_ptr-from_start, char);
2432 to_ptr = to_start + (from_ptr-from_start);
2433
a02a5408 2434 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2435 for (group=sym_ptr; group; group = group->previous)
2436 marks[group->level] = from_start + group->strbeg;
2437 marks[sym_ptr->level+1] = from_end+1;
2438 for (m = marks; *m < from_ptr; m++)
2439 *m = to_start + (*m-from_start);
2440
2441 for (;from_ptr < from_end; from_ptr++) {
2442 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2443 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2444 }
2445 *to_ptr = 0;
2446
2447 while (*m == from_ptr) *m++ = to_ptr;
2448 if (m != marks + sym_ptr->level+1) {
2449 Safefree(marks);
2450 Safefree(to_start);
2451 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2452 }
2453 for (group=sym_ptr; group; group = group->previous)
2454 group->strbeg = marks[group->level] - to_start;
2455 Safefree(marks);
2456
2457 if (SvOOK(sv)) {
2458 if (SvIVX(sv)) {
b162af07 2459 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2460 from_start -= SvIVX(sv);
2461 SvIV_set(sv, 0);
2462 }
2463 SvFLAGS(sv) &= ~SVf_OOK;
2464 }
2465 if (SvLEN(sv) != 0)
2466 Safefree(from_start);
f880fe2f 2467 SvPV_set(sv, to_start);
b162af07
SP
2468 SvCUR_set(sv, to_ptr - to_start);
2469 SvLEN_set(sv, len);
f337b084
TH
2470 SvUTF8_on(sv);
2471}
2472
2473/* Exponential string grower. Makes string extension effectively O(n)
2474 needed says how many extra bytes we need (not counting the final '\0')
2475 Only grows the string if there is an actual lack of space
2476*/
2477STATIC char *
0bd48802 2478S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2479 const STRLEN cur = SvCUR(sv);
2480 const STRLEN len = SvLEN(sv);
f337b084
TH
2481 STRLEN extend;
2482 if (len - cur > needed) return SvPVX(sv);
2483 extend = needed > len ? needed : len;
2484 return SvGROW(sv, len+extend+1);
2485}
49704364
WL
2486
2487STATIC
2488SV **
f337b084 2489S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2490{
97aff369 2491 dVAR;
49704364 2492 tempsym_t lookahead;
f337b084
TH
2493 I32 items = endlist - beglist;
2494 bool found = next_symbol(symptr);
2495 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2496 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2497
2498 if (symptr->level == 0 && found && symptr->code == 'U') {
2499 marked_upgrade(aTHX_ cat, symptr);
2500 symptr->flags |= FLAG_DO_UTF8;
2501 utf8 = 0;
49704364 2502 }
f337b084 2503 symptr->strbeg = SvCUR(cat);
49704364
WL
2504
2505 while (found) {
f337b084
TH
2506 SV *fromstr;
2507 STRLEN fromlen;
2508 I32 len;
a0714e2c 2509 SV *lengthcode = NULL;
49704364 2510 I32 datumtype = symptr->code;
f337b084
TH
2511 howlen_t howlen = symptr->howlen;
2512 char *start = SvPVX(cat);
2513 char *cur = start + SvCUR(cat);
49704364 2514
f337b084
TH
2515#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2516
2517 switch (howlen) {
fc241834 2518 case e_star:
f337b084
TH
2519 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2520 0 : items;
2521 break;
2522 default:
2523 /* e_no_len and e_number */
2524 len = symptr->length;
49704364
WL
2525 break;
2526 }
2527
f337b084 2528 if (len) {
a7a3cfaa 2529 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2530
a7a3cfaa
TH
2531 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2532 /* We can process this letter. */
2533 STRLEN size = props & PACK_SIZE_MASK;
2534 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2535 }
f337b084
TH
2536 }
2537
49704364
WL
2538 /* Look ahead for next symbol. Do we have code/code? */
2539 lookahead = *symptr;
2540 found = next_symbol(&lookahead);
246f24af
TH
2541 if (symptr->flags & FLAG_SLASH) {
2542 IV count;
f337b084 2543 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2544 if (strchr("aAZ", lookahead.code)) {
2545 if (lookahead.howlen == e_number) count = lookahead.length;
2546 else {
2547 if (items > 0)
2548 count = DO_UTF8(*beglist) ?
2549 sv_len_utf8(*beglist) : sv_len(*beglist);
2550 else count = 0;
2551 if (lookahead.code == 'Z') count++;
2552 }
2553 } else {
2554 if (lookahead.howlen == e_number && lookahead.length < items)
2555 count = lookahead.length;
2556 else count = items;
2557 }
2558 lookahead.howlen = e_number;
2559 lookahead.length = count;
2560 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2561 }
49704364 2562
fc241834
RGS
2563 /* Code inside the switch must take care to properly update
2564 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2565 doesn't simply leave using break */
1109a392 2566 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2567 default:
f337b084
TH
2568 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2569 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2570 case '%':
49704364 2571 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2572 {
2573 char *from;
2574#ifdef PERL_PACK_CAN_SHRIEKSIGN
2575 case '.' | TYPE_IS_SHRIEKING:
2576#endif
2577 case '.':
2578 if (howlen == e_star) from = start;
2579 else if (len == 0) from = cur;
2580 else {
2581 tempsym_t *group = symptr;
2582
2583 while (--len && group) group = group->previous;
2584 from = group ? start + group->strbeg : start;
2585 }
2586 fromstr = NEXTFROM;
2587 len = SvIV(fromstr);
2588 goto resize;
2589#ifdef PERL_PACK_CAN_SHRIEKSIGN
2590 case '@' | TYPE_IS_SHRIEKING:
2591#endif
a6ec74c1 2592 case '@':
28be1210
TH
2593 from = start + symptr->strbeg;
2594 resize:
2595#ifdef PERL_PACK_CAN_SHRIEKSIGN
2596 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2597#else /* PERL_PACK_CAN_SHRIEKSIGN */
2598 if (utf8)
2599#endif
2600 if (len >= 0) {
2601 while (len && from < cur) {
2602 from += UTF8SKIP(from);
2603 len--;
2604 }
2605 if (from > cur)
2606 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2607 if (len) {
2608 /* Here we know from == cur */
2609 grow:
2610 GROWING(0, cat, start, cur, len);
2611 Zero(cur, len, char);
2612 cur += len;
2613 } else if (from < cur) {
2614 len = cur - from;
2615 goto shrink;
2616 } else goto no_change;
2617 } else {
2618 cur = from;
2619 len = -len;
2620 goto utf8_shrink;
f337b084 2621 }
28be1210
TH
2622 else {
2623 len -= cur - from;
f337b084 2624 if (len > 0) goto grow;
28be1210 2625 if (len == 0) goto no_change;
fc241834 2626 len = -len;
28be1210 2627 goto shrink;
f337b084 2628 }
a6ec74c1 2629 break;
28be1210 2630 }
fc241834 2631 case '(': {
49704364 2632 tempsym_t savsym = *symptr;
66c611c5
MHM
2633 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2634 symptr->flags |= group_modifiers;
49704364
WL
2635 symptr->patend = savsym.grpend;
2636 symptr->level++;
f337b084 2637 symptr->previous = &lookahead;
18529408 2638 while (len--) {
f337b084
TH
2639 U32 was_utf8;
2640 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2641 else symptr->flags &= ~FLAG_PARSE_UTF8;
2642 was_utf8 = SvUTF8(cat);
49704364 2643 symptr->patptr = savsym.grpbeg;
f337b084
TH
2644 beglist = pack_rec(cat, symptr, beglist, endlist);
2645 if (SvUTF8(cat) != was_utf8)
2646 /* This had better be an upgrade while in utf8==0 mode */
2647 utf8 = 1;
2648
49704364 2649 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2650 break; /* No way to continue */
2651 }
f337b084
TH
2652 lookahead.flags = symptr->flags & ~group_modifiers;
2653 goto no_change;
18529408 2654 }
62f95557
IZ
2655 case 'X' | TYPE_IS_SHRIEKING:
2656 if (!len) /* Avoid division by 0 */
2657 len = 1;
f337b084
TH
2658 if (utf8) {
2659 char *hop, *last;
2660 I32 l = len;
2661 hop = last = start;
2662 while (hop < cur) {
2663 hop += UTF8SKIP(hop);
2664 if (--l == 0) {
2665 last = hop;
2666 l = len;
2667 }
2668 }
2669 if (last > cur)
2670 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2671 cur = last;
2672 break;
2673 }
2674 len = (cur-start) % len;
62f95557 2675 /* FALL THROUGH */
a6ec74c1 2676 case 'X':
f337b084
TH
2677 if (utf8) {
2678 if (len < 1) goto no_change;
28be1210 2679 utf8_shrink:
f337b084
TH
2680 while (len > 0) {
2681 if (cur <= start)
28be1210
TH
2682 Perl_croak(aTHX_ "'%c' outside of string in pack",
2683 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2684 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2685 if (cur <= start)
28be1210
TH
2686 Perl_croak(aTHX_ "'%c' outside of string in pack",
2687 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2688 }
2689 len--;
2690 }
2691 } else {
fc241834 2692 shrink:
f337b084 2693 if (cur - start < len)
28be1210
TH
2694 Perl_croak(aTHX_ "'%c' outside of string in pack",
2695 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2696 cur -= len;
2697 }
2698 if (cur < start+symptr->strbeg) {
2699 /* Make sure group starts don't point into the void */
2700 tempsym_t *group;
9e27e96a 2701 const STRLEN length = cur-start;
f337b084
TH
2702 for (group = symptr;
2703 group && length < group->strbeg;
2704 group = group->previous) group->strbeg = length;
2705 lookahead.strbeg = length;
2706 }
a6ec74c1 2707 break;
fc241834
RGS
2708 case 'x' | TYPE_IS_SHRIEKING: {
2709 I32 ai32;
62f95557
IZ
2710 if (!len) /* Avoid division by 0 */
2711 len = 1;
230e1fce 2712 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2713 else ai32 = (cur - start) % len;
2714 if (ai32 == 0) goto no_change;
2715 len -= ai32;
2716 }
2717 /* FALL THROUGH */
a6ec74c1 2718 case 'x':
f337b084 2719 goto grow;
a6ec74c1
JH
2720 case 'A':
2721 case 'Z':
f337b084 2722 case 'a': {
f7fe979e 2723 const char *aptr;
f337b084 2724
a6ec74c1 2725 fromstr = NEXTFROM;
e62f0680 2726 aptr = SvPV_const(fromstr, fromlen);
f337b084 2727 if (DO_UTF8(fromstr)) {
f7fe979e 2728 const char *end, *s;
f337b084
TH
2729
2730 if (!utf8 && !SvUTF8(cat)) {
2731 marked_upgrade(aTHX_ cat, symptr);
2732 lookahead.flags |= FLAG_DO_UTF8;
2733 lookahead.strbeg = symptr->strbeg;
2734 utf8 = 1;
2735 start = SvPVX(cat);
2736 cur = start + SvCUR(cat);
2737 }
fc241834 2738 if (howlen == e_star) {
f337b084
TH
2739 if (utf8) goto string_copy;
2740 len = fromlen+1;
2741 }
2742 s = aptr;
2743 end = aptr + fromlen;
2744 fromlen = datumtype == 'Z' ? len-1 : len;
2745 while ((I32) fromlen > 0 && s < end) {
2746 s += UTF8SKIP(s);
2747 fromlen--;
2748 }
2749 if (s > end)
2750 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2751 if (utf8) {
fc241834 2752 len = fromlen;
f337b084
TH
2753 if (datumtype == 'Z') len++;
2754 fromlen = s-aptr;
2755 len += fromlen;
fc241834 2756
f337b084 2757 goto string_copy;
fc241834 2758 }
f337b084
TH
2759 fromlen = len - fromlen;
2760 if (datumtype == 'Z') fromlen--;
2761 if (howlen == e_star) {
2762 len = fromlen;
2763 if (datumtype == 'Z') len++;
fc241834 2764 }
f337b084 2765 GROWING(0, cat, start, cur, len);
fc241834 2766 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2767 datumtype | TYPE_IS_PACK))
2768 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2769 cur += fromlen;
a6ec74c1 2770 len -= fromlen;
f337b084
TH
2771 } else if (utf8) {
2772 if (howlen == e_star) {
2773 len = fromlen;
2774 if (datumtype == 'Z') len++;
a6ec74c1 2775 }
f337b084
TH
2776 if (len <= (I32) fromlen) {
2777 fromlen = len;
2778 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2779 }
fc241834 2780 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2781 upgrade, so:
2782 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2783 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2784 len -= fromlen;
2785 while (fromlen > 0) {
230e1fce 2786 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2787 aptr++;
2788 fromlen--;
fc241834 2789 }
f337b084
TH
2790 } else {
2791 string_copy:
2792 if (howlen == e_star) {
2793 len = fromlen;
2794 if (datumtype == 'Z') len++;
2795 }
2796 if (len <= (I32) fromlen) {
2797 fromlen = len;
2798 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2799 }
f337b084
TH
2800 GROWING(0, cat, start, cur, len);
2801 Copy(aptr, cur, fromlen, char);
2802 cur += fromlen;
2803 len -= fromlen;
a6ec74c1 2804 }
f337b084
TH
2805 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2806 cur += len;
a6ec74c1 2807 break;
f337b084 2808 }
a6ec74c1 2809 case 'B':
f337b084 2810 case 'b': {
b83604b4 2811 const char *str, *end;
f337b084
TH
2812 I32 l, field_len;
2813 U8 bits;
2814 bool utf8_source;
2815 U32 utf8_flags;
a6ec74c1 2816
fc241834 2817 fromstr = NEXTFROM;
b83604b4 2818 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2819 end = str + fromlen;
2820 if (DO_UTF8(fromstr)) {
2821 utf8_source = TRUE;
041457d9 2822 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2823 } else {
2824 utf8_source = FALSE;
2825 utf8_flags = 0; /* Unused, but keep compilers happy */
2826 }
2827 if (howlen == e_star) len = fromlen;
2828 field_len = (len+7)/8;
2829 GROWING(utf8, cat, start, cur, field_len);
2830 if (len > (I32)fromlen) len = fromlen;
2831 bits = 0;
2832 l = 0;
2833 if (datumtype == 'B')
2834 while (l++ < len) {
2835 if (utf8_source) {
2836 UV val;
2837 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2838 bits |= val & 1;
2839 } else bits |= *str++ & 1;
2840 if (l & 7) bits <<= 1;
fc241834 2841 else {
f337b084
TH
2842 PUSH_BYTE(utf8, cur, bits);
2843 bits = 0;
a6ec74c1
JH
2844 }
2845 }
f337b084
TH
2846 else
2847 /* datumtype == 'b' */
2848 while (l++ < len) {
2849 if (utf8_source) {
2850 UV val;
2851 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2852 if (val & 1) bits |= 0x80;
2853 } else if (*str++ & 1)
2854 bits |= 0x80;
2855 if (l & 7) bits >>= 1;
fc241834 2856 else {
f337b084
TH
2857 PUSH_BYTE(utf8, cur, bits);
2858 bits = 0;
a6ec74c1
JH
2859 }
2860 }
f337b084
TH
2861 l--;
2862 if (l & 7) {
fc241834 2863 if (datumtype == 'B')
f337b084 2864 bits <<= 7 - (l & 7);
fc241834 2865 else
f337b084
TH
2866 bits >>= 7 - (l & 7);
2867 PUSH_BYTE(utf8, cur, bits);
2868 l += 7;
a6ec74c1 2869 }
f337b084
TH
2870 /* Determine how many chars are left in the requested field */
2871 l /= 8;
2872 if (howlen == e_star) field_len = 0;
2873 else field_len -= l;
2874 Zero(cur, field_len, char);
2875 cur += field_len;
a6ec74c1 2876 break;
f337b084 2877 }
a6ec74c1 2878 case 'H':
f337b084 2879 case 'h': {
10516c54 2880 const char *str, *end;
f337b084
TH
2881 I32 l, field_len;
2882 U8 bits;
2883 bool utf8_source;
2884 U32 utf8_flags;
a6ec74c1 2885
fc241834 2886 fromstr = NEXTFROM;
10516c54 2887 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2888 end = str + fromlen;
2889 if (DO_UTF8(fromstr)) {
2890 utf8_source = TRUE;
041457d9 2891 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2892 } else {
2893 utf8_source = FALSE;
2894 utf8_flags = 0; /* Unused, but keep compilers happy */
2895 }
2896 if (howlen == e_star) len = fromlen;
2897 field_len = (len+1)/2;
2898 GROWING(utf8, cat, start, cur, field_len);
2899 if (!utf8 && len > (I32)fromlen) len = fromlen;
2900 bits = 0;
2901 l = 0;
2902 if (datumtype == 'H')
2903 while (l++ < len) {
2904 if (utf8_source) {
2905 UV val;
2906 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2907 if (val < 256 && isALPHA(val))
2908 bits |= (val + 9) & 0xf;
a6ec74c1 2909 else
f337b084
TH
2910 bits |= val & 0xf;
2911 } else if (isALPHA(*str))
2912 bits |= (*str++ + 9) & 0xf;
2913 else
2914 bits |= *str++ & 0xf;
2915 if (l & 1) bits <<= 4;
fc241834 2916 else {
f337b084
TH
2917 PUSH_BYTE(utf8, cur, bits);
2918 bits = 0;
a6ec74c1
JH
2919 }
2920 }
f337b084
TH
2921 else
2922 while (l++ < len) {
2923 if (utf8_source) {
2924 UV val;
2925 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2926 if (val < 256 && isALPHA(val))
2927 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2928 else
f337b084
TH
2929 bits |= (val & 0xf) << 4;
2930 } else if (isALPHA(*str))
2931 bits |= ((*str++ + 9) & 0xf) << 4;
2932 else
2933 bits |= (*str++ & 0xf) << 4;
2934 if (l & 1) bits >>= 4;
fc241834 2935 else {
f337b084
TH
2936 PUSH_BYTE(utf8, cur, bits);
2937 bits = 0;
a6ec74c1 2938 }
fc241834 2939 }
f337b084
TH
2940 l--;
2941 if (l & 1) {
2942 PUSH_BYTE(utf8, cur, bits);
2943 l++;
2944 }
2945 /* Determine how many chars are left in the requested field */
2946 l /= 2;
2947 if (howlen == e_star) field_len = 0;
2948 else field_len -= l;
2949 Zero(cur, field_len, char);
2950 cur += field_len;
2951 break;
fc241834
RGS
2952 }
2953 case 'c':
f337b084
TH
2954 while (len-- > 0) {
2955 IV aiv;
2956 fromstr = NEXTFROM;
2957 aiv = SvIV(fromstr);
2958 if ((-128 > aiv || aiv > 127) &&
2959 ckWARN(WARN_PACK))
2960 Perl_warner(aTHX_ packWARN(WARN_PACK),
2961 "Character in 'c' format wrapped in pack");
585ec06d 2962 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2963 }
2964 break;
2965 case 'C':
f337b084
TH
2966 if (len == 0) {
2967 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2968 break;
2969 }
2970 GROWING(0, cat, start, cur, len);
a6ec74c1 2971 while (len-- > 0) {
f337b084 2972 IV aiv;
a6ec74c1 2973 fromstr = NEXTFROM;
f337b084
TH
2974 aiv = SvIV(fromstr);
2975 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2976 ckWARN(WARN_PACK))
2977 Perl_warner(aTHX_ packWARN(WARN_PACK),
2978 "Character in 'C' format wrapped in pack");
585ec06d 2979 *cur++ = (char)(aiv & 0xff);
f337b084 2980 }
fc241834
RGS
2981 break;
2982 case 'W': {
2983 char *end;
2984 U8 in_bytes = IN_BYTES;
2985
2986 end = start+SvLEN(cat)-1;
2987 if (utf8) end -= UTF8_MAXLEN-1;
2988 while (len-- > 0) {
2989 UV auv;
2990 fromstr = NEXTFROM;
2991 auv = SvUV(fromstr);
2992 if (in_bytes) auv = auv % 0x100;
2993 if (utf8) {
2994 W_utf8:
2995 if (cur > end) {
2996 *cur = '\0';
b162af07 2997 SvCUR_set(cat, cur - start);
fc241834
RGS
2998
2999 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3000 end = start+SvLEN(cat)-UTF8_MAXLEN;
3001 }
230e1fce
NC
3002 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3003 NATIVE_TO_UNI(auv),
041457d9 3004 warn_utf8 ?
230e1fce 3005 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3006 } else {
3007 if (auv >= 0x100) {
3008 if (!SvUTF8(cat)) {
3009 *cur = '\0';
b162af07 3010 SvCUR_set(cat, cur - start);
fc241834
RGS
3011 marked_upgrade(aTHX_ cat, symptr);
3012 lookahead.flags |= FLAG_DO_UTF8;
3013 lookahead.strbeg = symptr->strbeg;
3014 utf8 = 1;
3015 start = SvPVX(cat);
3016 cur = start + SvCUR(cat);
3017 end = start+SvLEN(cat)-UTF8_MAXLEN;
3018 goto W_utf8;
3019 }
3020 if (ckWARN(WARN_PACK))
3021 Perl_warner(aTHX_ packWARN(WARN_PACK),
3022 "Character in 'W' format wrapped in pack");
3023 auv &= 0xff;
3024 }
3025 if (cur >= end) {
3026 *cur = '\0';
b162af07 3027 SvCUR_set(cat, cur - start);
fc241834
RGS
3028 GROWING(0, cat, start, cur, len+1);
3029 end = start+SvLEN(cat)-1;
3030 }
fe2774ed 3031 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3032 }
3033 }
3034 break;
fc241834
RGS
3035 }
3036 case 'U': {
3037 char *end;
3038
3039 if (len == 0) {
3040 if (!(symptr->flags & FLAG_DO_UTF8)) {
3041 marked_upgrade(aTHX_ cat, symptr);
3042 lookahead.flags |= FLAG_DO_UTF8;
3043 lookahead.strbeg = symptr->strbeg;
3044 }
3045 utf8 = 0;
3046 goto no_change;
3047 }
3048
3049 end = start+SvLEN(cat);
3050 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3051 while (len-- > 0) {
fc241834 3052 UV auv;
a6ec74c1 3053 fromstr = NEXTFROM;
fc241834
RGS
3054 auv = SvUV(fromstr);
3055 if (utf8) {
230e1fce 3056 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3057 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3058 warn_utf8 ?
fc241834
RGS
3059 0 : UNICODE_ALLOW_ANY);
3060 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3061 *cur = '\0';
b162af07 3062 SvCUR_set(cat, cur - start);
fc241834
RGS
3063 GROWING(0, cat, start, cur,
3064 len+(endb-buffer)*UTF8_EXPAND);
3065 end = start+SvLEN(cat);
3066 }
3067 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3068 } else {
3069 if (cur >= end) {
3070 *cur = '\0';
b162af07 3071 SvCUR_set(cat, cur - start);
fc241834
RGS
3072 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3073 end = start+SvLEN(cat)-UTF8_MAXLEN;
3074 }
230e1fce 3075 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3076 warn_utf8 ?
230e1fce 3077 0 : UNICODE_ALLOW_ANY);
fc241834 3078 }
a6ec74c1 3079 }
a6ec74c1 3080 break;
fc241834 3081 }
a6ec74c1
JH
3082 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3083 case 'f':
a6ec74c1 3084 while (len-- > 0) {
f337b084
TH
3085 float afloat;
3086 NV anv;
a6ec74c1 3087 fromstr = NEXTFROM;
f337b084 3088 anv = SvNV(fromstr);
5cdb9e01 3089#ifdef __VOS__
f337b084 3090 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3091 during conversion from double to float into infinity, so we
3092 do it by hand. This code should either be generalized for
3093 any OS that needs it, or removed if and when VOS implements
3094 posix-976 (suggestion to support mapping to infinity).
3095 Paul.Green@stratus.com 02-04-02. */
f337b084 3096 if (anv > FLT_MAX)
fc241834 3097 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3098 else if (anv < -FLT_MAX)
fc241834 3099 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3100 else afloat = (float) anv;
3101#else /* __VOS__ */
baf3cf9c 3102# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3103 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3104 * on Alpha; fake it if we don't have them.
3105 */
f337b084 3106 if (anv > FLT_MAX)
fc241834 3107 afloat = FLT_MAX;
f337b084 3108 else if (anv < -FLT_MAX)
fc241834 3109 afloat = -FLT_MAX;
f337b084 3110 else afloat = (float)anv;
baf3cf9c 3111# else
f337b084 3112 afloat = (float)anv;
baf3cf9c 3113# endif
f337b084 3114#endif /* __VOS__ */
1109a392 3115 DO_BO_PACK_N(afloat, float);
f337b084 3116 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3117 }
3118 break;
3119 case 'd':
a6ec74c1 3120 while (len-- > 0) {
f337b084
TH
3121 double adouble;
3122 NV anv;
a6ec74c1 3123 fromstr = NEXTFROM;
f337b084 3124 anv = SvNV(fromstr);
5cdb9e01 3125#ifdef __VOS__
f337b084 3126 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3127 during conversion from long double to double into infinity,
3128 so we do it by hand. This code should either be generalized
3129 for any OS that needs it, or removed if and when VOS
3130 implements posix-976 (suggestion to support mapping to
3131 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3132 if (anv > DBL_MAX)
fc241834 3133 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3134 else if (anv < -DBL_MAX)
fc241834 3135 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3136 else adouble = (double) anv;
3137#else /* __VOS__ */
baf3cf9c 3138# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3139 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3140 * on Alpha; fake it if we don't have them.
3141 */
f337b084 3142 if (anv > DBL_MAX)
fc241834 3143 adouble = DBL_MAX;
f337b084 3144 else if (anv < -DBL_MAX)
fc241834 3145 adouble = -DBL_MAX;
f337b084 3146 else adouble = (double)anv;
baf3cf9c 3147# else
f337b084 3148 adouble = (double)anv;
baf3cf9c 3149# endif
f337b084 3150#endif /* __VOS__ */
1109a392 3151 DO_BO_PACK_N(adouble, double);
f337b084 3152 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3153 }
3154 break;
fc241834
RGS
3155 case 'F': {
3156 NV anv;
1109a392 3157 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3158 while (len-- > 0) {
3159 fromstr = NEXTFROM;
3160 anv = SvNV(fromstr);
1109a392 3161 DO_BO_PACK_N(anv, NV);
fc241834 3162 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3163 }
3164 break;
fc241834 3165 }
92d41999 3166#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3167 case 'D': {
3168 long double aldouble;
1109a392
MHM
3169 /* long doubles can have unused bits, which may be nonzero */
3170 Zero(&aldouble, 1, long double);
92d41999
JH
3171 while (len-- > 0) {
3172 fromstr = NEXTFROM;
3173 aldouble = (long double)SvNV(fromstr);
1109a392 3174 DO_BO_PACK_N(aldouble, long double);
fc241834 3175 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3176 }
3177 break;
fc241834 3178 }
92d41999 3179#endif
7212898e 3180#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3181 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3182#endif
a6ec74c1
JH
3183 case 'n':
3184 while (len-- > 0) {
f337b084 3185 I16 ai16;
a6ec74c1 3186 fromstr = NEXTFROM;
ef108786 3187 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3188#ifdef HAS_HTONS
ef108786 3189 ai16 = PerlSock_htons(ai16);
a6ec74c1 3190#endif
f337b084 3191 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3192 }
3193 break;
7212898e 3194#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3195 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3196#endif
a6ec74c1
JH
3197 case 'v':
3198 while (len-- > 0) {
f337b084 3199 I16 ai16;
a6ec74c1 3200 fromstr = NEXTFROM;
ef108786 3201 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3202#ifdef HAS_HTOVS
ef108786 3203 ai16 = htovs(ai16);
a6ec74c1 3204#endif
f337b084 3205 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3206 }
3207 break;
49704364 3208 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3209#if SHORTSIZE != SIZE16
fc241834 3210 while (len-- > 0) {
f337b084 3211 unsigned short aushort;
fc241834
RGS
3212 fromstr = NEXTFROM;
3213 aushort = SvUV(fromstr);
3214 DO_BO_PACK(aushort, s);
f337b084 3215 PUSH_VAR(utf8, cur, aushort);
fc241834 3216 }
49704364
WL
3217 break;
3218#else
3219 /* Fall through! */
a6ec74c1 3220#endif
49704364 3221 case 'S':
fc241834 3222 while (len-- > 0) {
f337b084 3223 U16 au16;
fc241834
RGS
3224 fromstr = NEXTFROM;
3225 au16 = (U16)SvUV(fromstr);
3226 DO_BO_PACK(au16, 16);
f337b084 3227 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3228 }
3229 break;
49704364 3230 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3231#if SHORTSIZE != SIZE16
fc241834 3232 while (len-- > 0) {
f337b084 3233 short ashort;
fc241834
RGS
3234 fromstr = NEXTFROM;
3235 ashort = SvIV(fromstr);
3236 DO_BO_PACK(ashort, s);
f337b084 3237 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3238 }
49704364
WL
3239 break;
3240#else
3241 /* Fall through! */
a6ec74c1 3242#endif
49704364
WL
3243 case 's':
3244 while (len-- > 0) {
f337b084 3245 I16 ai16;
49704364 3246 fromstr = NEXTFROM;
ef108786
MHM
3247 ai16 = (I16)SvIV(fromstr);
3248 DO_BO_PACK(ai16, 16);
f337b084 3249 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3250 }
3251 break;
3252 case 'I':
49704364 3253 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3254 while (len-- > 0) {
f337b084 3255 unsigned int auint;
a6ec74c1
JH
3256 fromstr = NEXTFROM;
3257 auint = SvUV(fromstr);
1109a392 3258 DO_BO_PACK(auint, i);
f337b084 3259 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3260 }
3261 break;
92d41999
JH
3262 case 'j':
3263 while (len-- > 0) {
f337b084 3264 IV aiv;
92d41999
JH
3265 fromstr = NEXTFROM;
3266 aiv = SvIV(fromstr);
1109a392
MHM
3267#if IVSIZE == INTSIZE
3268 DO_BO_PACK(aiv, i);
3269#elif IVSIZE == LONGSIZE
3270 DO_BO_PACK(aiv, l);
3271#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3272 DO_BO_PACK(aiv, 64);
f337b084
TH
3273#else
3274 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3275#endif
f337b084 3276 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3277 }
3278 break;
3279 case 'J':
3280 while (len-- > 0) {
f337b084 3281 UV auv;
92d41999
JH
3282 fromstr = NEXTFROM;
3283 auv = SvUV(fromstr);
1109a392
MHM
3284#if UVSIZE == INTSIZE
3285 DO_BO_PACK(auv, i);
3286#elif UVSIZE == LONGSIZE
3287 DO_BO_PACK(auv, l);
3288#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3289 DO_BO_PACK(auv, 64);
f337b084
TH
3290#else
3291 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3292#endif
f337b084 3293 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3294 }
3295 break;
a6ec74c1
JH
3296 case 'w':
3297 while (len-- > 0) {
f337b084 3298 NV anv;
a6ec74c1 3299 fromstr = NEXTFROM;
15e9f109 3300 anv = SvNV(fromstr);
a6ec74c1 3301
f337b084
TH
3302 if (anv < 0) {
3303 *cur = '\0';
b162af07 3304 SvCUR_set(cat, cur - start);
49704364 3305 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3306 }
a6ec74c1 3307
196b62db
NC
3308 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3309 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3310 any negative IVs will have already been got by the croak()
3311 above. IOK is untrue for fractions, so we test them
3312 against UV_MAX_P1. */
f337b084
TH
3313 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3314 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3315 char *in = buf + sizeof(buf);
196b62db 3316 UV auv = SvUV(fromstr);
a6ec74c1
JH
3317
3318 do {
eb160463 3319 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3320 auv >>= 7;
3321 } while (auv);
3322 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3323 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3324 in, (buf + sizeof(buf)) - in);
3325 } else if (SvPOKp(fromstr))
3326 goto w_string;
a6ec74c1 3327 else if (SvNOKp(fromstr)) {
0258719b
NC
3328 /* 10**NV_MAX_10_EXP is the largest power of 10
3329 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3330 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3331 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3332 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3333 Some C compilers are strict about integral constant
3334 expressions so we conservatively divide by a slightly
3335 smaller integer instead of multiplying by the exact
3336 floating-point value.
0258719b
NC
3337 */
3338#ifdef NV_MAX_10_EXP
f337b084 3339 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3340 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3341#else
f337b084 3342 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3343 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3344#endif
a6ec74c1
JH
3345 char *in = buf + sizeof(buf);
3346
8b6e33c7 3347 anv = Perl_floor(anv);
a6ec74c1 3348 do {
8b6e33c7 3349 const NV next = Perl_floor(anv / 128);
a6ec74c1 3350 if (in <= buf) /* this cannot happen ;-) */
49704364 3351 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3352 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3353 anv = next;
3354 } while (anv > 0);
a6ec74c1 3355 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3356 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3357 in, (buf + sizeof(buf)) - in);
3358 } else {
8b6e33c7
AL
3359 const char *from;
3360 char *result, *in;
735b914b
JH
3361 SV *norm;
3362 STRLEN len;
3363 bool done;
3364
f337b084 3365 w_string:
735b914b 3366 /* Copy string and check for compliance */
349d4f2f 3367 from = SvPV_const(fromstr, len);
735b914b 3368 if ((norm = is_an_int(from, len)) == NULL)
49704364 3369 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3370
a02a5408 3371 Newx(result, len, char);
735b914b
JH
3372 in = result + len;
3373 done = FALSE;
f337b084 3374 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3375 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3376 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3377 in, (result + len) - in);
735b914b
JH
3378 Safefree(result);
3379 SvREFCNT_dec(norm); /* free norm */
fc241834 3380 }
a6ec74c1
JH
3381 }
3382 break;
3383 case 'i':
49704364 3384 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3385 while (len-- > 0) {
f337b084 3386 int aint;
a6ec74c1
JH
3387 fromstr = NEXTFROM;
3388 aint = SvIV(fromstr);
1109a392 3389 DO_BO_PACK(aint, i);
f337b084 3390 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3391 }
3392 break;
7212898e 3393#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3394 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3395#endif
a6ec74c1
JH
3396 case 'N':
3397 while (len-- > 0) {
f337b084 3398 U32 au32;
a6ec74c1 3399 fromstr = NEXTFROM;
ef108786 3400 au32 = SvUV(fromstr);
a6ec74c1 3401#ifdef HAS_HTONL
ef108786 3402 au32 = PerlSock_htonl(au32);
a6ec74c1 3403#endif
f337b084 3404 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3405 }
3406 break;
7212898e 3407#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3408 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3409#endif
a6ec74c1
JH
3410 case 'V':
3411 while (len-- > 0) {
f337b084 3412 U32 au32;
a6ec74c1 3413 fromstr = NEXTFROM;
ef108786 3414 au32 = SvUV(fromstr);
a6ec74c1 3415#ifdef HAS_HTOVL
ef108786 3416 au32 = htovl(au32);
a6ec74c1 3417#endif
f337b084 3418 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3419 }
3420 break;
49704364 3421 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3422#if LONGSIZE != SIZE32
fc241834 3423 while (len-- > 0) {
f337b084 3424 unsigned long aulong;
fc241834
RGS
3425 fromstr = NEXTFROM;
3426 aulong = SvUV(fromstr);
3427 DO_BO_PACK(aulong, l);
f337b084 3428 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3429 }
49704364
WL
3430 break;
3431#else
3432 /* Fall though! */
a6ec74c1 3433#endif
49704364 3434 case 'L':
fc241834 3435 while (len-- > 0) {
f337b084 3436 U32 au32;
fc241834
RGS
3437 fromstr = NEXTFROM;
3438 au32 = SvUV(fromstr);
3439 DO_BO_PACK(au32, 32);
f337b084 3440 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3441 }
3442 break;
49704364 3443 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3444#if LONGSIZE != SIZE32
fc241834 3445 while (len-- > 0) {
f337b084 3446 long along;
fc241834
RGS
3447 fromstr = NEXTFROM;
3448 along = SvIV(fromstr);
3449 DO_BO_PACK(along, l);
f337b084 3450 PUSH_VAR(utf8, cur, along);
a6ec74c1 3451 }
49704364
WL
3452 break;
3453#else
3454 /* Fall though! */
a6ec74c1 3455#endif
49704364
WL
3456 case 'l':
3457 while (len-- > 0) {
f337b084 3458 I32 ai32;
49704364 3459 fromstr = NEXTFROM;
ef108786
MHM
3460 ai32 = SvIV(fromstr);
3461 DO_BO_PACK(ai32, 32);
f337b084 3462 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3463 }
3464 break;
3465#ifdef HAS_QUAD
3466 case 'Q':
3467 while (len-- > 0) {
f337b084 3468 Uquad_t auquad;
a6ec74c1 3469 fromstr = NEXTFROM;
f337b084 3470 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3471 DO_BO_PACK(auquad, 64);
f337b084 3472 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3473 }
3474 break;
3475 case 'q':
3476 while (len-- > 0) {
f337b084 3477 Quad_t aquad;
a6ec74c1
JH
3478 fromstr = NEXTFROM;
3479 aquad = (Quad_t)SvIV(fromstr);
1109a392 3480 DO_BO_PACK(aquad, 64);
f337b084 3481 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3482 }
3483 break;
f337b084 3484#endif /* HAS_QUAD */
a6ec74c1
JH
3485 case 'P':
3486 len = 1; /* assume SV is correct length */
f337b084 3487 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3488 /* Fall through! */
a6ec74c1
JH
3489 case 'p':
3490 while (len-- > 0) {
83003860 3491 const char *aptr;
f337b084 3492
a6ec74c1 3493 fromstr = NEXTFROM;
28a4f200
TH
3494 SvGETMAGIC(fromstr);
3495 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3496 else {
a6ec74c1
JH
3497 /* XXX better yet, could spirit away the string to
3498 * a safe spot and hang on to it until the result
3499 * of pack() (and all copies of the result) are
3500 * gone.
3501 */
041457d9
DM
3502 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3503 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3504 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3505 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3506 }
3507 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3508 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3509 else
2596d9fe 3510 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3511 }
07409e01 3512 DO_BO_PACK_PC(aptr);
f337b084 3513 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3514 }
3515 break;
fc241834 3516 case 'u': {
f7fe979e 3517 const char *aptr, *aend;
fc241834 3518 bool from_utf8;
f337b084 3519
a6ec74c1 3520 fromstr = NEXTFROM;
fc241834
RGS
3521 if (len <= 2) len = 45;
3522 else len = len / 3 * 3;
3523 if (len >= 64) {
0fdc08b6
RGS
3524 if (ckWARN(WARN_PACK))
3525 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3526 "Field too wide in 'u' format in pack");
3527 len = 63;
3528 }
83003860 3529 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3530 from_utf8 = DO_UTF8(fromstr);
3531 if (from_utf8) {
3532 aend = aptr + fromlen;
3533 fromlen = sv_len_utf8(fromstr);
3534 } else aend = NULL; /* Unused, but keep compilers happy */
3535 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3536 while (fromlen > 0) {
fc241834 3537 U8 *end;
a6ec74c1 3538 I32 todo;
fc241834 3539 U8 hunk[1+63/3*4+1];
a6ec74c1 3540
eb160463 3541 if ((I32)fromlen > len)
a6ec74c1
JH
3542 todo = len;
3543 else
3544 todo = fromlen;
fc241834
RGS
3545 if (from_utf8) {
3546 char buffer[64];
3547 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3548 'u' | TYPE_IS_PACK)) {
3549 *cur = '\0';
b162af07 3550 SvCUR_set(cat, cur - start);
fc241834
RGS
3551 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3552 }
3553 end = doencodes(hunk, buffer, todo);
3554 } else {
3555 end = doencodes(hunk, aptr, todo);
3556 aptr += todo;
3557 }
3558 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3559 fromlen -= todo;
3560 }
a6ec74c1
JH
3561 break;
3562 }
f337b084
TH
3563 }
3564 *cur = '\0';
b162af07 3565 SvCUR_set(cat, cur - start);
f337b084 3566 no_change:
49704364 3567 *symptr = lookahead;
a6ec74c1 3568 }
49704364 3569 return beglist;
18529408
IZ
3570}
3571#undef NEXTFROM
3572
3573
3574PP(pp_pack)
3575{
97aff369 3576 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3577 register SV *cat = TARG;
3578 STRLEN fromlen;
349d4f2f
NC
3579 SV *pat_sv = *++MARK;
3580 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3581 register const char *patend = pat + fromlen;
18529408
IZ
3582
3583 MARK++;
3584 sv_setpvn(cat, "", 0);
f337b084 3585 SvUTF8_off(cat);
18529408 3586
7accc089 3587 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3588
a6ec74c1
JH
3589 SvSETMAGIC(cat);
3590 SP = ORIGMARK;
3591 PUSHs(cat);
3592 RETURN;
3593}
a6ec74c1 3594
73cb7263
NC
3595/*
3596 * Local variables:
3597 * c-indentation-style: bsd
3598 * c-basic-offset: 4
3599 * indent-tabs-mode: t
3600 * End:
3601 *
37442d52
RGS
3602 * ex: set ts=8 sts=4 sw=4 noet:
3603 */