This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
quiet a few warnings
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
WL
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV *tmpNew = newSVpvn("0000000000", 10);
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
7212898e
NC
325# else
326# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
327# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
328# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
329# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 330# endif
1109a392 331
7212898e 332# if defined(my_htolen) && defined(my_letohn) && \
1109a392 333 defined(my_htoben) && defined(my_betohn)
7212898e 334# define DO_BO_UNPACK_N(var, type) \
1109a392 335 STMT_START { \
66c611c5 336 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
337 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
338 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
339 default: break; \
340 } \
341 } STMT_END
342
7212898e 343# define DO_BO_PACK_N(var, type) \
1109a392 344 STMT_START { \
66c611c5 345 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
346 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
347 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
348 default: break; \
349 } \
350 } STMT_END
7212898e
NC
351# else
352# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
353# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
354# endif
355
f337b084 356#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 357
78d46eaa 358#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 359#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
360#define PACK_SIZE_MASK 0x3F
361
78d46eaa
NC
362/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
363 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
364
365#if TYPE_IS_SHRIEKING != 0x100
366 ++++shriek offset should be 256
367#endif
368
369typedef U8 packprops_t;
78d46eaa
NC
370#if 'J'-'I' == 1
371/* ASCII */
a7a3cfaa
TH
372const packprops_t packprops[512] = {
373 /* normal */
374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
375 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
376 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
378 0, 0, 0,
379 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 380#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 381 /* D */ LONG_DOUBLESIZE,
80a13697 382#else
a7a3cfaa 383 0,
80a13697 384#endif
a7a3cfaa
TH
385 0,
386 /* F */ NVSIZE,
387 0, 0,
388 /* I */ sizeof(unsigned int),
389 /* J */ UVSIZE,
390 0,
391 /* L */ SIZE32,
392 0,
393 /* N */ SIZE32,
394 0, 0,
80a13697 395#if defined(HAS_QUAD)
a7a3cfaa 396 /* Q */ sizeof(Uquad_t),
80a13697 397#else
a7a3cfaa 398 0,
80a13697 399#endif
a7a3cfaa
TH
400 0,
401 /* S */ SIZE16,
402 0,
403 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
404 /* V */ SIZE32,
405 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 /* c */ sizeof(char),
408 /* d */ sizeof(double),
409 0,
410 /* f */ sizeof(float),
411 0, 0,
412 /* i */ sizeof(int),
413 /* j */ IVSIZE,
414 0,
415 /* l */ SIZE32,
416 0,
417 /* n */ SIZE16,
418 0,
419 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 420#if defined(HAS_QUAD)
a7a3cfaa 421 /* q */ sizeof(Quad_t),
80a13697 422#else
a7a3cfaa 423 0,
80a13697 424#endif
a7a3cfaa
TH
425 0,
426 /* s */ SIZE16,
427 0, 0,
428 /* v */ SIZE16,
429 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0,
439 /* shrieking */
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 /* I */ sizeof(unsigned int),
446 0, 0,
447 /* L */ sizeof(unsigned long),
448 0,
7212898e 449#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 450 /* N */ SIZE32,
7212898e 451#else
a7a3cfaa 452 0,
7212898e 453#endif
a7a3cfaa
TH
454 0, 0, 0, 0,
455 /* S */ sizeof(unsigned short),
456 0, 0,
7212898e 457#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 458 /* V */ SIZE32,
7212898e 459#else
a7a3cfaa 460 0,
7212898e 461#endif
a7a3cfaa
TH
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0,
464 /* i */ sizeof(int),
465 0, 0,
466 /* l */ sizeof(long),
467 0,
7212898e 468#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 469 /* n */ SIZE16,
7212898e 470#else
a7a3cfaa 471 0,
7212898e 472#endif
a7a3cfaa
TH
473 0, 0, 0, 0,
474 /* s */ sizeof(short),
475 0, 0,
7212898e 476#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 477 /* v */ SIZE16,
7212898e 478#else
a7a3cfaa 479 0,
7212898e 480#endif
a7a3cfaa
TH
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
490};
491#else
492/* EBCDIC (or bust) */
a7a3cfaa
TH
493const packprops_t packprops[512] = {
494 /* normal */
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0,
504 /* c */ sizeof(char),
505 /* d */ sizeof(double),
506 0,
507 /* f */ sizeof(float),
508 0, 0,
509 /* i */ sizeof(int),
510 0, 0, 0, 0, 0, 0, 0,
511 /* j */ IVSIZE,
512 0,
513 /* l */ SIZE32,
514 0,
515 /* n */ SIZE16,
516 0,
517 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 518#if defined(HAS_QUAD)
a7a3cfaa 519 /* q */ sizeof(Quad_t),
80a13697 520#else
a7a3cfaa 521 0,
80a13697 522#endif
a7a3cfaa
TH
523 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 /* s */ SIZE16,
525 0, 0,
526 /* v */ SIZE16,
527 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 531#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 532 /* D */ LONG_DOUBLESIZE,
80a13697 533#else
a7a3cfaa 534 0,
80a13697 535#endif
a7a3cfaa
TH
536 0,
537 /* F */ NVSIZE,
538 0, 0,
539 /* I */ sizeof(unsigned int),
540 0, 0, 0, 0, 0, 0, 0,
541 /* J */ UVSIZE,
542 0,
543 /* L */ SIZE32,
544 0,
545 /* N */ SIZE32,
546 0, 0,
80a13697 547#if defined(HAS_QUAD)
a7a3cfaa 548 /* Q */ sizeof(Uquad_t),
80a13697 549#else
a7a3cfaa 550 0,
80a13697 551#endif
a7a3cfaa
TH
552 0, 0, 0, 0, 0, 0, 0, 0, 0,
553 /* S */ SIZE16,
554 0,
555 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
556 /* V */ SIZE32,
557 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* shrieking */
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 /* i */ sizeof(int),
571 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 /* l */ sizeof(long),
573 0,
7212898e 574#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 575 /* n */ SIZE16,
7212898e 576#else
a7a3cfaa 577 0,
7212898e 578#endif
a7a3cfaa
TH
579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* s */ sizeof(short),
581 0, 0,
7212898e 582#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 583 /* v */ SIZE16,
7212898e 584#else
a7a3cfaa 585 0,
7212898e 586#endif
a7a3cfaa
TH
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0,
590 /* I */ sizeof(unsigned int),
591 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 /* L */ sizeof(unsigned long),
593 0,
7212898e 594#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 595 /* N */ SIZE32,
7212898e 596#else
a7a3cfaa 597 0,
7212898e 598#endif
a7a3cfaa
TH
599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* S */ sizeof(unsigned short),
601 0, 0,
7212898e 602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 603 /* V */ SIZE32,
7212898e 604#else
a7a3cfaa 605 0,
7212898e 606#endif
a7a3cfaa
TH
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
609};
610#endif
611
08ca2aa3 612STATIC U8
f7fe979e 613uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3
TH
614{
615 UV val;
616 STRLEN retlen;
f337b084
TH
617 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
618 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
619 /* We try to process malformed UTF-8 as much as possible (preferrably with
620 warnings), but these two mean we make no progress in the string and
621 might enter an infinite loop */
622 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
623 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
624 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 625 if (val >= 0x100) {
f337b084 626 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
627 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
628 "Character in '%c' format wrapped in unpack",
f337b084 629 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
630 val &= 0xff;
631 }
632 *s += retlen;
fe2774ed 633 return (U8)val;
08ca2aa3
TH
634}
635
f337b084
TH
636#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
637 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
638 *(U8 *)(s)++)
639
640STATIC bool
f7fe979e 641uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
642{
643 UV val;
644 STRLEN retlen;
f7fe979e 645 const char *from = *s;
08ca2aa3 646 int bad = 0;
f7fe979e 647 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
648 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
649 for (;buf_len > 0; buf_len--) {
650 if (from >= end) return FALSE;
f337b084 651 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
652 if (retlen == (STRLEN) -1 || retlen == 0) {
653 from += UTF8SKIP(from);
654 bad |= 1;
655 } else from += retlen;
656 if (val >= 0x100) {
657 bad |= 2;
658 val &= 0xff;
659 }
fe2774ed 660 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
661 }
662 /* We have enough characters for the buffer. Did we have problems ? */
663 if (bad) {
664 if (bad & 1) {
665 /* Rewalk the string fragment while warning */
f7fe979e 666 const char *ptr;
9e27e96a 667 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
668 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
669 if (ptr >= end) break;
f337b084 670 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
671 }
672 if (from > end) from = end;
673 }
674 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 675 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
676 WARN_PACK : WARN_UNPACK),
677 "Character(s) in '%c' format wrapped in %s",
fc241834 678 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 679 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
680 }
681 *s = from;
682 return TRUE;
683}
684
685STATIC bool
f7fe979e 686next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3
TH
687{
688 UV val;
689 STRLEN retlen;
f337b084 690 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
691 if (val >= 0x100 || !ISUUCHAR(val) ||
692 retlen == (STRLEN) -1 || retlen == 0) {
693 *out = 0;
694 return FALSE;
695 }
696 *out = PL_uudmap[val] & 077;
f337b084 697 *s += retlen;
08ca2aa3
TH
698 return TRUE;
699}
78d46eaa 700
f337b084 701STATIC void
f7fe979e 702bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
f337b084 703 U8 buffer[UTF8_MAXLEN];
f7fe979e 704 const U8 *end = start + len;
f337b084
TH
705 char *d = *dest;
706 while (start < end) {
f7fe979e 707 const int length =
f337b084
TH
708 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
709 switch(length) {
710 case 1:
711 *d++ = buffer[0];
712 break;
713 case 2:
714 *d++ = buffer[0];
715 *d++ = buffer[1];
716 break;
717 default:
718 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
719 *start, length);
720 }
721 start++;
722 }
723 *dest = d;
724}
725
230e1fce
NC
726#define PUSH_BYTES(utf8, cur, buf, len) \
727STMT_START { \
728 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
729 else { \
730 Copy(buf, cur, len, char); \
731 (cur) += (len); \
732 } \
f337b084
TH
733} STMT_END
734
735#define GROWING(utf8, cat, start, cur, in_len) \
736STMT_START { \
737 STRLEN glen = (in_len); \
3473cf63 738 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
739 if ((cur) + glen >= (start) + SvLEN(cat)) { \
740 (start) = sv_exp_grow(aTHX_ cat, glen); \
741 (cur) = (start) + SvCUR(cat); \
742 } \
743} STMT_END
744
745#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
746STMT_START { \
f7fe979e 747 const STRLEN glen = (in_len); \
f337b084 748 STRLEN gl = glen; \
3473cf63 749 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
750 if ((cur) + gl >= (start) + SvLEN(cat)) { \
751 *cur = '\0'; \
b162af07 752 SvCUR_set((cat), (cur) - (start)); \
f337b084
TH
753 (start) = sv_exp_grow(aTHX_ cat, gl); \
754 (cur) = (start) + SvCUR(cat); \
755 } \
756 PUSH_BYTES(utf8, cur, buf, glen); \
757} STMT_END
758
759#define PUSH_BYTE(utf8, s, byte) \
760STMT_START { \
761 if (utf8) { \
f7fe979e 762 const U8 au8 = (byte); \
f337b084
TH
763 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
764 } else *(U8 *)(s)++ = (byte); \
765} STMT_END
766
767/* Only to be used inside a loop (see the break) */
768#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
769STMT_START { \
770 STRLEN retlen; \
771 if (str >= end) break; \
772 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
773 if (retlen == (STRLEN) -1 || retlen == 0) { \
774 *cur = '\0'; \
775 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
776 } \
777 str += retlen; \
778} STMT_END
779
f7fe979e
AL
780static const char *_action( const tempsym_t* symptr )
781{
782 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
783}
784
206947d2 785/* Returns the sizeof() struct described by pat */
028d1f6d 786STATIC I32
f337b084 787S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 788{
f337b084 789 I32 total = 0;
206947d2 790
49704364 791 while (next_symbol(symptr)) {
f337b084 792 I32 len;
f7fe979e 793 int size;
f337b084
TH
794
795 switch (symptr->howlen) {
fc241834 796 case e_star:
49704364 797 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 798 _action( symptr ) );
49704364 799 break;
f337b084
TH
800 default:
801 /* e_no_len and e_number */
802 len = symptr->length;
803 break;
49704364
WL
804 }
805
a7a3cfaa 806 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 807 if (!size) {
f7fe979e 808 int star;
80a13697
NC
809 /* endianness doesn't influence the size of a type */
810 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
811 default:
812 Perl_croak(aTHX_ "Invalid type '%c' in %s",
813 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 814 _action( symptr ) );
28be1210
TH
815#ifdef PERL_PACK_CAN_SHRIEKSIGN
816 case '.' | TYPE_IS_SHRIEKING:
817 case '@' | TYPE_IS_SHRIEKING:
818#endif
80a13697 819 case '@':
28be1210 820 case '.':
80a13697
NC
821 case '/':
822 case 'U': /* XXXX Is it correct? */
823 case 'w':
824 case 'u':
825 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 826 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 827 _action( symptr ) );
80a13697
NC
828 case '%':
829 size = 0;
830 break;
831 case '(':
fc241834
RGS
832 {
833 tempsym_t savsym = *symptr;
834 symptr->patptr = savsym.grpbeg;
835 symptr->patend = savsym.grpend;
836 /* XXXX Theoretically, we need to measure many times at
837 different positions, since the subexpression may contain
838 alignment commands, but be not of aligned length.
839 Need to detect this and croak(). */
840 size = measure_struct(symptr);
841 *symptr = savsym;
842 break;
843 }
80a13697
NC
844 case 'X' | TYPE_IS_SHRIEKING:
845 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
846 */
847 if (!len) /* Avoid division by 0 */
848 len = 1;
849 len = total % len; /* Assumed: the start is aligned. */
850 /* FALL THROUGH */
851 case 'X':
852 size = -1;
853 if (total < len)
f7fe979e 854 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
855 break;
856 case 'x' | TYPE_IS_SHRIEKING:
857 if (!len) /* Avoid division by 0 */
858 len = 1;
859 star = total % len; /* Assumed: the start is aligned. */
860 if (star) /* Other portable ways? */
861 len = len - star;
862 else
863 len = 0;
864 /* FALL THROUGH */
865 case 'x':
866 case 'A':
867 case 'Z':
868 case 'a':
80a13697
NC
869 size = 1;
870 break;
871 case 'B':
872 case 'b':
873 len = (len + 7)/8;
874 size = 1;
875 break;
876 case 'H':
877 case 'h':
878 len = (len + 1)/2;
879 size = 1;
880 break;
78d46eaa 881
80a13697
NC
882 case 'P':
883 len = 1;
884 size = sizeof(char*);
78d46eaa
NC
885 break;
886 }
206947d2
IZ
887 }
888 total += len * size;
889 }
890 return total;
891}
892
49704364
WL
893
894/* locate matching closing parenthesis or bracket
895 * returns char pointer to char after match, or NULL
896 */
f7fe979e
AL
897STATIC const char *
898S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 899{
49704364 900 while (patptr < patend) {
f7fe979e 901 const char c = *patptr++;
49704364
WL
902
903 if (isSPACE(c))
904 continue;
905 else if (c == ender)
906 return patptr-1;
907 else if (c == '#') {
908 while (patptr < patend && *patptr != '\n')
909 patptr++;
910 continue;
911 } else if (c == '(')
912 patptr = group_end(patptr, patend, ')') + 1;
913 else if (c == '[')
914 patptr = group_end(patptr, patend, ']') + 1;
18529408 915 }
49704364
WL
916 Perl_croak(aTHX_ "No group ending character '%c' found in template",
917 ender);
918 return 0;
18529408
IZ
919}
920
49704364
WL
921
922/* Convert unsigned decimal number to binary.
923 * Expects a pointer to the first digit and address of length variable
924 * Advances char pointer to 1st non-digit char and returns number
fc241834 925 */
f7fe979e
AL
926STATIC const char *
927S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
928{
929 I32 len = *patptr++ - '0';
930 while (isDIGIT(*patptr)) {
931 if (len >= 0x7FFFFFFF/10)
932 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
933 len = (len * 10) + (*patptr++ - '0');
934 }
935 *lenptr = len;
936 return patptr;
937}
938
939/* The marvellous template parsing routine: Using state stored in *symptr,
940 * locates next template code and count
941 */
942STATIC bool
f337b084 943S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 944{
f7fe979e
AL
945 const char* patptr = symptr->patptr;
946 const char* patend = symptr->patend;
49704364
WL
947
948 symptr->flags &= ~FLAG_SLASH;
949
950 while (patptr < patend) {
951 if (isSPACE(*patptr))
952 patptr++;
953 else if (*patptr == '#') {
954 patptr++;
955 while (patptr < patend && *patptr != '\n')
956 patptr++;
957 if (patptr < patend)
958 patptr++;
959 } else {
fc241834 960 /* We should have found a template code */
49704364 961 I32 code = *patptr++ & 0xFF;
66c611c5 962 U32 inherited_modifiers = 0;
49704364
WL
963
964 if (code == ','){ /* grandfather in commas but with a warning */
965 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
966 symptr->flags |= FLAG_COMMA;
967 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 968 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
969 }
970 continue;
971 }
fc241834 972
49704364 973 /* for '(', skip to ')' */
fc241834 974 if (code == '(') {
49704364
WL
975 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
976 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 977 _action( symptr ) );
49704364
WL
978 symptr->grpbeg = patptr;
979 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
980 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
981 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 982 _action( symptr ) );
49704364
WL
983 }
984
66c611c5
MHM
985 /* look for group modifiers to inherit */
986 if (TYPE_ENDIANNESS(symptr->flags)) {
987 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
988 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
989 }
990
1109a392
MHM
991 /* look for modifiers */
992 while (patptr < patend) {
b7787f18
AL
993 const char *allowed;
994 I32 modifier;
1109a392
MHM
995 switch (*patptr) {
996 case '!':
997 modifier = TYPE_IS_SHRIEKING;
7212898e 998 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 999 break;
7212898e 1000#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1001 case '>':
1002 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1004 break;
1005 case '<':
1006 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1007 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1008 break;
f337b084 1009#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1010 default:
b7787f18
AL
1011 allowed = "";
1012 modifier = 0;
1109a392
MHM
1013 break;
1014 }
66c611c5 1015
1109a392
MHM
1016 if (modifier == 0)
1017 break;
66c611c5 1018
1109a392
MHM
1019 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1020 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1021 allowed, _action( symptr ) );
66c611c5
MHM
1022
1023 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1024 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1025 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1026 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1027 TYPE_ENDIANNESS_MASK)
1028 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1029 *patptr, _action( symptr ) );
66c611c5 1030
041457d9 1031 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1032 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1033 "Duplicate modifier '%c' after '%c' in %s",
1034 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1035 _action( symptr ) );
1109a392 1036 }
66c611c5 1037
1109a392
MHM
1038 code |= modifier;
1039 patptr++;
49704364
WL
1040 }
1041
66c611c5
MHM
1042 /* inherit modifiers */
1043 code |= inherited_modifiers;
1044
fc241834 1045 /* look for count and/or / */
49704364
WL
1046 if (patptr < patend) {
1047 if (isDIGIT(*patptr)) {
1048 patptr = get_num( patptr, &symptr->length );
1049 symptr->howlen = e_number;
1050
1051 } else if (*patptr == '*') {
1052 patptr++;
1053 symptr->howlen = e_star;
1054
1055 } else if (*patptr == '[') {
f7fe979e 1056 const char* lenptr = ++patptr;
49704364
WL
1057 symptr->howlen = e_number;
1058 patptr = group_end( patptr, patend, ']' ) + 1;
1059 /* what kind of [] is it? */
1060 if (isDIGIT(*lenptr)) {
1061 lenptr = get_num( lenptr, &symptr->length );
1062 if( *lenptr != ']' )
1063 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1064 _action( symptr ) );
49704364
WL
1065 } else {
1066 tempsym_t savsym = *symptr;
1067 symptr->patend = patptr-1;
1068 symptr->patptr = lenptr;
1069 savsym.length = measure_struct(symptr);
1070 *symptr = savsym;
1071 }
1072 } else {
1073 symptr->howlen = e_no_len;
1074 symptr->length = 1;
1075 }
1076
1077 /* try to find / */
1078 while (patptr < patend) {
1079 if (isSPACE(*patptr))
1080 patptr++;
1081 else if (*patptr == '#') {
1082 patptr++;
1083 while (patptr < patend && *patptr != '\n')
1084 patptr++;
1085 if (patptr < patend)
1086 patptr++;
1087 } else {
66c611c5 1088 if (*patptr == '/') {
49704364
WL
1089 symptr->flags |= FLAG_SLASH;
1090 patptr++;
66c611c5
MHM
1091 if (patptr < patend &&
1092 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1093 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1094 _action( symptr ) );
49704364
WL
1095 }
1096 break;
1097 }
18529408 1098 }
49704364
WL
1099 } else {
1100 /* at end - no count, no / */
1101 symptr->howlen = e_no_len;
1102 symptr->length = 1;
1103 }
1104
1105 symptr->code = code;
fc241834 1106 symptr->patptr = patptr;
49704364 1107 return TRUE;
18529408 1108 }
49704364 1109 }
fc241834 1110 symptr->patptr = patptr;
49704364 1111 return FALSE;
18529408
IZ
1112}
1113
18529408 1114/*
fc241834 1115 There is no way to cleanly handle the case where we should process the
08ca2aa3 1116 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1117 (e.g. estimates like strend-s as an upper bound for the number of
1118 characters left wouldn't work). So if we foresee the need of this
1119 (pattern starts with U or contains U0), we want to work on the encoded
1120 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1121 themselves if they need to do a lot of unpacks like this on it
1122*/
fc241834 1123STATIC bool
08ca2aa3
TH
1124need_utf8(const char *pat, const char *patend)
1125{
1126 bool first = TRUE;
1127 while (pat < patend) {
1128 if (pat[0] == '#') {
1129 pat++;
f7fe979e 1130 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1131 if (!pat) return FALSE;
1132 } else if (pat[0] == 'U') {
1133 if (first || pat[1] == '0') return TRUE;
1134 } else first = FALSE;
1135 pat++;
1136 }
1137 return FALSE;
1138}
1139
1140STATIC char
1141first_symbol(const char *pat, const char *patend) {
1142 while (pat < patend) {
1143 if (pat[0] != '#') return pat[0];
1144 pat++;
f7fe979e 1145 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1146 if (!pat) return 0;
1147 pat++;
1148 }
1149 return 0;
1150}
1151
1152/*
18529408
IZ
1153=for apidoc unpack_str
1154
7accc089
JH
1155The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1156and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1157
1158=cut */
1159
1160I32
f7fe979e 1161Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1162{
f7fe979e 1163 tempsym_t sym;
8772537c
AL
1164 PERL_UNUSED_ARG(strbeg);
1165 PERL_UNUSED_ARG(new_s);
1166 PERL_UNUSED_ARG(ocnt);
08ca2aa3 1167
f337b084 1168 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1169 else if (need_utf8(pat, patend)) {
1170 /* We probably should try to avoid this in case a scalar context call
1171 wouldn't get to the "U0" */
1172 STRLEN len = strend - s;
230e1fce 1173 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1174 SAVEFREEPV(s);
1175 strend = s + len;
f337b084 1176 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1177 }
1178
f337b084
TH
1179 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1180 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1181
f7fe979e 1182 TEMPSYM_INIT(&sym, pat, patend, flags);
49704364
WL
1183
1184 return unpack_rec(&sym, s, s, strend, NULL );
1185}
1186
7accc089
JH
1187/*
1188=for apidoc unpackstring
1189
608d3aed
WL
1190The engine implementing unpack() Perl function. C<unpackstring> puts the
1191extracted list items on the stack and returns the number of elements.
1192Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1193
1194=cut */
1195
1196I32
f7fe979e 1197Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1198{
f7fe979e 1199 tempsym_t sym;
08ca2aa3 1200
f337b084 1201 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1202 else if (need_utf8(pat, patend)) {
1203 /* We probably should try to avoid this in case a scalar context call
1204 wouldn't get to the "U0" */
1205 STRLEN len = strend - s;
230e1fce 1206 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1207 SAVEFREEPV(s);
1208 strend = s + len;
f337b084 1209 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1210 }
1211
f337b084
TH
1212 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1213 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1214
f7fe979e 1215 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1216
1217 return unpack_rec(&sym, s, s, strend, NULL );
1218}
1219
49704364
WL
1220STATIC
1221I32
f7fe979e 1222S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1223{
27da23d5 1224 dVAR; dSP;
18529408 1225 SV *sv;
f7fe979e 1226 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1227 howlen_t howlen;
a6ec74c1 1228
a6ec74c1 1229 I32 checksum = 0;
92d41999 1230 UV cuv = 0;
a6ec74c1 1231 NV cdouble = 0.0;
f337b084 1232 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1233 bool beyond = FALSE;
21c16052 1234 bool explicit_length;
9e27e96a 1235 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1236 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1237 symptr->strbeg = s - strbeg;
49704364 1238
49704364 1239 while (next_symbol(symptr)) {
a7a3cfaa 1240 packprops_t props;
9e27e96a 1241 I32 len;
f337b084 1242 I32 datumtype = symptr->code;
206947d2 1243 /* do first one only unless in list context
08ca2aa3 1244 / is implemented by unpacking the count, then popping it from the
206947d2 1245 stack, so must check that we're not in the middle of a / */
49704364 1246 if ( unpack_only_one
206947d2 1247 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1248 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1249 break;
49704364 1250
f337b084 1251 switch (howlen = symptr->howlen) {
fc241834
RGS
1252 case e_star:
1253 len = strend - strbeg; /* long enough */
49704364 1254 break;
f337b084
TH
1255 default:
1256 /* e_no_len and e_number */
1257 len = symptr->length;
1258 break;
49704364 1259 }
18529408 1260
21c16052 1261 explicit_length = TRUE;
a6ec74c1 1262 redo_switch:
49704364 1263 beyond = s >= strend;
a7a3cfaa
TH
1264
1265 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1266 if (props) {
1267 /* props nonzero means we can process this letter. */
9e27e96a
AL
1268 const long size = props & PACK_SIZE_MASK;
1269 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1270 if (len > howmany)
1271 len = howmany;
1272
1273 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1274 if (len && unpack_only_one) len = 1;
1275 EXTEND(SP, len);
1276 EXTEND_MORTAL(len);
78d46eaa
NC
1277 }
1278 }
a7a3cfaa 1279
1109a392 1280 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1281 default:
1109a392 1282 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1283
a6ec74c1 1284 case '%':
49704364 1285 if (howlen == e_no_len)
18529408 1286 len = 16; /* len is not specified */
a6ec74c1 1287 checksum = len;
92d41999 1288 cuv = 0;
a6ec74c1 1289 cdouble = 0;
18529408 1290 continue;
a6ec74c1 1291 break;
18529408
IZ
1292 case '(':
1293 {
49704364 1294 tempsym_t savsym = *symptr;
9e27e96a 1295 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1296 symptr->flags |= group_modifiers;
49704364 1297 symptr->patend = savsym.grpend;
28be1210 1298 symptr->previous = &savsym;
49704364 1299 symptr->level++;
18529408
IZ
1300 PUTBACK;
1301 while (len--) {
49704364 1302 symptr->patptr = savsym.grpbeg;
f337b084
TH
1303 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1304 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1305 unpack_rec(symptr, s, strbeg, strend, &s);
1306 if (s == strend && savsym.howlen == e_star)
49704364 1307 break; /* No way to continue */
18529408
IZ
1308 }
1309 SPAGAIN;
28be1210 1310 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1311 *symptr = savsym;
18529408
IZ
1312 break;
1313 }
28be1210
TH
1314#ifdef PERL_PACK_CAN_SHRIEKSIGN
1315 case '.' | TYPE_IS_SHRIEKING:
1316#endif
1317 case '.': {
9e27e96a 1318 const char *from;
28be1210
TH
1319 SV *sv;
1320#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1321 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1322#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1323 const bool u8 = utf8;
28be1210
TH
1324#endif
1325 if (howlen == e_star) from = strbeg;
1326 else if (len <= 0) from = s;
1327 else {
1328 tempsym_t *group = symptr;
1329
1330 while (--len && group) group = group->previous;
1331 from = group ? strbeg + group->strbeg : strbeg;
1332 }
1333 sv = from <= s ?
00646304
CB
1334 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1335 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1336 XPUSHs(sv_2mortal(sv));
1337 break;
1338 }
1339#ifdef PERL_PACK_CAN_SHRIEKSIGN
1340 case '@' | TYPE_IS_SHRIEKING:
1341#endif
a6ec74c1 1342 case '@':
28be1210
TH
1343 s = strbeg + symptr->strbeg;
1344#ifdef PERL_PACK_CAN_SHRIEKSIGN
1345 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1346#else /* PERL_PACK_CAN_SHRIEKSIGN */
1347 if (utf8)
1348#endif
1349 {
08ca2aa3
TH
1350 while (len > 0) {
1351 if (s >= strend)
1352 Perl_croak(aTHX_ "'@' outside of string in unpack");
1353 s += UTF8SKIP(s);
1354 len--;
1355 }
1356 if (s > strend)
1357 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1358 } else {
28be1210 1359 if (strend-s < len)
fc241834 1360 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1361 s += len;
08ca2aa3 1362 }
a6ec74c1 1363 break;
62f95557
IZ
1364 case 'X' | TYPE_IS_SHRIEKING:
1365 if (!len) /* Avoid division by 0 */
1366 len = 1;
08ca2aa3 1367 if (utf8) {
f7fe979e 1368 const char *hop, *last;
f337b084
TH
1369 I32 l = len;
1370 hop = last = strbeg;
1371 while (hop < s) {
1372 hop += UTF8SKIP(hop);
1373 if (--l == 0) {
08ca2aa3 1374 last = hop;
f337b084
TH
1375 l = len;
1376 }
fc241834 1377 }
f337b084
TH
1378 if (last > s)
1379 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1380 s = last;
1381 break;
f337b084
TH
1382 }
1383 len = (s - strbeg) % len;
62f95557 1384 /* FALL THROUGH */
a6ec74c1 1385 case 'X':
08ca2aa3
TH
1386 if (utf8) {
1387 while (len > 0) {
1388 if (s <= strbeg)
1389 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1390 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1391 if (s <= strbeg)
1392 Perl_croak(aTHX_ "'X' outside of string in unpack");
1393 }
1394 len--;
1395 }
1396 } else {
fc241834
RGS
1397 if (len > s - strbeg)
1398 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1399 s -= len;
08ca2aa3 1400 }
a6ec74c1 1401 break;
9e27e96a
AL
1402 case 'x' | TYPE_IS_SHRIEKING: {
1403 I32 ai32;
62f95557
IZ
1404 if (!len) /* Avoid division by 0 */
1405 len = 1;
230e1fce
NC
1406 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1407 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1408 if (ai32 == 0) break;
1409 len -= ai32;
9e27e96a 1410 }
62f95557 1411 /* FALL THROUGH */
a6ec74c1 1412 case 'x':
08ca2aa3
TH
1413 if (utf8) {
1414 while (len>0) {
1415 if (s >= strend)
1416 Perl_croak(aTHX_ "'x' outside of string in unpack");
1417 s += UTF8SKIP(s);
1418 len--;
1419 }
1420 } else {
fc241834
RGS
1421 if (len > strend - s)
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1423 s += len;
f337b084 1424 }
a6ec74c1
JH
1425 break;
1426 case '/':
49704364
WL
1427 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1428 break;
a6ec74c1
JH
1429 case 'A':
1430 case 'Z':
1431 case 'a':
08ca2aa3
TH
1432 if (checksum) {
1433 /* Preliminary length estimate is assumed done in 'W' */
1434 if (len > strend - s) len = strend - s;
1435 goto W_checksum;
1436 }
1437 if (utf8) {
1438 I32 l;
f7fe979e 1439 const char *hop;
08ca2aa3
TH
1440 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1441 if (hop >= strend) {
1442 if (hop > strend)
1443 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1444 break;
fc241834 1445 }
a6ec74c1 1446 }
08ca2aa3
TH
1447 if (hop > strend)
1448 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1449 len = hop - s;
1450 } else if (len > strend - s)
1451 len = strend - s;
1452
1453 if (datumtype == 'Z') {
1454 /* 'Z' strips stuff after first null */
f7fe979e 1455 const char *ptr, *end;
f337b084
TH
1456 end = s + len;
1457 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1458 sv = newSVpvn(s, ptr-s);
1459 if (howlen == e_star) /* exact for 'Z*' */
1460 len = ptr-s + (ptr != strend ? 1 : 0);
1461 } else if (datumtype == 'A') {
1462 /* 'A' strips both nulls and spaces */
f7fe979e 1463 const char *ptr;
18bdf90a
TH
1464 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1465 for (ptr = s+len-1; ptr >= s; ptr--)
1466 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1467 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1468 if (ptr >= s) ptr += UTF8SKIP(ptr);
1469 else ptr++;
28be1210 1470 if (ptr > s+len)
18bdf90a
TH
1471 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1472 } else {
1473 for (ptr = s+len-1; ptr >= s; ptr--)
1474 if (*ptr != 0 && !isSPACE(*ptr)) break;
1475 ptr++;
1476 }
08ca2aa3
TH
1477 sv = newSVpvn(s, ptr-s);
1478 } else sv = newSVpvn(s, len);
1479
1480 if (utf8) {
1481 SvUTF8_on(sv);
1482 /* Undo any upgrade done due to need_utf8() */
f337b084 1483 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1484 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1485 }
1486 XPUSHs(sv_2mortal(sv));
08ca2aa3 1487 s += len;
a6ec74c1
JH
1488 break;
1489 case 'B':
08ca2aa3
TH
1490 case 'b': {
1491 char *str;
49704364 1492 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1493 len = (strend - s) * 8;
1494 if (checksum) {
1495 if (!PL_bitcount) {
08ca2aa3 1496 int bits;
a02a5408 1497 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1498 for (bits = 1; bits < 256; bits++) {
1499 if (bits & 1) PL_bitcount[bits]++;
1500 if (bits & 2) PL_bitcount[bits]++;
1501 if (bits & 4) PL_bitcount[bits]++;
1502 if (bits & 8) PL_bitcount[bits]++;
1503 if (bits & 16) PL_bitcount[bits]++;
1504 if (bits & 32) PL_bitcount[bits]++;
1505 if (bits & 64) PL_bitcount[bits]++;
1506 if (bits & 128) PL_bitcount[bits]++;
1507 }
1508 }
f337b084 1509 if (utf8)
08ca2aa3 1510 while (len >= 8 && s < strend) {
f337b084 1511 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1512 len -= 8;
1513 }
f337b084 1514 else
fc241834 1515 while (len >= 8) {
08ca2aa3 1516 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1517 len -= 8;
1518 }
08ca2aa3
TH
1519 if (len && s < strend) {
1520 U8 bits;
f337b084
TH
1521 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1522 if (datumtype == 'b')
a6ec74c1 1523 while (len-- > 0) {
92d41999 1524 if (bits & 1) cuv++;
a6ec74c1
JH
1525 bits >>= 1;
1526 }
f337b084 1527 else
a6ec74c1 1528 while (len-- > 0) {
08ca2aa3 1529 if (bits & 0x80) cuv++;
a6ec74c1
JH
1530 bits <<= 1;
1531 }
fc241834 1532 }
a6ec74c1
JH
1533 break;
1534 }
08ca2aa3
TH
1535
1536 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1537 SvPOK_on(sv);
1538 str = SvPVX(sv);
1539 if (datumtype == 'b') {
f337b084 1540 U8 bits = 0;
f7fe979e 1541 const I32 ai32 = len;
08ca2aa3
TH
1542 for (len = 0; len < ai32; len++) {
1543 if (len & 7) bits >>= 1;
1544 else if (utf8) {
1545 if (s >= strend) break;
f337b084 1546 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1547 } else bits = *(U8 *) s++;
1548 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1549 }
08ca2aa3 1550 } else {
f337b084 1551 U8 bits = 0;
f7fe979e 1552 const I32 ai32 = len;
08ca2aa3
TH
1553 for (len = 0; len < ai32; len++) {
1554 if (len & 7) bits <<= 1;
1555 else if (utf8) {
1556 if (s >= strend) break;
f337b084 1557 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1558 } else bits = *(U8 *) s++;
1559 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1560 }
1561 }
1562 *str = '\0';
aa07b2f6 1563 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1564 XPUSHs(sv);
a6ec74c1 1565 break;
08ca2aa3 1566 }
a6ec74c1 1567 case 'H':
08ca2aa3
TH
1568 case 'h': {
1569 char *str;
fc241834 1570 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1571 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1572 len = (strend - s) * 2;
fc241834 1573 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1574 SvPOK_on(sv);
1575 str = SvPVX(sv);
1576 if (datumtype == 'h') {
f337b084 1577 U8 bits = 0;
9e27e96a 1578 I32 ai32 = len;
fc241834
RGS
1579 for (len = 0; len < ai32; len++) {
1580 if (len & 1) bits >>= 4;
1581 else if (utf8) {
1582 if (s >= strend) break;
f337b084 1583 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1584 } else bits = * (U8 *) s++;
a6ec74c1
JH
1585 *str++ = PL_hexdigit[bits & 15];
1586 }
08ca2aa3 1587 } else {
f337b084 1588 U8 bits = 0;
f7fe979e 1589 const I32 ai32 = len;
08ca2aa3
TH
1590 for (len = 0; len < ai32; len++) {
1591 if (len & 1) bits <<= 4;
1592 else if (utf8) {
1593 if (s >= strend) break;
f337b084 1594 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1595 } else bits = *(U8 *) s++;
a6ec74c1
JH
1596 *str++ = PL_hexdigit[(bits >> 4) & 15];
1597 }
1598 }
1599 *str = '\0';
aa07b2f6 1600 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1601 XPUSHs(sv);
a6ec74c1 1602 break;
08ca2aa3 1603 }
a6ec74c1 1604 case 'c':
73cb7263 1605 while (len-- > 0) {
f337b084 1606 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1607 if (aint >= 128) /* fake up signed chars */
1608 aint -= 256;
08ca2aa3 1609 if (!checksum)
ac7f3b1b 1610 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1611 else if (checksum > bits_in_uv)
1612 cdouble += (NV)aint;
1613 else
1614 cuv += aint;
a6ec74c1
JH
1615 }
1616 break;
1617 case 'C':
08ca2aa3
TH
1618 case 'W':
1619 W_checksum:
35bcd338 1620 if (len == 0) {
fc241834 1621 if (explicit_length && datumtype == 'C')
08ca2aa3 1622 /* Switch to "character" mode */
f337b084 1623 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1624 break;
1625 }
fc241834 1626 if (datumtype == 'C' ?
f337b084
TH
1627 (symptr->flags & FLAG_DO_UTF8) &&
1628 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1629 while (len-- > 0 && s < strend) {
08ca2aa3 1630 STRLEN retlen;
f7fe979e 1631 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1632 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1633 if (retlen == (STRLEN) -1 || retlen == 0)
1634 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1635 s += retlen;
1636 if (!checksum)
1637 PUSHs(sv_2mortal(newSVuv((UV) val)));
1638 else if (checksum > bits_in_uv)
1639 cdouble += (NV) val;
d6d3e8bd 1640 else
08ca2aa3 1641 cuv += val;
fc241834 1642 }
08ca2aa3 1643 } else if (!checksum)
a6ec74c1 1644 while (len-- > 0) {
f7fe979e 1645 const U8 ch = *(U8 *) s++;
08ca2aa3 1646 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1647 }
08ca2aa3
TH
1648 else if (checksum > bits_in_uv)
1649 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1650 else
1651 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1652 break;
1653 case 'U':
35bcd338 1654 if (len == 0) {
08ca2aa3
TH
1655 if (explicit_length) {
1656 /* Switch to "bytes in UTF-8" mode */
f337b084 1657 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1658 else
1659 /* Should be impossible due to the need_utf8() test */
1660 Perl_croak(aTHX_ "U0 mode on a byte string");
1661 }
35bcd338
JH
1662 break;
1663 }
08ca2aa3 1664 if (len > strend - s) len = strend - s;
fc241834 1665 if (!checksum) {
08ca2aa3
TH
1666 if (len && unpack_only_one) len = 1;
1667 EXTEND(SP, len);
1668 EXTEND_MORTAL(len);
fc241834 1669 }
08ca2aa3
TH
1670 while (len-- > 0 && s < strend) {
1671 STRLEN retlen;
1672 UV auv;
1673 if (utf8) {
1674 U8 result[UTF8_MAXLEN];
f7fe979e 1675 const char *ptr = s;
08ca2aa3 1676 STRLEN len;
08ca2aa3
TH
1677 /* Bug: warns about bad utf8 even if we are short on bytes
1678 and will break out of the loop */
230e1fce
NC
1679 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1680 'U'))
08ca2aa3
TH
1681 break;
1682 len = UTF8SKIP(result);
fc241834 1683 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1684 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1685 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1686 s = ptr;
1687 } else {
1688 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1689 if (retlen == (STRLEN) -1 || retlen == 0)
1690 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1691 s += retlen;
1692 }
1693 if (!checksum)
1694 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1695 else if (checksum > bits_in_uv)
08ca2aa3 1696 cdouble += (NV) auv;
73cb7263 1697 else
08ca2aa3 1698 cuv += auv;
a6ec74c1
JH
1699 }
1700 break;
49704364
WL
1701 case 's' | TYPE_IS_SHRIEKING:
1702#if SHORTSIZE != SIZE16
73cb7263 1703 while (len-- > 0) {
08ca2aa3 1704 short ashort;
f337b084
TH
1705 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1706 DO_BO_UNPACK(ashort, s);
08ca2aa3 1707 if (!checksum)
ac7f3b1b 1708 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1709 else if (checksum > bits_in_uv)
1710 cdouble += (NV)ashort;
1711 else
1712 cuv += ashort;
49704364
WL
1713 }
1714 break;
1715#else
1716 /* Fallthrough! */
a6ec74c1 1717#endif
49704364 1718 case 's':
73cb7263 1719 while (len-- > 0) {
08ca2aa3
TH
1720 I16 ai16;
1721
1722#if U16SIZE > SIZE16
1723 ai16 = 0;
1724#endif
f337b084 1725 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1726 DO_BO_UNPACK(ai16, 16);
1109a392 1727#if U16SIZE > SIZE16
73cb7263
NC
1728 if (ai16 > 32767)
1729 ai16 -= 65536;
a6ec74c1 1730#endif
08ca2aa3 1731 if (!checksum)
ac7f3b1b 1732 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1733 else if (checksum > bits_in_uv)
1734 cdouble += (NV)ai16;
1735 else
1736 cuv += ai16;
a6ec74c1
JH
1737 }
1738 break;
49704364
WL
1739 case 'S' | TYPE_IS_SHRIEKING:
1740#if SHORTSIZE != SIZE16
73cb7263 1741 while (len-- > 0) {
08ca2aa3 1742 unsigned short aushort;
f337b084
TH
1743 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1744 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1745 if (!checksum)
1746 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1747 else if (checksum > bits_in_uv)
1748 cdouble += (NV)aushort;
1749 else
1750 cuv += aushort;
49704364
WL
1751 }
1752 break;
1753#else
1754 /* Fallhrough! */
1755#endif
a6ec74c1
JH
1756 case 'v':
1757 case 'n':
1758 case 'S':
73cb7263 1759 while (len-- > 0) {
08ca2aa3
TH
1760 U16 au16;
1761#if U16SIZE > SIZE16
1762 au16 = 0;
1763#endif
f337b084 1764 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1765 DO_BO_UNPACK(au16, 16);
a6ec74c1 1766#ifdef HAS_NTOHS
73cb7263
NC
1767 if (datumtype == 'n')
1768 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1769#endif
1770#ifdef HAS_VTOHS
73cb7263
NC
1771 if (datumtype == 'v')
1772 au16 = vtohs(au16);
a6ec74c1 1773#endif
08ca2aa3
TH
1774 if (!checksum)
1775 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1776 else if (checksum > bits_in_uv)
f337b084 1777 cdouble += (NV) au16;
73cb7263
NC
1778 else
1779 cuv += au16;
a6ec74c1
JH
1780 }
1781 break;
7212898e 1782#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1783 case 'v' | TYPE_IS_SHRIEKING:
1784 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1785 while (len-- > 0) {
08ca2aa3
TH
1786 I16 ai16;
1787# if U16SIZE > SIZE16
1788 ai16 = 0;
1789# endif
f337b084 1790 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1791# ifdef HAS_NTOHS
73cb7263 1792 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1793 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1794# endif /* HAS_NTOHS */
1795# ifdef HAS_VTOHS
73cb7263 1796 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1797 ai16 = (I16) vtohs((U16) ai16);
1798# endif /* HAS_VTOHS */
1799 if (!checksum)
ac7f3b1b 1800 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1801 else if (checksum > bits_in_uv)
08ca2aa3 1802 cdouble += (NV) ai16;
73cb7263
NC
1803 else
1804 cuv += ai16;
068bd2e7
MHM
1805 }
1806 break;
08ca2aa3 1807#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1808 case 'i':
49704364 1809 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1810 while (len-- > 0) {
08ca2aa3 1811 int aint;
f337b084
TH
1812 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1813 DO_BO_UNPACK(aint, i);
08ca2aa3 1814 if (!checksum)
ac7f3b1b 1815 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1816 else if (checksum > bits_in_uv)
1817 cdouble += (NV)aint;
1818 else
1819 cuv += aint;
a6ec74c1
JH
1820 }
1821 break;
1822 case 'I':
49704364 1823 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1824 while (len-- > 0) {
08ca2aa3 1825 unsigned int auint;
f337b084
TH
1826 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1827 DO_BO_UNPACK(auint, i);
08ca2aa3 1828 if (!checksum)
ac7f3b1b 1829 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1830 else if (checksum > bits_in_uv)
1831 cdouble += (NV)auint;
1832 else
1833 cuv += auint;
a6ec74c1
JH
1834 }
1835 break;
92d41999 1836 case 'j':
73cb7263 1837 while (len-- > 0) {
08ca2aa3 1838 IV aiv;
f337b084 1839 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1840#if IVSIZE == INTSIZE
f337b084 1841 DO_BO_UNPACK(aiv, i);
1109a392 1842#elif IVSIZE == LONGSIZE
f337b084 1843 DO_BO_UNPACK(aiv, l);
1109a392 1844#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1845 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1846#else
1847 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1848#endif
08ca2aa3 1849 if (!checksum)
ac7f3b1b 1850 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1851 else if (checksum > bits_in_uv)
1852 cdouble += (NV)aiv;
1853 else
1854 cuv += aiv;
92d41999
JH
1855 }
1856 break;
1857 case 'J':
73cb7263 1858 while (len-- > 0) {
08ca2aa3 1859 UV auv;
f337b084 1860 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1861#if IVSIZE == INTSIZE
f337b084 1862 DO_BO_UNPACK(auv, i);
08ca2aa3 1863#elif IVSIZE == LONGSIZE
f337b084 1864 DO_BO_UNPACK(auv, l);
08ca2aa3 1865#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1866 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1867#else
1868 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1869#endif
08ca2aa3 1870 if (!checksum)
ac7f3b1b 1871 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1872 else if (checksum > bits_in_uv)
1873 cdouble += (NV)auv;
1874 else
1875 cuv += auv;
92d41999
JH
1876 }
1877 break;
49704364
WL
1878 case 'l' | TYPE_IS_SHRIEKING:
1879#if LONGSIZE != SIZE32
73cb7263 1880 while (len-- > 0) {
08ca2aa3 1881 long along;
f337b084
TH
1882 SHIFT_VAR(utf8, s, strend, along, datumtype);
1883 DO_BO_UNPACK(along, l);
08ca2aa3 1884 if (!checksum)
ac7f3b1b 1885 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1886 else if (checksum > bits_in_uv)
1887 cdouble += (NV)along;
1888 else
1889 cuv += along;
49704364
WL
1890 }
1891 break;
1892#else
1893 /* Fallthrough! */
a6ec74c1 1894#endif
49704364 1895 case 'l':
73cb7263 1896 while (len-- > 0) {
08ca2aa3
TH
1897 I32 ai32;
1898#if U32SIZE > SIZE32
1899 ai32 = 0;
1900#endif
f337b084 1901 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1902 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1903#if U32SIZE > SIZE32
08ca2aa3 1904 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1905#endif
08ca2aa3 1906 if (!checksum)
ac7f3b1b 1907 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1908 else if (checksum > bits_in_uv)
1909 cdouble += (NV)ai32;
1910 else
1911 cuv += ai32;
a6ec74c1
JH
1912 }
1913 break;
49704364
WL
1914 case 'L' | TYPE_IS_SHRIEKING:
1915#if LONGSIZE != SIZE32
73cb7263 1916 while (len-- > 0) {
08ca2aa3 1917 unsigned long aulong;
f337b084
TH
1918 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1919 DO_BO_UNPACK(aulong, l);
08ca2aa3 1920 if (!checksum)
ac7f3b1b 1921 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1922 else if (checksum > bits_in_uv)
1923 cdouble += (NV)aulong;
1924 else
1925 cuv += aulong;
49704364
WL
1926 }
1927 break;
1928#else
1929 /* Fall through! */
1930#endif
a6ec74c1
JH
1931 case 'V':
1932 case 'N':
1933 case 'L':
73cb7263 1934 while (len-- > 0) {
08ca2aa3
TH
1935 U32 au32;
1936#if U32SIZE > SIZE32
1937 au32 = 0;
1938#endif
f337b084 1939 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1940 DO_BO_UNPACK(au32, 32);
a6ec74c1 1941#ifdef HAS_NTOHL
73cb7263
NC
1942 if (datumtype == 'N')
1943 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1944#endif
1945#ifdef HAS_VTOHL
73cb7263
NC
1946 if (datumtype == 'V')
1947 au32 = vtohl(au32);
a6ec74c1 1948#endif
08ca2aa3 1949 if (!checksum)
fc241834
RGS
1950 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1951 else if (checksum > bits_in_uv)
1952 cdouble += (NV)au32;
1953 else
1954 cuv += au32;
a6ec74c1
JH
1955 }
1956 break;
7212898e 1957#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1958 case 'V' | TYPE_IS_SHRIEKING:
1959 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1960 while (len-- > 0) {
08ca2aa3
TH
1961 I32 ai32;
1962# if U32SIZE > SIZE32
1963 ai32 = 0;
1964# endif
f337b084 1965 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1966# ifdef HAS_NTOHL
73cb7263
NC
1967 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1968 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1969# endif
1970# ifdef HAS_VTOHL
73cb7263
NC
1971 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1972 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1973# endif
1974 if (!checksum)
ac7f3b1b 1975 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1976 else if (checksum > bits_in_uv)
1977 cdouble += (NV)ai32;
1978 else
1979 cuv += ai32;
068bd2e7
MHM
1980 }
1981 break;
08ca2aa3 1982#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1983 case 'p':
a6ec74c1 1984 while (len-- > 0) {
f7fe979e 1985 const char *aptr;
f337b084 1986 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1987 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1988 /* newSVpv generates undef if aptr is NULL */
1989 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1990 }
1991 break;
1992 case 'w':
a6ec74c1
JH
1993 {
1994 UV auv = 0;
1995 U32 bytes = 0;
fc241834 1996
08ca2aa3
TH
1997 while (len > 0 && s < strend) {
1998 U8 ch;
f337b084 1999 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 2000 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2001 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2002 if (ch < 0x80) {
a6ec74c1 2003 bytes = 0;
ac7f3b1b 2004 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
2005 len--;
2006 auv = 0;
08ca2aa3 2007 continue;
a6ec74c1 2008 }
08ca2aa3 2009 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2010 const char *t;
a6ec74c1
JH
2011
2012 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2013 while (s < strend) {
f337b084 2014 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2015 sv = mul128(sv, (U8)(ch & 0x7f));
2016 if (!(ch & 0x80)) {
a6ec74c1
JH
2017 bytes = 0;
2018 break;
2019 }
2020 }
10516c54 2021 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2022 while (*t == '0')
2023 t++;
2024 sv_chop(sv, t);
2025 PUSHs(sv_2mortal(sv));
2026 len--;
2027 auv = 0;
2028 }
2029 }
2030 if ((s >= strend) && bytes)
49704364 2031 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2032 }
2033 break;
2034 case 'P':
49704364
WL
2035 if (symptr->howlen == e_star)
2036 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2037 EXTEND(SP, 1);
08ca2aa3
TH
2038 if (sizeof(char*) <= strend - s) {
2039 char *aptr;
f337b084 2040 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2041 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2042 /* newSVpvn generates undef if aptr is NULL */
2043 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2044 }
a6ec74c1
JH
2045 break;
2046#ifdef HAS_QUAD
2047 case 'q':
73cb7263 2048 while (len-- > 0) {
08ca2aa3 2049 Quad_t aquad;
f337b084
TH
2050 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2051 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2052 if (!checksum)
2053 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2054 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2055 else if (checksum > bits_in_uv)
2056 cdouble += (NV)aquad;
2057 else
2058 cuv += aquad;
2059 }
a6ec74c1
JH
2060 break;
2061 case 'Q':
73cb7263 2062 while (len-- > 0) {
08ca2aa3 2063 Uquad_t auquad;
f337b084
TH
2064 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2065 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2066 if (!checksum)
2067 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2068 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2069 else if (checksum > bits_in_uv)
2070 cdouble += (NV)auquad;
2071 else
2072 cuv += auquad;
a6ec74c1
JH
2073 }
2074 break;
08ca2aa3 2075#endif /* HAS_QUAD */
a6ec74c1
JH
2076 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2077 case 'f':
73cb7263 2078 while (len-- > 0) {
08ca2aa3 2079 float afloat;
f337b084 2080 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2081 DO_BO_UNPACK_N(afloat, float);
2082 if (!checksum)
2083 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2084 else
73cb7263 2085 cdouble += afloat;
fc241834 2086 }
a6ec74c1
JH
2087 break;
2088 case 'd':
73cb7263 2089 while (len-- > 0) {
08ca2aa3 2090 double adouble;
f337b084 2091 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2092 DO_BO_UNPACK_N(adouble, double);
2093 if (!checksum)
2094 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2095 else
73cb7263 2096 cdouble += adouble;
fc241834 2097 }
a6ec74c1 2098 break;
92d41999 2099 case 'F':
73cb7263 2100 while (len-- > 0) {
08ca2aa3 2101 NV anv;
f337b084 2102 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2103 DO_BO_UNPACK_N(anv, NV);
2104 if (!checksum)
2105 PUSHs(sv_2mortal(newSVnv(anv)));
2106 else
73cb7263 2107 cdouble += anv;
fc241834 2108 }
92d41999
JH
2109 break;
2110#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2111 case 'D':
73cb7263 2112 while (len-- > 0) {
08ca2aa3 2113 long double aldouble;
f337b084 2114 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2115 DO_BO_UNPACK_N(aldouble, long double);
2116 if (!checksum)
2117 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2118 else
2119 cdouble += aldouble;
92d41999
JH
2120 }
2121 break;
2122#endif
a6ec74c1
JH
2123 case 'u':
2124 /* MKS:
2125 * Initialise the decode mapping. By using a table driven
2126 * algorithm, the code will be character-set independent
2127 * (and just as fast as doing character arithmetic)
2128 */
2129 if (PL_uudmap['M'] == 0) {
2130 int i;
2131
2132 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2133 PL_uudmap[(U8)PL_uuemap[i]] = i;
2134 /*
2135 * Because ' ' and '`' map to the same value,
2136 * we need to decode them both the same.
2137 */
2138 PL_uudmap[' '] = 0;
2139 }
08ca2aa3 2140 {
f7fe979e 2141 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
08ca2aa3
TH
2142 sv = sv_2mortal(NEWSV(42, l));
2143 if (l) SvPOK_on(sv);
2144 }
2145 if (utf8) {
2146 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2147 I32 a, b, c, d;
2148 char hunk[4];
2149
2150 hunk[3] = '\0';
2151 while (len > 0) {
2152 next_uni_uu(aTHX_ &s, strend, &a);
2153 next_uni_uu(aTHX_ &s, strend, &b);
2154 next_uni_uu(aTHX_ &s, strend, &c);
2155 next_uni_uu(aTHX_ &s, strend, &d);
2156 hunk[0] = (char)((a << 2) | (b >> 4));
2157 hunk[1] = (char)((b << 4) | (c >> 2));
2158 hunk[2] = (char)((c << 6) | d);
2159 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2160 len -= 3;
2161 }
2162 if (s < strend) {
f7fe979e
AL
2163 if (*s == '\n') {
2164 s++;
2165 }
08ca2aa3
TH
2166 else {
2167 /* possible checksum byte */
f7fe979e
AL
2168 const char *skip = s+UTF8SKIP(s);
2169 if (skip < strend && *skip == '\n')
2170 s = skip+1;
08ca2aa3
TH
2171 }
2172 }
2173 }
2174 } else {
fc241834
RGS
2175 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2176 I32 a, b, c, d;
2177 char hunk[4];
a6ec74c1 2178
fc241834
RGS
2179 hunk[3] = '\0';
2180 len = PL_uudmap[*(U8*)s++] & 077;
2181 while (len > 0) {
2182 if (s < strend && ISUUCHAR(*s))
2183 a = PL_uudmap[*(U8*)s++] & 077;
2184 else
2185 a = 0;
2186 if (s < strend && ISUUCHAR(*s))
2187 b = PL_uudmap[*(U8*)s++] & 077;
2188 else
2189 b = 0;
2190 if (s < strend && ISUUCHAR(*s))
2191 c = PL_uudmap[*(U8*)s++] & 077;
2192 else
2193 c = 0;
2194 if (s < strend && ISUUCHAR(*s))
2195 d = PL_uudmap[*(U8*)s++] & 077;
2196 else
2197 d = 0;
2198 hunk[0] = (char)((a << 2) | (b >> 4));
2199 hunk[1] = (char)((b << 4) | (c >> 2));
2200 hunk[2] = (char)((c << 6) | d);
2201 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2202 len -= 3;
2203 }
2204 if (*s == '\n')
2205 s++;
2206 else /* possible checksum byte */
2207 if (s + 1 < strend && s[1] == '\n')
2208 s += 2;
a6ec74c1 2209 }
08ca2aa3
TH
2210 }
2211 XPUSHs(sv);
a6ec74c1
JH
2212 break;
2213 }
49704364 2214
a6ec74c1 2215 if (checksum) {
1109a392 2216 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2217 (checksum > bits_in_uv &&
08ca2aa3
TH
2218 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2219 NV trouble, anv;
a6ec74c1 2220
08ca2aa3 2221 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2222 while (checksum >= 16) {
2223 checksum -= 16;
08ca2aa3 2224 anv *= 65536.0;
a6ec74c1 2225 }
a6ec74c1 2226 while (cdouble < 0.0)
08ca2aa3
TH
2227 cdouble += anv;
2228 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2229 sv = newSVnv(cdouble);
a6ec74c1
JH
2230 }
2231 else {
fa8ec7c1
NC
2232 if (checksum < bits_in_uv) {
2233 UV mask = ((UV)1 << checksum) - 1;
92d41999 2234 cuv &= mask;
a6ec74c1 2235 }
c4c5f44a 2236 sv = newSVuv(cuv);
a6ec74c1
JH
2237 }
2238 XPUSHs(sv_2mortal(sv));
2239 checksum = 0;
2240 }
fc241834 2241
49704364
WL
2242 if (symptr->flags & FLAG_SLASH){
2243 if (SP - PL_stack_base - start_sp_offset <= 0)
2244 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2245 if( next_symbol(symptr) ){
2246 if( symptr->howlen == e_number )
2247 Perl_croak(aTHX_ "Count after length/code in unpack" );
2248 if( beyond ){
2249 /* ...end of char buffer then no decent length available */
2250 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2251 } else {
2252 /* take top of stack (hope it's numeric) */
2253 len = POPi;
2254 if( len < 0 )
2255 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2256 }
2257 } else {
2258 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2259 }
2260 datumtype = symptr->code;
21c16052 2261 explicit_length = FALSE;
49704364
WL
2262 goto redo_switch;
2263 }
a6ec74c1 2264 }
49704364 2265
18529408
IZ
2266 if (new_s)
2267 *new_s = s;
2268 PUTBACK;
2269 return SP - PL_stack_base - start_sp_offset;
2270}
2271
2272PP(pp_unpack)
2273{
2274 dSP;
bab9c0ac 2275 dPOPPOPssrl;
18529408
IZ
2276 I32 gimme = GIMME_V;
2277 STRLEN llen;
2278 STRLEN rlen;
5c144d81
NC
2279 const char *pat = SvPV_const(left, llen);
2280 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2281 const char *strend = s + rlen;
2282 const char *patend = pat + llen;
08ca2aa3 2283 I32 cnt;
18529408
IZ
2284
2285 PUTBACK;
7accc089 2286 cnt = unpackstring(pat, patend, s, strend,
49704364 2287 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2288 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2289
18529408
IZ
2290 SPAGAIN;
2291 if ( !cnt && gimme == G_SCALAR )
2292 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2293 RETURN;
2294}
2295
f337b084 2296STATIC U8 *
f7fe979e 2297doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2298{
f337b084 2299 *h++ = PL_uuemap[len];
a6ec74c1 2300 while (len > 2) {
f337b084
TH
2301 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2302 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2303 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2304 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2305 s += 3;
2306 len -= 3;
2307 }
2308 if (len > 0) {
f7fe979e 2309 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2310 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2311 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2312 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2313 *h++ = PL_uuemap[0];
a6ec74c1 2314 }
f337b084
TH
2315 *h++ = '\n';
2316 return h;
a6ec74c1
JH
2317}
2318
2319STATIC SV *
f7fe979e 2320S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2321{
8b6e33c7
AL
2322 SV *result = newSVpvn(s, l);
2323 char *const result_c = SvPV_nolen(result); /* convenience */
2324 char *out = result_c;
2325 bool skip = 1;
2326 bool ignore = 0;
a6ec74c1
JH
2327
2328 while (*s) {
2329 switch (*s) {
2330 case ' ':
2331 break;
2332 case '+':
2333 if (!skip) {
2334 SvREFCNT_dec(result);
2335 return (NULL);
2336 }
2337 break;
2338 case '0':
2339 case '1':
2340 case '2':
2341 case '3':
2342 case '4':
2343 case '5':
2344 case '6':
2345 case '7':
2346 case '8':
2347 case '9':
2348 skip = 0;
2349 if (!ignore) {
2350 *(out++) = *s;
2351 }
2352 break;
2353 case '.':
2354 ignore = 1;
2355 break;
2356 default:
2357 SvREFCNT_dec(result);
2358 return (NULL);
2359 }
2360 s++;
2361 }
2362 *(out++) = '\0';
2363 SvCUR_set(result, out - result_c);
2364 return (result);
2365}
2366
2367/* pnum must be '\0' terminated */
2368STATIC int
2369S_div128(pTHX_ SV *pnum, bool *done)
2370{
8b6e33c7
AL
2371 STRLEN len;
2372 char * const s = SvPV(pnum, len);
2373 char *t = s;
2374 int m = 0;
2375
2376 *done = 1;
2377 while (*t) {
2378 const int i = m * 10 + (*t - '0');
2379 const int r = (i >> 7); /* r < 10 */
2380 m = i & 0x7F;
2381 if (r) {
2382 *done = 0;
2383 }
2384 *(t++) = '0' + r;
a6ec74c1 2385 }
8b6e33c7
AL
2386 *(t++) = '\0';
2387 SvCUR_set(pnum, (STRLEN) (t - s));
2388 return (m);
a6ec74c1
JH
2389}
2390
18529408
IZ
2391/*
2392=for apidoc pack_cat
2393
7accc089
JH
2394The engine implementing pack() Perl function. Note: parameters next_in_list and
2395flags are not used. This call should not be used; use packlist instead.
18529408 2396
bfce84ec
AL
2397=cut
2398*/
49704364 2399
18529408 2400void
f7fe979e 2401Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2402{
aadb217d 2403 tempsym_t sym;
8772537c
AL
2404 PERL_UNUSED_ARG(next_in_list);
2405 PERL_UNUSED_ARG(flags);
7accc089 2406
f7fe979e 2407 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
aadb217d 2408
7accc089
JH
2409 (void)pack_rec( cat, &sym, beglist, endlist );
2410}
2411
2412
2413/*
2414=for apidoc packlist
2415
2416The engine implementing pack() Perl function.
2417
bfce84ec
AL
2418=cut
2419*/
7accc089
JH
2420
2421void
f7fe979e 2422Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2423{
f337b084 2424 STRLEN no_len;
aadb217d
JH
2425 tempsym_t sym;
2426
f7fe979e 2427 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2428
f337b084
TH
2429 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2430 Also make sure any UTF8 flag is loaded */
2431 SvPV_force(cat, no_len);
bfce84ec
AL
2432 if (DO_UTF8(cat))
2433 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2434
49704364
WL
2435 (void)pack_rec( cat, &sym, beglist, endlist );
2436}
2437
f337b084
TH
2438/* like sv_utf8_upgrade, but also repoint the group start markers */
2439STATIC void
2440marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2441 STRLEN len;
2442 tempsym_t *group;
f7fe979e
AL
2443 const char *from_ptr, *from_start, *from_end, **marks, **m;
2444 char *to_start, *to_ptr;
f337b084
TH
2445
2446 if (SvUTF8(sv)) return;
2447
aa07b2f6 2448 from_start = SvPVX_const(sv);
f337b084
TH
2449 from_end = from_start + SvCUR(sv);
2450 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2451 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2452 if (from_ptr == from_end) {
2453 /* Simple case: no character needs to be changed */
2454 SvUTF8_on(sv);
2455 return;
2456 }
2457
3473cf63 2458 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2459 Newx(to_start, len, char);
f337b084
TH
2460 Copy(from_start, to_start, from_ptr-from_start, char);
2461 to_ptr = to_start + (from_ptr-from_start);
2462
a02a5408 2463 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2464 for (group=sym_ptr; group; group = group->previous)
2465 marks[group->level] = from_start + group->strbeg;
2466 marks[sym_ptr->level+1] = from_end+1;
2467 for (m = marks; *m < from_ptr; m++)
2468 *m = to_start + (*m-from_start);
2469
2470 for (;from_ptr < from_end; from_ptr++) {
2471 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2472 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2473 }
2474 *to_ptr = 0;
2475
2476 while (*m == from_ptr) *m++ = to_ptr;
2477 if (m != marks + sym_ptr->level+1) {
2478 Safefree(marks);
2479 Safefree(to_start);
2480 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2481 }
2482 for (group=sym_ptr; group; group = group->previous)
2483 group->strbeg = marks[group->level] - to_start;
2484 Safefree(marks);
2485
2486 if (SvOOK(sv)) {
2487 if (SvIVX(sv)) {
b162af07 2488 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2489 from_start -= SvIVX(sv);
2490 SvIV_set(sv, 0);
2491 }
2492 SvFLAGS(sv) &= ~SVf_OOK;
2493 }
2494 if (SvLEN(sv) != 0)
2495 Safefree(from_start);
f880fe2f 2496 SvPV_set(sv, to_start);
b162af07
SP
2497 SvCUR_set(sv, to_ptr - to_start);
2498 SvLEN_set(sv, len);
f337b084
TH
2499 SvUTF8_on(sv);
2500}
2501
2502/* Exponential string grower. Makes string extension effectively O(n)
2503 needed says how many extra bytes we need (not counting the final '\0')
2504 Only grows the string if there is an actual lack of space
2505*/
2506STATIC char *
2507sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2508 const STRLEN cur = SvCUR(sv);
2509 const STRLEN len = SvLEN(sv);
f337b084
TH
2510 STRLEN extend;
2511 if (len - cur > needed) return SvPVX(sv);
2512 extend = needed > len ? needed : len;
2513 return SvGROW(sv, len+extend+1);
2514}
49704364
WL
2515
2516STATIC
2517SV **
f337b084 2518S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2519{
49704364 2520 tempsym_t lookahead;
f337b084
TH
2521 I32 items = endlist - beglist;
2522 bool found = next_symbol(symptr);
2523 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2524 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2525
2526 if (symptr->level == 0 && found && symptr->code == 'U') {
2527 marked_upgrade(aTHX_ cat, symptr);
2528 symptr->flags |= FLAG_DO_UTF8;
2529 utf8 = 0;
49704364 2530 }
f337b084 2531 symptr->strbeg = SvCUR(cat);
49704364
WL
2532
2533 while (found) {
f337b084
TH
2534 SV *fromstr;
2535 STRLEN fromlen;
2536 I32 len;
a6ec74c1 2537 SV *lengthcode = Nullsv;
49704364 2538 I32 datumtype = symptr->code;
f337b084
TH
2539 howlen_t howlen = symptr->howlen;
2540 char *start = SvPVX(cat);
2541 char *cur = start + SvCUR(cat);
49704364 2542
f337b084
TH
2543#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2544
2545 switch (howlen) {
fc241834 2546 case e_star:
f337b084
TH
2547 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2548 0 : items;
2549 break;
2550 default:
2551 /* e_no_len and e_number */
2552 len = symptr->length;
49704364
WL
2553 break;
2554 }
2555
f337b084 2556 if (len) {
a7a3cfaa 2557 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2558
a7a3cfaa
TH
2559 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2560 /* We can process this letter. */
2561 STRLEN size = props & PACK_SIZE_MASK;
2562 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2563 }
f337b084
TH
2564 }
2565
49704364
WL
2566 /* Look ahead for next symbol. Do we have code/code? */
2567 lookahead = *symptr;
2568 found = next_symbol(&lookahead);
246f24af
TH
2569 if (symptr->flags & FLAG_SLASH) {
2570 IV count;
f337b084 2571 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2572 if (strchr("aAZ", lookahead.code)) {
2573 if (lookahead.howlen == e_number) count = lookahead.length;
2574 else {
2575 if (items > 0)
2576 count = DO_UTF8(*beglist) ?
2577 sv_len_utf8(*beglist) : sv_len(*beglist);
2578 else count = 0;
2579 if (lookahead.code == 'Z') count++;
2580 }
2581 } else {
2582 if (lookahead.howlen == e_number && lookahead.length < items)
2583 count = lookahead.length;
2584 else count = items;
2585 }
2586 lookahead.howlen = e_number;
2587 lookahead.length = count;
2588 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2589 }
49704364 2590
fc241834
RGS
2591 /* Code inside the switch must take care to properly update
2592 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2593 doesn't simply leave using break */
1109a392 2594 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2595 default:
f337b084
TH
2596 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2597 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2598 case '%':
49704364 2599 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2600 {
2601 char *from;
2602#ifdef PERL_PACK_CAN_SHRIEKSIGN
2603 case '.' | TYPE_IS_SHRIEKING:
2604#endif
2605 case '.':
2606 if (howlen == e_star) from = start;
2607 else if (len == 0) from = cur;
2608 else {
2609 tempsym_t *group = symptr;
2610
2611 while (--len && group) group = group->previous;
2612 from = group ? start + group->strbeg : start;
2613 }
2614 fromstr = NEXTFROM;
2615 len = SvIV(fromstr);
2616 goto resize;
2617#ifdef PERL_PACK_CAN_SHRIEKSIGN
2618 case '@' | TYPE_IS_SHRIEKING:
2619#endif
a6ec74c1 2620 case '@':
28be1210
TH
2621 from = start + symptr->strbeg;
2622 resize:
2623#ifdef PERL_PACK_CAN_SHRIEKSIGN
2624 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2625#else /* PERL_PACK_CAN_SHRIEKSIGN */
2626 if (utf8)
2627#endif
2628 if (len >= 0) {
2629 while (len && from < cur) {
2630 from += UTF8SKIP(from);
2631 len--;
2632 }
2633 if (from > cur)
2634 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2635 if (len) {
2636 /* Here we know from == cur */
2637 grow:
2638 GROWING(0, cat, start, cur, len);
2639 Zero(cur, len, char);
2640 cur += len;
2641 } else if (from < cur) {
2642 len = cur - from;
2643 goto shrink;
2644 } else goto no_change;
2645 } else {
2646 cur = from;
2647 len = -len;
2648 goto utf8_shrink;
f337b084 2649 }
28be1210
TH
2650 else {
2651 len -= cur - from;
f337b084 2652 if (len > 0) goto grow;
28be1210 2653 if (len == 0) goto no_change;
fc241834 2654 len = -len;
28be1210 2655 goto shrink;
f337b084 2656 }
a6ec74c1 2657 break;
28be1210 2658 }
fc241834 2659 case '(': {
49704364 2660 tempsym_t savsym = *symptr;
66c611c5
MHM
2661 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2662 symptr->flags |= group_modifiers;
49704364
WL
2663 symptr->patend = savsym.grpend;
2664 symptr->level++;
f337b084 2665 symptr->previous = &lookahead;
18529408 2666 while (len--) {
f337b084
TH
2667 U32 was_utf8;
2668 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2669 else symptr->flags &= ~FLAG_PARSE_UTF8;
2670 was_utf8 = SvUTF8(cat);
49704364 2671 symptr->patptr = savsym.grpbeg;
f337b084
TH
2672 beglist = pack_rec(cat, symptr, beglist, endlist);
2673 if (SvUTF8(cat) != was_utf8)
2674 /* This had better be an upgrade while in utf8==0 mode */
2675 utf8 = 1;
2676
49704364 2677 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2678 break; /* No way to continue */
2679 }
f337b084
TH
2680 lookahead.flags = symptr->flags & ~group_modifiers;
2681 goto no_change;
18529408 2682 }
62f95557
IZ
2683 case 'X' | TYPE_IS_SHRIEKING:
2684 if (!len) /* Avoid division by 0 */
2685 len = 1;
f337b084
TH
2686 if (utf8) {
2687 char *hop, *last;
2688 I32 l = len;
2689 hop = last = start;
2690 while (hop < cur) {
2691 hop += UTF8SKIP(hop);
2692 if (--l == 0) {
2693 last = hop;
2694 l = len;
2695 }
2696 }
2697 if (last > cur)
2698 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2699 cur = last;
2700 break;
2701 }
2702 len = (cur-start) % len;
62f95557 2703 /* FALL THROUGH */
a6ec74c1 2704 case 'X':
f337b084
TH
2705 if (utf8) {
2706 if (len < 1) goto no_change;
28be1210 2707 utf8_shrink:
f337b084
TH
2708 while (len > 0) {
2709 if (cur <= start)
28be1210
TH
2710 Perl_croak(aTHX_ "'%c' outside of string in pack",
2711 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2712 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2713 if (cur <= start)
28be1210
TH
2714 Perl_croak(aTHX_ "'%c' outside of string in pack",
2715 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2716 }
2717 len--;
2718 }
2719 } else {
fc241834 2720 shrink:
f337b084 2721 if (cur - start < len)
28be1210
TH
2722 Perl_croak(aTHX_ "'%c' outside of string in pack",
2723 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2724 cur -= len;
2725 }
2726 if (cur < start+symptr->strbeg) {
2727 /* Make sure group starts don't point into the void */
2728 tempsym_t *group;
9e27e96a 2729 const STRLEN length = cur-start;
f337b084
TH
2730 for (group = symptr;
2731 group && length < group->strbeg;
2732 group = group->previous) group->strbeg = length;
2733 lookahead.strbeg = length;
2734 }
a6ec74c1 2735 break;
fc241834
RGS
2736 case 'x' | TYPE_IS_SHRIEKING: {
2737 I32 ai32;
62f95557
IZ
2738 if (!len) /* Avoid division by 0 */
2739 len = 1;
230e1fce 2740 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2741 else ai32 = (cur - start) % len;
2742 if (ai32 == 0) goto no_change;
2743 len -= ai32;
2744 }
2745 /* FALL THROUGH */
a6ec74c1 2746 case 'x':
f337b084 2747 goto grow;
a6ec74c1
JH
2748 case 'A':
2749 case 'Z':
f337b084 2750 case 'a': {
f7fe979e 2751 const char *aptr;
f337b084 2752
a6ec74c1 2753 fromstr = NEXTFROM;
e62f0680 2754 aptr = SvPV_const(fromstr, fromlen);
f337b084 2755 if (DO_UTF8(fromstr)) {
f7fe979e 2756 const char *end, *s;
f337b084
TH
2757
2758 if (!utf8 && !SvUTF8(cat)) {
2759 marked_upgrade(aTHX_ cat, symptr);
2760 lookahead.flags |= FLAG_DO_UTF8;
2761 lookahead.strbeg = symptr->strbeg;
2762 utf8 = 1;
2763 start = SvPVX(cat);
2764 cur = start + SvCUR(cat);
2765 }
fc241834 2766 if (howlen == e_star) {
f337b084
TH
2767 if (utf8) goto string_copy;
2768 len = fromlen+1;
2769 }
2770 s = aptr;
2771 end = aptr + fromlen;
2772 fromlen = datumtype == 'Z' ? len-1 : len;
2773 while ((I32) fromlen > 0 && s < end) {
2774 s += UTF8SKIP(s);
2775 fromlen--;
2776 }
2777 if (s > end)
2778 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2779 if (utf8) {
fc241834 2780 len = fromlen;
f337b084
TH
2781 if (datumtype == 'Z') len++;
2782 fromlen = s-aptr;
2783 len += fromlen;
fc241834 2784
f337b084 2785 goto string_copy;
fc241834 2786 }
f337b084
TH
2787 fromlen = len - fromlen;
2788 if (datumtype == 'Z') fromlen--;
2789 if (howlen == e_star) {
2790 len = fromlen;
2791 if (datumtype == 'Z') len++;
fc241834 2792 }
f337b084 2793 GROWING(0, cat, start, cur, len);
fc241834 2794 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2795 datumtype | TYPE_IS_PACK))
2796 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2797 cur += fromlen;
a6ec74c1 2798 len -= fromlen;
f337b084
TH
2799 } else if (utf8) {
2800 if (howlen == e_star) {
2801 len = fromlen;
2802 if (datumtype == 'Z') len++;
a6ec74c1 2803 }
f337b084
TH
2804 if (len <= (I32) fromlen) {
2805 fromlen = len;
2806 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2807 }
fc241834 2808 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2809 upgrade, so:
2810 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2811 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2812 len -= fromlen;
2813 while (fromlen > 0) {
230e1fce 2814 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2815 aptr++;
2816 fromlen--;
fc241834 2817 }
f337b084
TH
2818 } else {
2819 string_copy:
2820 if (howlen == e_star) {
2821 len = fromlen;
2822 if (datumtype == 'Z') len++;
2823 }
2824 if (len <= (I32) fromlen) {
2825 fromlen = len;
2826 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2827 }
f337b084
TH
2828 GROWING(0, cat, start, cur, len);
2829 Copy(aptr, cur, fromlen, char);
2830 cur += fromlen;
2831 len -= fromlen;
a6ec74c1 2832 }
f337b084
TH
2833 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2834 cur += len;
a6ec74c1 2835 break;
f337b084 2836 }
a6ec74c1 2837 case 'B':
f337b084 2838 case 'b': {
b83604b4 2839 const char *str, *end;
f337b084
TH
2840 I32 l, field_len;
2841 U8 bits;
2842 bool utf8_source;
2843 U32 utf8_flags;
a6ec74c1 2844
fc241834 2845 fromstr = NEXTFROM;
b83604b4 2846 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2847 end = str + fromlen;
2848 if (DO_UTF8(fromstr)) {
2849 utf8_source = TRUE;
041457d9 2850 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2851 } else {
2852 utf8_source = FALSE;
2853 utf8_flags = 0; /* Unused, but keep compilers happy */
2854 }
2855 if (howlen == e_star) len = fromlen;
2856 field_len = (len+7)/8;
2857 GROWING(utf8, cat, start, cur, field_len);
2858 if (len > (I32)fromlen) len = fromlen;
2859 bits = 0;
2860 l = 0;
2861 if (datumtype == 'B')
2862 while (l++ < len) {
2863 if (utf8_source) {
2864 UV val;
2865 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2866 bits |= val & 1;
2867 } else bits |= *str++ & 1;
2868 if (l & 7) bits <<= 1;
fc241834 2869 else {
f337b084
TH
2870 PUSH_BYTE(utf8, cur, bits);
2871 bits = 0;
a6ec74c1
JH
2872 }
2873 }
f337b084
TH
2874 else
2875 /* datumtype == 'b' */
2876 while (l++ < len) {
2877 if (utf8_source) {
2878 UV val;
2879 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2880 if (val & 1) bits |= 0x80;
2881 } else if (*str++ & 1)
2882 bits |= 0x80;
2883 if (l & 7) bits >>= 1;
fc241834 2884 else {
f337b084
TH
2885 PUSH_BYTE(utf8, cur, bits);
2886 bits = 0;
a6ec74c1
JH
2887 }
2888 }
f337b084
TH
2889 l--;
2890 if (l & 7) {
fc241834 2891 if (datumtype == 'B')
f337b084 2892 bits <<= 7 - (l & 7);
fc241834 2893 else
f337b084
TH
2894 bits >>= 7 - (l & 7);
2895 PUSH_BYTE(utf8, cur, bits);
2896 l += 7;
a6ec74c1 2897 }
f337b084
TH
2898 /* Determine how many chars are left in the requested field */
2899 l /= 8;
2900 if (howlen == e_star) field_len = 0;
2901 else field_len -= l;
2902 Zero(cur, field_len, char);
2903 cur += field_len;
a6ec74c1 2904 break;
f337b084 2905 }
a6ec74c1 2906 case 'H':
f337b084 2907 case 'h': {
10516c54 2908 const char *str, *end;
f337b084
TH
2909 I32 l, field_len;
2910 U8 bits;
2911 bool utf8_source;
2912 U32 utf8_flags;
a6ec74c1 2913
fc241834 2914 fromstr = NEXTFROM;
10516c54 2915 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2916 end = str + fromlen;
2917 if (DO_UTF8(fromstr)) {
2918 utf8_source = TRUE;
041457d9 2919 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2920 } else {
2921 utf8_source = FALSE;
2922 utf8_flags = 0; /* Unused, but keep compilers happy */
2923 }
2924 if (howlen == e_star) len = fromlen;
2925 field_len = (len+1)/2;
2926 GROWING(utf8, cat, start, cur, field_len);
2927 if (!utf8 && len > (I32)fromlen) len = fromlen;
2928 bits = 0;
2929 l = 0;
2930 if (datumtype == 'H')
2931 while (l++ < len) {
2932 if (utf8_source) {
2933 UV val;
2934 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2935 if (val < 256 && isALPHA(val))
2936 bits |= (val + 9) & 0xf;
a6ec74c1 2937 else
f337b084
TH
2938 bits |= val & 0xf;
2939 } else if (isALPHA(*str))
2940 bits |= (*str++ + 9) & 0xf;
2941 else
2942 bits |= *str++ & 0xf;
2943 if (l & 1) bits <<= 4;
fc241834 2944 else {
f337b084
TH
2945 PUSH_BYTE(utf8, cur, bits);
2946 bits = 0;
a6ec74c1
JH
2947 }
2948 }
f337b084
TH
2949 else
2950 while (l++ < len) {
2951 if (utf8_source) {
2952 UV val;
2953 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2954 if (val < 256 && isALPHA(val))
2955 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2956 else
f337b084
TH
2957 bits |= (val & 0xf) << 4;
2958 } else if (isALPHA(*str))
2959 bits |= ((*str++ + 9) & 0xf) << 4;
2960 else
2961 bits |= (*str++ & 0xf) << 4;
2962 if (l & 1) bits >>= 4;
fc241834 2963 else {
f337b084
TH
2964 PUSH_BYTE(utf8, cur, bits);
2965 bits = 0;
a6ec74c1 2966 }
fc241834 2967 }
f337b084
TH
2968 l--;
2969 if (l & 1) {
2970 PUSH_BYTE(utf8, cur, bits);
2971 l++;
2972 }
2973 /* Determine how many chars are left in the requested field */
2974 l /= 2;
2975 if (howlen == e_star) field_len = 0;
2976 else field_len -= l;
2977 Zero(cur, field_len, char);
2978 cur += field_len;
2979 break;
fc241834
RGS
2980 }
2981 case 'c':
f337b084
TH
2982 while (len-- > 0) {
2983 IV aiv;
2984 fromstr = NEXTFROM;
2985 aiv = SvIV(fromstr);
2986 if ((-128 > aiv || aiv > 127) &&
2987 ckWARN(WARN_PACK))
2988 Perl_warner(aTHX_ packWARN(WARN_PACK),
2989 "Character in 'c' format wrapped in pack");
2990 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2991 }
2992 break;
2993 case 'C':
f337b084
TH
2994 if (len == 0) {
2995 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2996 break;
2997 }
2998 GROWING(0, cat, start, cur, len);
a6ec74c1 2999 while (len-- > 0) {
f337b084 3000 IV aiv;
a6ec74c1 3001 fromstr = NEXTFROM;
f337b084
TH
3002 aiv = SvIV(fromstr);
3003 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
3004 ckWARN(WARN_PACK))
3005 Perl_warner(aTHX_ packWARN(WARN_PACK),
3006 "Character in 'C' format wrapped in pack");
f337b084
TH
3007 *cur++ = aiv & 0xff;
3008 }
fc241834
RGS
3009 break;
3010 case 'W': {
3011 char *end;
3012 U8 in_bytes = IN_BYTES;
3013
3014 end = start+SvLEN(cat)-1;
3015 if (utf8) end -= UTF8_MAXLEN-1;
3016 while (len-- > 0) {
3017 UV auv;
3018 fromstr = NEXTFROM;
3019 auv = SvUV(fromstr);
3020 if (in_bytes) auv = auv % 0x100;
3021 if (utf8) {
3022 W_utf8:
3023 if (cur > end) {
3024 *cur = '\0';
b162af07 3025 SvCUR_set(cat, cur - start);
fc241834
RGS
3026
3027 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3028 end = start+SvLEN(cat)-UTF8_MAXLEN;
3029 }
230e1fce
NC
3030 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3031 NATIVE_TO_UNI(auv),
041457d9 3032 warn_utf8 ?
230e1fce 3033 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3034 } else {
3035 if (auv >= 0x100) {
3036 if (!SvUTF8(cat)) {
3037 *cur = '\0';
b162af07 3038 SvCUR_set(cat, cur - start);
fc241834
RGS
3039 marked_upgrade(aTHX_ cat, symptr);
3040 lookahead.flags |= FLAG_DO_UTF8;
3041 lookahead.strbeg = symptr->strbeg;
3042 utf8 = 1;
3043 start = SvPVX(cat);
3044 cur = start + SvCUR(cat);
3045 end = start+SvLEN(cat)-UTF8_MAXLEN;
3046 goto W_utf8;
3047 }
3048 if (ckWARN(WARN_PACK))
3049 Perl_warner(aTHX_ packWARN(WARN_PACK),
3050 "Character in 'W' format wrapped in pack");
3051 auv &= 0xff;
3052 }
3053 if (cur >= end) {
3054 *cur = '\0';
b162af07 3055 SvCUR_set(cat, cur - start);
fc241834
RGS
3056 GROWING(0, cat, start, cur, len+1);
3057 end = start+SvLEN(cat)-1;
3058 }
fe2774ed 3059 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3060 }
3061 }
3062 break;
fc241834
RGS
3063 }
3064 case 'U': {
3065 char *end;
3066
3067 if (len == 0) {
3068 if (!(symptr->flags & FLAG_DO_UTF8)) {
3069 marked_upgrade(aTHX_ cat, symptr);
3070 lookahead.flags |= FLAG_DO_UTF8;
3071 lookahead.strbeg = symptr->strbeg;
3072 }
3073 utf8 = 0;
3074 goto no_change;
3075 }
3076
3077 end = start+SvLEN(cat);
3078 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3079 while (len-- > 0) {
fc241834 3080 UV auv;
a6ec74c1 3081 fromstr = NEXTFROM;
fc241834
RGS
3082 auv = SvUV(fromstr);
3083 if (utf8) {
230e1fce 3084 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3085 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3086 warn_utf8 ?
fc241834
RGS
3087 0 : UNICODE_ALLOW_ANY);
3088 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3089 *cur = '\0';
b162af07 3090 SvCUR_set(cat, cur - start);
fc241834
RGS
3091 GROWING(0, cat, start, cur,
3092 len+(endb-buffer)*UTF8_EXPAND);
3093 end = start+SvLEN(cat);
3094 }
3095 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3096 } else {
3097 if (cur >= end) {
3098 *cur = '\0';
b162af07 3099 SvCUR_set(cat, cur - start);
fc241834
RGS
3100 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3101 end = start+SvLEN(cat)-UTF8_MAXLEN;
3102 }
230e1fce 3103 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3104 warn_utf8 ?
230e1fce 3105 0 : UNICODE_ALLOW_ANY);
fc241834 3106 }
a6ec74c1 3107 }
a6ec74c1 3108 break;
fc241834 3109 }
a6ec74c1
JH
3110 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3111 case 'f':
a6ec74c1 3112 while (len-- > 0) {
f337b084
TH
3113 float afloat;
3114 NV anv;
a6ec74c1 3115 fromstr = NEXTFROM;
f337b084 3116 anv = SvNV(fromstr);
5cdb9e01 3117#ifdef __VOS__
f337b084 3118 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3119 during conversion from double to float into infinity, so we
3120 do it by hand. This code should either be generalized for
3121 any OS that needs it, or removed if and when VOS implements
3122 posix-976 (suggestion to support mapping to infinity).
3123 Paul.Green@stratus.com 02-04-02. */
f337b084 3124 if (anv > FLT_MAX)
fc241834 3125 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3126 else if (anv < -FLT_MAX)
fc241834 3127 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3128 else afloat = (float) anv;
3129#else /* __VOS__ */
baf3cf9c 3130# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3131 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3132 * on Alpha; fake it if we don't have them.
3133 */
f337b084 3134 if (anv > FLT_MAX)
fc241834 3135 afloat = FLT_MAX;
f337b084 3136 else if (anv < -FLT_MAX)
fc241834 3137 afloat = -FLT_MAX;
f337b084 3138 else afloat = (float)anv;
baf3cf9c 3139# else
f337b084 3140 afloat = (float)anv;
baf3cf9c 3141# endif
f337b084 3142#endif /* __VOS__ */
1109a392 3143 DO_BO_PACK_N(afloat, float);
f337b084 3144 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3145 }
3146 break;
3147 case 'd':
a6ec74c1 3148 while (len-- > 0) {
f337b084
TH
3149 double adouble;
3150 NV anv;
a6ec74c1 3151 fromstr = NEXTFROM;
f337b084 3152 anv = SvNV(fromstr);
5cdb9e01 3153#ifdef __VOS__
f337b084 3154 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3155 during conversion from long double to double into infinity,
3156 so we do it by hand. This code should either be generalized
3157 for any OS that needs it, or removed if and when VOS
3158 implements posix-976 (suggestion to support mapping to
3159 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3160 if (anv > DBL_MAX)
fc241834 3161 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3162 else if (anv < -DBL_MAX)
fc241834 3163 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3164 else adouble = (double) anv;
3165#else /* __VOS__ */
baf3cf9c 3166# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3167 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3168 * on Alpha; fake it if we don't have them.
3169 */
f337b084 3170 if (anv > DBL_MAX)
fc241834 3171 adouble = DBL_MAX;
f337b084 3172 else if (anv < -DBL_MAX)
fc241834 3173 adouble = -DBL_MAX;
f337b084 3174 else adouble = (double)anv;
baf3cf9c 3175# else
f337b084 3176 adouble = (double)anv;
baf3cf9c 3177# endif
f337b084 3178#endif /* __VOS__ */
1109a392 3179 DO_BO_PACK_N(adouble, double);
f337b084 3180 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3181 }
3182 break;
fc241834
RGS
3183 case 'F': {
3184 NV anv;
1109a392 3185 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3186 while (len-- > 0) {
3187 fromstr = NEXTFROM;
3188 anv = SvNV(fromstr);
1109a392 3189 DO_BO_PACK_N(anv, NV);
fc241834 3190 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3191 }
3192 break;
fc241834 3193 }
92d41999 3194#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3195 case 'D': {
3196 long double aldouble;
1109a392
MHM
3197 /* long doubles can have unused bits, which may be nonzero */
3198 Zero(&aldouble, 1, long double);
92d41999
JH
3199 while (len-- > 0) {
3200 fromstr = NEXTFROM;
3201 aldouble = (long double)SvNV(fromstr);
1109a392 3202 DO_BO_PACK_N(aldouble, long double);
fc241834 3203 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3204 }
3205 break;
fc241834 3206 }
92d41999 3207#endif
7212898e 3208#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3209 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3210#endif
a6ec74c1
JH
3211 case 'n':
3212 while (len-- > 0) {
f337b084 3213 I16 ai16;
a6ec74c1 3214 fromstr = NEXTFROM;
ef108786 3215 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3216#ifdef HAS_HTONS
ef108786 3217 ai16 = PerlSock_htons(ai16);
a6ec74c1 3218#endif
f337b084 3219 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3220 }
3221 break;
7212898e 3222#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3223 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3224#endif
a6ec74c1
JH
3225 case 'v':
3226 while (len-- > 0) {
f337b084 3227 I16 ai16;
a6ec74c1 3228 fromstr = NEXTFROM;
ef108786 3229 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3230#ifdef HAS_HTOVS
ef108786 3231 ai16 = htovs(ai16);
a6ec74c1 3232#endif
f337b084 3233 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3234 }
3235 break;
49704364 3236 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3237#if SHORTSIZE != SIZE16
fc241834 3238 while (len-- > 0) {
f337b084 3239 unsigned short aushort;
fc241834
RGS
3240 fromstr = NEXTFROM;
3241 aushort = SvUV(fromstr);
3242 DO_BO_PACK(aushort, s);
f337b084 3243 PUSH_VAR(utf8, cur, aushort);
fc241834 3244 }
49704364
WL
3245 break;
3246#else
3247 /* Fall through! */
a6ec74c1 3248#endif
49704364 3249 case 'S':
fc241834 3250 while (len-- > 0) {
f337b084 3251 U16 au16;
fc241834
RGS
3252 fromstr = NEXTFROM;
3253 au16 = (U16)SvUV(fromstr);
3254 DO_BO_PACK(au16, 16);
f337b084 3255 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3256 }
3257 break;
49704364 3258 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3259#if SHORTSIZE != SIZE16
fc241834 3260 while (len-- > 0) {
f337b084 3261 short ashort;
fc241834
RGS
3262 fromstr = NEXTFROM;
3263 ashort = SvIV(fromstr);
3264 DO_BO_PACK(ashort, s);
f337b084 3265 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3266 }
49704364
WL
3267 break;
3268#else
3269 /* Fall through! */
a6ec74c1 3270#endif
49704364
WL
3271 case 's':
3272 while (len-- > 0) {
f337b084 3273 I16 ai16;
49704364 3274 fromstr = NEXTFROM;
ef108786
MHM
3275 ai16 = (I16)SvIV(fromstr);
3276 DO_BO_PACK(ai16, 16);
f337b084 3277 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3278 }
3279 break;
3280 case 'I':
49704364 3281 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3282 while (len-- > 0) {
f337b084 3283 unsigned int auint;
a6ec74c1
JH
3284 fromstr = NEXTFROM;
3285 auint = SvUV(fromstr);
1109a392 3286 DO_BO_PACK(auint, i);
f337b084 3287 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3288 }
3289 break;
92d41999
JH
3290 case 'j':
3291 while (len-- > 0) {
f337b084 3292 IV aiv;
92d41999
JH
3293 fromstr = NEXTFROM;
3294 aiv = SvIV(fromstr);
1109a392
MHM
3295#if IVSIZE == INTSIZE
3296 DO_BO_PACK(aiv, i);
3297#elif IVSIZE == LONGSIZE
3298 DO_BO_PACK(aiv, l);
3299#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3300 DO_BO_PACK(aiv, 64);
f337b084
TH
3301#else
3302 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3303#endif
f337b084 3304 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3305 }
3306 break;
3307 case 'J':
3308 while (len-- > 0) {
f337b084 3309 UV auv;
92d41999
JH
3310 fromstr = NEXTFROM;
3311 auv = SvUV(fromstr);
1109a392
MHM
3312#if UVSIZE == INTSIZE
3313 DO_BO_PACK(auv, i);
3314#elif UVSIZE == LONGSIZE
3315 DO_BO_PACK(auv, l);
3316#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3317 DO_BO_PACK(auv, 64);
f337b084
TH
3318#else
3319 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3320#endif
f337b084 3321 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3322 }
3323 break;
a6ec74c1
JH
3324 case 'w':
3325 while (len-- > 0) {
f337b084 3326 NV anv;
a6ec74c1 3327 fromstr = NEXTFROM;
15e9f109 3328 anv = SvNV(fromstr);
a6ec74c1 3329
f337b084
TH
3330 if (anv < 0) {
3331 *cur = '\0';
b162af07 3332 SvCUR_set(cat, cur - start);
49704364 3333 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3334 }
a6ec74c1 3335
196b62db
NC
3336 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3337 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3338 any negative IVs will have already been got by the croak()
3339 above. IOK is untrue for fractions, so we test them
3340 against UV_MAX_P1. */
f337b084
TH
3341 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3342 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3343 char *in = buf + sizeof(buf);
196b62db 3344 UV auv = SvUV(fromstr);
a6ec74c1
JH
3345
3346 do {
eb160463 3347 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3348 auv >>= 7;
3349 } while (auv);
3350 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3351 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3352 in, (buf + sizeof(buf)) - in);
3353 } else if (SvPOKp(fromstr))
3354 goto w_string;
a6ec74c1 3355 else if (SvNOKp(fromstr)) {
0258719b
NC
3356 /* 10**NV_MAX_10_EXP is the largest power of 10
3357 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3358 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3359 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3360 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3361 Some C compilers are strict about integral constant
3362 expressions so we conservatively divide by a slightly
3363 smaller integer instead of multiplying by the exact
3364 floating-point value.
0258719b
NC
3365 */
3366#ifdef NV_MAX_10_EXP
f337b084 3367 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3368 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3369#else
f337b084 3370 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3371 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3372#endif
a6ec74c1
JH
3373 char *in = buf + sizeof(buf);
3374
8b6e33c7 3375 anv = Perl_floor(anv);
a6ec74c1 3376 do {
8b6e33c7 3377 const NV next = Perl_floor(anv / 128);
a6ec74c1 3378 if (in <= buf) /* this cannot happen ;-) */
49704364 3379 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3380 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3381 anv = next;
3382 } while (anv > 0);
a6ec74c1 3383 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3384 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3385 in, (buf + sizeof(buf)) - in);
3386 } else {
8b6e33c7
AL
3387 const char *from;
3388 char *result, *in;
735b914b
JH
3389 SV *norm;
3390 STRLEN len;
3391 bool done;
3392
f337b084 3393 w_string:
735b914b 3394 /* Copy string and check for compliance */
349d4f2f 3395 from = SvPV_const(fromstr, len);
735b914b 3396 if ((norm = is_an_int(from, len)) == NULL)
49704364 3397 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3398
a02a5408 3399 Newx(result, len, char);
735b914b
JH
3400 in = result + len;
3401 done = FALSE;
f337b084 3402 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3403 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3404 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3405 in, (result + len) - in);
735b914b
JH
3406 Safefree(result);
3407 SvREFCNT_dec(norm); /* free norm */
fc241834 3408 }
a6ec74c1
JH
3409 }
3410 break;
3411 case 'i':
49704364 3412 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3413 while (len-- > 0) {
f337b084 3414 int aint;
a6ec74c1
JH
3415 fromstr = NEXTFROM;
3416 aint = SvIV(fromstr);
1109a392 3417 DO_BO_PACK(aint, i);
f337b084 3418 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3419 }
3420 break;
7212898e 3421#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3422 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3423#endif
a6ec74c1
JH
3424 case 'N':
3425 while (len-- > 0) {
f337b084 3426 U32 au32;
a6ec74c1 3427 fromstr = NEXTFROM;
ef108786 3428 au32 = SvUV(fromstr);
a6ec74c1 3429#ifdef HAS_HTONL
ef108786 3430 au32 = PerlSock_htonl(au32);
a6ec74c1 3431#endif
f337b084 3432 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3433 }
3434 break;
7212898e 3435#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3436 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3437#endif
a6ec74c1
JH
3438 case 'V':
3439 while (len-- > 0) {
f337b084 3440 U32 au32;
a6ec74c1 3441 fromstr = NEXTFROM;
ef108786 3442 au32 = SvUV(fromstr);
a6ec74c1 3443#ifdef HAS_HTOVL
ef108786 3444 au32 = htovl(au32);
a6ec74c1 3445#endif
f337b084 3446 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3447 }
3448 break;
49704364 3449 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3450#if LONGSIZE != SIZE32
fc241834 3451 while (len-- > 0) {
f337b084 3452 unsigned long aulong;
fc241834
RGS
3453 fromstr = NEXTFROM;
3454 aulong = SvUV(fromstr);
3455 DO_BO_PACK(aulong, l);
f337b084 3456 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3457 }
49704364
WL
3458 break;
3459#else
3460 /* Fall though! */
a6ec74c1 3461#endif
49704364 3462 case 'L':
fc241834 3463 while (len-- > 0) {
f337b084 3464 U32 au32;
fc241834
RGS
3465 fromstr = NEXTFROM;
3466 au32 = SvUV(fromstr);
3467 DO_BO_PACK(au32, 32);
f337b084 3468 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3469 }
3470 break;
49704364 3471 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3472#if LONGSIZE != SIZE32
fc241834 3473 while (len-- > 0) {
f337b084 3474 long along;
fc241834
RGS
3475 fromstr = NEXTFROM;
3476 along = SvIV(fromstr);
3477 DO_BO_PACK(along, l);
f337b084 3478 PUSH_VAR(utf8, cur, along);
a6ec74c1 3479 }
49704364
WL
3480 break;
3481#else
3482 /* Fall though! */
a6ec74c1 3483#endif
49704364
WL
3484 case 'l':
3485 while (len-- > 0) {
f337b084 3486 I32 ai32;
49704364 3487 fromstr = NEXTFROM;
ef108786
MHM
3488 ai32 = SvIV(fromstr);
3489 DO_BO_PACK(ai32, 32);
f337b084 3490 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3491 }
3492 break;
3493#ifdef HAS_QUAD
3494 case 'Q':
3495 while (len-- > 0) {
f337b084 3496 Uquad_t auquad;
a6ec74c1 3497 fromstr = NEXTFROM;
f337b084 3498 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3499 DO_BO_PACK(auquad, 64);
f337b084 3500 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3501 }
3502 break;
3503 case 'q':
3504 while (len-- > 0) {
f337b084 3505 Quad_t aquad;
a6ec74c1
JH
3506 fromstr = NEXTFROM;
3507 aquad = (Quad_t)SvIV(fromstr);
1109a392 3508 DO_BO_PACK(aquad, 64);
f337b084 3509 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3510 }
3511 break;
f337b084 3512#endif /* HAS_QUAD */
a6ec74c1
JH
3513 case 'P':
3514 len = 1; /* assume SV is correct length */
f337b084 3515 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3516 /* Fall through! */
a6ec74c1
JH
3517 case 'p':
3518 while (len-- > 0) {
83003860 3519 const char *aptr;
f337b084 3520
a6ec74c1 3521 fromstr = NEXTFROM;
28a4f200
TH
3522 SvGETMAGIC(fromstr);
3523 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3524 else {
a6ec74c1
JH
3525 /* XXX better yet, could spirit away the string to
3526 * a safe spot and hang on to it until the result
3527 * of pack() (and all copies of the result) are
3528 * gone.
3529 */
041457d9
DM
3530 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3531 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3532 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3533 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3534 }
3535 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3536 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3537 else
2596d9fe 3538 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3539 }
07409e01 3540 DO_BO_PACK_PC(aptr);
f337b084 3541 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3542 }
3543 break;
fc241834 3544 case 'u': {
f7fe979e 3545 const char *aptr, *aend;
fc241834 3546 bool from_utf8;
f337b084 3547
a6ec74c1 3548 fromstr = NEXTFROM;
fc241834
RGS
3549 if (len <= 2) len = 45;
3550 else len = len / 3 * 3;
3551 if (len >= 64) {
3552 Perl_warner(aTHX_ packWARN(WARN_PACK),
3553 "Field too wide in 'u' format in pack");
3554 len = 63;
3555 }
83003860 3556 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3557 from_utf8 = DO_UTF8(fromstr);
3558 if (from_utf8) {
3559 aend = aptr + fromlen;
3560 fromlen = sv_len_utf8(fromstr);
3561 } else aend = NULL; /* Unused, but keep compilers happy */
3562 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3563 while (fromlen > 0) {
fc241834 3564 U8 *end;
a6ec74c1 3565 I32 todo;
fc241834 3566 U8 hunk[1+63/3*4+1];
a6ec74c1 3567
eb160463 3568 if ((I32)fromlen > len)
a6ec74c1
JH
3569 todo = len;
3570 else
3571 todo = fromlen;
fc241834
RGS
3572 if (from_utf8) {
3573 char buffer[64];
3574 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3575 'u' | TYPE_IS_PACK)) {
3576 *cur = '\0';
b162af07 3577 SvCUR_set(cat, cur - start);
fc241834
RGS
3578 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3579 }
3580 end = doencodes(hunk, buffer, todo);
3581 } else {
3582 end = doencodes(hunk, aptr, todo);
3583 aptr += todo;
3584 }
3585 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3586 fromlen -= todo;
3587 }
a6ec74c1
JH
3588 break;
3589 }
f337b084
TH
3590 }
3591 *cur = '\0';
b162af07 3592 SvCUR_set(cat, cur - start);
f337b084 3593 no_change:
49704364 3594 *symptr = lookahead;
a6ec74c1 3595 }
49704364 3596 return beglist;
18529408
IZ
3597}
3598#undef NEXTFROM
3599
3600
3601PP(pp_pack)
3602{
3603 dSP; dMARK; dORIGMARK; dTARGET;
3604 register SV *cat = TARG;
3605 STRLEN fromlen;
349d4f2f
NC
3606 SV *pat_sv = *++MARK;
3607 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3608 register const char *patend = pat + fromlen;
18529408
IZ
3609
3610 MARK++;
3611 sv_setpvn(cat, "", 0);
f337b084 3612 SvUTF8_off(cat);
18529408 3613
7accc089 3614 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3615
a6ec74c1
JH
3616 SvSETMAGIC(cat);
3617 SP = ORIGMARK;
3618 PUSHs(cat);
3619 RETURN;
3620}
a6ec74c1 3621
73cb7263
NC
3622/*
3623 * Local variables:
3624 * c-indentation-style: bsd
3625 * c-basic-offset: 4
3626 * indent-tabs-mode: t
3627 * End:
3628 *
37442d52
RGS
3629 * ex: set ts=8 sts=4 sw=4 noet:
3630 */