This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Correct the paramter to Perl_op_xmldump(). The one that got away from
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
663f364b 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
a6ec74c1
JH
29#include "EXTERN.h"
30#define PERL_IN_PP_PACK_C
31#include "perl.h"
32
f7fe979e
AL
33/* Types used by pack/unpack */
34typedef enum {
35 e_no_len, /* no length */
36 e_number, /* number, [] */
37 e_star /* asterisk */
38} howlen_t;
39
40typedef struct tempsym {
41 const char* patptr; /* current template char */
42 const char* patend; /* one after last char */
43 const char* grpbeg; /* 1st char of ()-group */
44 const char* grpend; /* end of ()-group */
45 I32 code; /* template code (!<>) */
46 I32 length; /* length/repeat count */
47 howlen_t howlen; /* how length is given */
48 int level; /* () nesting level */
49 U32 flags; /* /=4, comma=2, pack=1 */
50 /* and group modifiers */
51 STRLEN strbeg; /* offset of group start */
52 struct tempsym *previous; /* previous group */
53} tempsym_t;
54
55#define TEMPSYM_INIT(symptr, p, e, f) \
56 STMT_START { \
57 (symptr)->patptr = (p); \
58 (symptr)->patend = (e); \
59 (symptr)->grpbeg = NULL; \
60 (symptr)->grpend = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->code = 0; \
63 (symptr)->length = 0; \
10edeb5d 64 (symptr)->howlen = e_no_len; \
f7fe979e
AL
65 (symptr)->level = 0; \
66 (symptr)->flags = (f); \
67 (symptr)->strbeg = 0; \
68 (symptr)->previous = NULL; \
69 } STMT_END
70
7212898e 71#if PERL_VERSION >= 9
f337b084
TH
72# define PERL_PACK_CAN_BYTEORDER
73# define PERL_PACK_CAN_SHRIEKSIGN
74#endif
75
76#ifndef CHAR_BIT
77# define CHAR_BIT 8
7212898e 78#endif
3473cf63
RGS
79/* Maximum number of bytes to which a byte can grow due to upgrade */
80#define UTF8_EXPAND 2
7212898e 81
a6ec74c1 82/*
a6ec74c1
JH
83 * Offset for integer pack/unpack.
84 *
85 * On architectures where I16 and I32 aren't really 16 and 32 bits,
86 * which for now are all Crays, pack and unpack have to play games.
87 */
88
89/*
90 * These values are required for portability of pack() output.
91 * If they're not right on your machine, then pack() and unpack()
92 * wouldn't work right anyway; you'll need to apply the Cray hack.
93 * (I'd like to check them with #if, but you can't use sizeof() in
94 * the preprocessor.) --???
95 */
96/*
97 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
98 defines are now in config.h. --Andy Dougherty April 1998
99 */
100#define SIZE16 2
101#define SIZE32 4
102
103/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
104 --jhi Feb 1999 */
105
1109a392
MHM
106#if U16SIZE > SIZE16 || U32SIZE > SIZE32
107# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
108# define OFF16(p) ((char*)(p))
109# define OFF32(p) ((char*)(p))
a6ec74c1 110# else
1109a392 111# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
112# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
113# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
114# else
08ca2aa3 115 ++++ bad cray byte order
a6ec74c1
JH
116# endif
117# endif
a6ec74c1 118#else
08ca2aa3
TH
119# define OFF16(p) ((char *) (p))
120# define OFF32(p) ((char *) (p))
a6ec74c1
JH
121#endif
122
f337b084
TH
123/* Only to be used inside a loop (see the break) */
124#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
125 if (utf8) { \
126 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
127 } else { \
128 Copy(s, OFF16(p), SIZE16, char); \
129 (s) += SIZE16; \
130 } \
131} STMT_END
132
133/* Only to be used inside a loop (see the break) */
134#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
135 if (utf8) { \
136 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
137 } else { \
138 Copy(s, OFF32(p), SIZE32, char); \
139 (s) += SIZE32; \
140 } \
141} STMT_END
142
143#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
144#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
145
146/* Only to be used inside a loop (see the break) */
f337b084 147#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
148STMT_START { \
149 if (utf8) { \
f337b084
TH
150 if (!uni_to_bytes(aTHX_ &s, strend, \
151 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
152 } else { \
153 Copy(s, (char *) &var, sizeof(var), char); \
154 s += sizeof(var); \
155 } \
08ca2aa3
TH
156} STMT_END
157
f337b084 158#define PUSH_VAR(utf8, aptr, var) \
230e1fce 159 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 160
49704364
LW
161/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
162#define MAX_SUB_TEMPLATE_LEVEL 100
163
66c611c5 164/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
165#define FLAG_WAS_UTF8 0x40
166#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 167#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 168#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
169#define FLAG_SLASH 0x04
170#define FLAG_COMMA 0x02
171#define FLAG_PACK 0x01
172
a6ec74c1
JH
173STATIC SV *
174S_mul128(pTHX_ SV *sv, U8 m)
175{
176 STRLEN len;
177 char *s = SvPV(sv, len);
178 char *t;
a6ec74c1
JH
179
180 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 181 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
182
183 sv_catsv(tmpNew, sv);
184 SvREFCNT_dec(sv); /* free old sv */
185 sv = tmpNew;
186 s = SvPV(sv, len);
187 }
188 t = s + len - 1;
189 while (!*t) /* trailing '\0'? */
190 t--;
191 while (t > s) {
f7fe979e 192 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
193 *(t--) = '0' + (char)(i % 10);
194 m = (char)(i / 10);
a6ec74c1
JH
195 }
196 return (sv);
197}
198
199/* Explosives and implosives. */
200
201#if 'I' == 73 && 'J' == 74
202/* On an ASCII/ISO kind of system */
203#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
204#else
205/*
206 Some other sort of character set - use memchr() so we don't match
207 the null byte.
208 */
209#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
210#endif
211
66c611c5 212/* type modifiers */
62f95557 213#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
214#define TYPE_IS_BIG_ENDIAN 0x200
215#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 216#define TYPE_IS_PACK 0x800
1109a392 217#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 218#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
219#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
220
7212898e 221#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 222# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 223#else
28be1210 224# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
225#endif
226
227#ifndef PERL_PACK_CAN_BYTEORDER
228/* Put "can't" first because it is shorter */
229# define TYPE_ENDIANNESS(t) 0
230# define TYPE_NO_ENDIANNESS(t) (t)
231
232# define ENDIANNESS_ALLOWED_TYPES ""
233
234# define DO_BO_UNPACK(var, type)
235# define DO_BO_PACK(var, type)
07409e01
NC
236# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
237# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
238# define DO_BO_UNPACK_N(var, type)
239# define DO_BO_PACK_N(var, type)
240# define DO_BO_UNPACK_P(var)
241# define DO_BO_PACK_P(var)
45f723e1
NC
242# define DO_BO_UNPACK_PC(var)
243# define DO_BO_PACK_PC(var)
66c611c5 244
f337b084 245#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
246
247# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
248# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
249
250# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
251
252# define DO_BO_UNPACK(var, type) \
1109a392 253 STMT_START { \
66c611c5 254 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
255 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
256 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
257 default: break; \
258 } \
259 } STMT_END
260
7212898e 261# define DO_BO_PACK(var, type) \
1109a392 262 STMT_START { \
66c611c5 263 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
264 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
265 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
266 default: break; \
267 } \
268 } STMT_END
269
07409e01 270# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 271 STMT_START { \
66c611c5 272 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 273 case TYPE_IS_BIG_ENDIAN: \
07409e01 274 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
275 break; \
276 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 277 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
278 break; \
279 default: \
280 break; \
281 } \
282 } STMT_END
283
07409e01 284# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 285 STMT_START { \
66c611c5 286 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 287 case TYPE_IS_BIG_ENDIAN: \
07409e01 288 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
289 break; \
290 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 291 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
292 break; \
293 default: \
294 break; \
295 } \
296 } STMT_END
297
7212898e 298# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
299 STMT_START { \
300 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
301 case TYPE_IS_BIG_ENDIAN: \
302 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
303 "platform", #action, #type); \
304 break; \
305 case TYPE_IS_LITTLE_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
307 "platform", #action, #type); \
308 break; \
309 default: \
310 break; \
311 } \
312 } STMT_END
313
7212898e 314# if PTRSIZE == INTSIZE
07409e01
NC
315# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
316# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
317# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
318# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 319# elif PTRSIZE == LONGSIZE
07409e01
NC
320# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
321# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
322# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
323# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
324# elif PTRSIZE == IVSIZE
325# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
326# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
327# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
328# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
329# else
330# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
331# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
332# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
333# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 334# endif
1109a392 335
7212898e 336# if defined(my_htolen) && defined(my_letohn) && \
1109a392 337 defined(my_htoben) && defined(my_betohn)
7212898e 338# define DO_BO_UNPACK_N(var, type) \
1109a392 339 STMT_START { \
66c611c5 340 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
341 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
342 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
343 default: break; \
344 } \
345 } STMT_END
346
7212898e 347# define DO_BO_PACK_N(var, type) \
1109a392 348 STMT_START { \
66c611c5 349 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
350 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
351 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
352 default: break; \
353 } \
354 } STMT_END
7212898e
NC
355# else
356# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
357# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
358# endif
359
f337b084 360#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 361
78d46eaa 362#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 363#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
364#define PACK_SIZE_MASK 0x3F
365
78d46eaa
NC
366/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
367 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
368
369#if TYPE_IS_SHRIEKING != 0x100
370 ++++shriek offset should be 256
371#endif
372
373typedef U8 packprops_t;
78d46eaa
NC
374#if 'J'-'I' == 1
375/* ASCII */
f43a9a31 376STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
377 /* normal */
378 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0,
1651fc44 383 /* C */ sizeof(unsigned char),
80a13697 384#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 385 /* D */ LONG_DOUBLESIZE,
80a13697 386#else
a7a3cfaa 387 0,
80a13697 388#endif
a7a3cfaa
TH
389 0,
390 /* F */ NVSIZE,
391 0, 0,
392 /* I */ sizeof(unsigned int),
393 /* J */ UVSIZE,
394 0,
395 /* L */ SIZE32,
396 0,
397 /* N */ SIZE32,
398 0, 0,
80a13697 399#if defined(HAS_QUAD)
a7a3cfaa 400 /* Q */ sizeof(Uquad_t),
80a13697 401#else
a7a3cfaa 402 0,
80a13697 403#endif
a7a3cfaa
TH
404 0,
405 /* S */ SIZE16,
406 0,
407 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
408 /* V */ SIZE32,
409 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
410 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
411 /* c */ sizeof(char),
412 /* d */ sizeof(double),
413 0,
414 /* f */ sizeof(float),
415 0, 0,
416 /* i */ sizeof(int),
417 /* j */ IVSIZE,
418 0,
419 /* l */ SIZE32,
420 0,
421 /* n */ SIZE16,
422 0,
423 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 424#if defined(HAS_QUAD)
a7a3cfaa 425 /* q */ sizeof(Quad_t),
80a13697 426#else
a7a3cfaa 427 0,
80a13697 428#endif
a7a3cfaa
TH
429 0,
430 /* s */ SIZE16,
431 0, 0,
432 /* v */ SIZE16,
433 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
434 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0,
443 /* shrieking */
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 /* I */ sizeof(unsigned int),
450 0, 0,
451 /* L */ sizeof(unsigned long),
452 0,
7212898e 453#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 454 /* N */ SIZE32,
7212898e 455#else
a7a3cfaa 456 0,
7212898e 457#endif
a7a3cfaa
TH
458 0, 0, 0, 0,
459 /* S */ sizeof(unsigned short),
460 0, 0,
7212898e 461#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 462 /* V */ SIZE32,
7212898e 463#else
a7a3cfaa 464 0,
7212898e 465#endif
a7a3cfaa
TH
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0,
468 /* i */ sizeof(int),
469 0, 0,
470 /* l */ sizeof(long),
471 0,
7212898e 472#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 473 /* n */ SIZE16,
7212898e 474#else
a7a3cfaa 475 0,
7212898e 476#endif
a7a3cfaa
TH
477 0, 0, 0, 0,
478 /* s */ sizeof(short),
479 0, 0,
7212898e 480#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 481 /* v */ SIZE16,
7212898e 482#else
a7a3cfaa 483 0,
7212898e 484#endif
a7a3cfaa
TH
485 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
494};
495#else
496/* EBCDIC (or bust) */
f43a9a31 497STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
498 /* normal */
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0,
508 /* c */ sizeof(char),
509 /* d */ sizeof(double),
510 0,
511 /* f */ sizeof(float),
512 0, 0,
513 /* i */ sizeof(int),
514 0, 0, 0, 0, 0, 0, 0,
515 /* j */ IVSIZE,
516 0,
517 /* l */ SIZE32,
518 0,
519 /* n */ SIZE16,
520 0,
521 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 522#if defined(HAS_QUAD)
a7a3cfaa 523 /* q */ sizeof(Quad_t),
80a13697 524#else
a7a3cfaa 525 0,
80a13697 526#endif
a7a3cfaa
TH
527 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 /* s */ SIZE16,
529 0, 0,
530 /* v */ SIZE16,
531 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 534 /* C */ sizeof(unsigned char),
80a13697 535#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 536 /* D */ LONG_DOUBLESIZE,
80a13697 537#else
a7a3cfaa 538 0,
80a13697 539#endif
a7a3cfaa
TH
540 0,
541 /* F */ NVSIZE,
542 0, 0,
543 /* I */ sizeof(unsigned int),
544 0, 0, 0, 0, 0, 0, 0,
545 /* J */ UVSIZE,
546 0,
547 /* L */ SIZE32,
548 0,
549 /* N */ SIZE32,
550 0, 0,
80a13697 551#if defined(HAS_QUAD)
a7a3cfaa 552 /* Q */ sizeof(Uquad_t),
80a13697 553#else
a7a3cfaa 554 0,
80a13697 555#endif
a7a3cfaa
TH
556 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 /* S */ SIZE16,
558 0,
559 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
560 /* V */ SIZE32,
561 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 /* shrieking */
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 /* i */ sizeof(int),
575 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* l */ sizeof(long),
577 0,
7212898e 578#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 579 /* n */ SIZE16,
7212898e 580#else
a7a3cfaa 581 0,
7212898e 582#endif
a7a3cfaa
TH
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 /* s */ sizeof(short),
585 0, 0,
7212898e 586#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 587 /* v */ SIZE16,
7212898e 588#else
a7a3cfaa 589 0,
7212898e 590#endif
a7a3cfaa
TH
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0,
594 /* I */ sizeof(unsigned int),
595 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* L */ sizeof(unsigned long),
597 0,
7212898e 598#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 599 /* N */ SIZE32,
7212898e 600#else
a7a3cfaa 601 0,
7212898e 602#endif
a7a3cfaa
TH
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 /* S */ sizeof(unsigned short),
605 0, 0,
7212898e 606#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 607 /* V */ SIZE32,
7212898e 608#else
a7a3cfaa 609 0,
7212898e 610#endif
a7a3cfaa
TH
611 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
613};
614#endif
615
08ca2aa3 616STATIC U8
f7fe979e 617uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 618{
08ca2aa3 619 STRLEN retlen;
0bcc34c2 620 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 621 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
622 /* We try to process malformed UTF-8 as much as possible (preferrably with
623 warnings), but these two mean we make no progress in the string and
624 might enter an infinite loop */
625 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
626 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
627 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 628 if (val >= 0x100) {
f337b084 629 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
630 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
631 "Character in '%c' format wrapped in unpack",
f337b084 632 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
633 val &= 0xff;
634 }
635 *s += retlen;
fe2774ed 636 return (U8)val;
08ca2aa3
TH
637}
638
f337b084
TH
639#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
640 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
641 *(U8 *)(s)++)
642
643STATIC bool
f7fe979e 644uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
645{
646 UV val;
647 STRLEN retlen;
f7fe979e 648 const char *from = *s;
08ca2aa3 649 int bad = 0;
f7fe979e 650 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
651 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
652 for (;buf_len > 0; buf_len--) {
653 if (from >= end) return FALSE;
f337b084 654 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
655 if (retlen == (STRLEN) -1 || retlen == 0) {
656 from += UTF8SKIP(from);
657 bad |= 1;
658 } else from += retlen;
659 if (val >= 0x100) {
660 bad |= 2;
661 val &= 0xff;
662 }
fe2774ed 663 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
664 }
665 /* We have enough characters for the buffer. Did we have problems ? */
666 if (bad) {
667 if (bad & 1) {
668 /* Rewalk the string fragment while warning */
f7fe979e 669 const char *ptr;
9e27e96a 670 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
671 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
672 if (ptr >= end) break;
f337b084 673 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
674 }
675 if (from > end) from = end;
676 }
677 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 678 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
679 WARN_PACK : WARN_UNPACK),
680 "Character(s) in '%c' format wrapped in %s",
fc241834 681 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 682 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
683 }
684 *s = from;
685 return TRUE;
686}
687
688STATIC bool
f7fe979e 689next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 690{
97aff369 691 dVAR;
08ca2aa3 692 STRLEN retlen;
0bcc34c2 693 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
694 if (val >= 0x100 || !ISUUCHAR(val) ||
695 retlen == (STRLEN) -1 || retlen == 0) {
696 *out = 0;
697 return FALSE;
698 }
699 *out = PL_uudmap[val] & 077;
f337b084 700 *s += retlen;
08ca2aa3
TH
701 return TRUE;
702}
78d46eaa 703
64844641 704STATIC char *
14333449 705S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 706 const U8 * const end = start + len;
64844641 707
f337b084 708 while (start < end) {
48fa4626
ST
709 const UV uv = NATIVE_TO_ASCII(*start);
710 if (UNI_IS_INVARIANT(uv))
711 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
712 else {
713 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
714 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
715 }
716 start++;
717 }
64844641 718 return dest;
f337b084
TH
719}
720
230e1fce
NC
721#define PUSH_BYTES(utf8, cur, buf, len) \
722STMT_START { \
64844641
AL
723 if (utf8) \
724 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
725 else { \
726 Copy(buf, cur, len, char); \
727 (cur) += (len); \
728 } \
f337b084
TH
729} STMT_END
730
731#define GROWING(utf8, cat, start, cur, in_len) \
732STMT_START { \
733 STRLEN glen = (in_len); \
3473cf63 734 if (utf8) glen *= UTF8_EXPAND; \
f337b084 735 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 736 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
737 (cur) = (start) + SvCUR(cat); \
738 } \
739} STMT_END
740
741#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
742STMT_START { \
f7fe979e 743 const STRLEN glen = (in_len); \
f337b084 744 STRLEN gl = glen; \
3473cf63 745 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
746 if ((cur) + gl >= (start) + SvLEN(cat)) { \
747 *cur = '\0'; \
b162af07 748 SvCUR_set((cat), (cur) - (start)); \
0bd48802 749 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
750 (cur) = (start) + SvCUR(cat); \
751 } \
752 PUSH_BYTES(utf8, cur, buf, glen); \
753} STMT_END
754
755#define PUSH_BYTE(utf8, s, byte) \
756STMT_START { \
757 if (utf8) { \
f7fe979e 758 const U8 au8 = (byte); \
64844641 759 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
760 } else *(U8 *)(s)++ = (byte); \
761} STMT_END
762
763/* Only to be used inside a loop (see the break) */
764#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
765STMT_START { \
766 STRLEN retlen; \
767 if (str >= end) break; \
768 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
769 if (retlen == (STRLEN) -1 || retlen == 0) { \
770 *cur = '\0'; \
771 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
772 } \
773 str += retlen; \
774} STMT_END
775
f7fe979e
AL
776static const char *_action( const tempsym_t* symptr )
777{
10edeb5d 778 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
779}
780
206947d2 781/* Returns the sizeof() struct described by pat */
028d1f6d 782STATIC I32
f337b084 783S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 784{
f337b084 785 I32 total = 0;
206947d2 786
49704364 787 while (next_symbol(symptr)) {
f337b084 788 I32 len;
f7fe979e 789 int size;
f337b084
TH
790
791 switch (symptr->howlen) {
fc241834 792 case e_star:
49704364 793 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 794 _action( symptr ) );
49704364 795 break;
f337b084
TH
796 default:
797 /* e_no_len and e_number */
798 len = symptr->length;
799 break;
49704364
LW
800 }
801
a7a3cfaa 802 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 803 if (!size) {
f7fe979e 804 int star;
80a13697
NC
805 /* endianness doesn't influence the size of a type */
806 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
807 default:
808 Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 810 _action( symptr ) );
28be1210
TH
811#ifdef PERL_PACK_CAN_SHRIEKSIGN
812 case '.' | TYPE_IS_SHRIEKING:
813 case '@' | TYPE_IS_SHRIEKING:
814#endif
80a13697 815 case '@':
28be1210 816 case '.':
80a13697
NC
817 case '/':
818 case 'U': /* XXXX Is it correct? */
819 case 'w':
820 case 'u':
821 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 822 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 823 _action( symptr ) );
80a13697
NC
824 case '%':
825 size = 0;
826 break;
827 case '(':
fc241834
RGS
828 {
829 tempsym_t savsym = *symptr;
830 symptr->patptr = savsym.grpbeg;
831 symptr->patend = savsym.grpend;
832 /* XXXX Theoretically, we need to measure many times at
833 different positions, since the subexpression may contain
834 alignment commands, but be not of aligned length.
835 Need to detect this and croak(). */
836 size = measure_struct(symptr);
837 *symptr = savsym;
838 break;
839 }
80a13697
NC
840 case 'X' | TYPE_IS_SHRIEKING:
841 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
842 */
843 if (!len) /* Avoid division by 0 */
844 len = 1;
845 len = total % len; /* Assumed: the start is aligned. */
846 /* FALL THROUGH */
847 case 'X':
848 size = -1;
849 if (total < len)
f7fe979e 850 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
851 break;
852 case 'x' | TYPE_IS_SHRIEKING:
853 if (!len) /* Avoid division by 0 */
854 len = 1;
855 star = total % len; /* Assumed: the start is aligned. */
856 if (star) /* Other portable ways? */
857 len = len - star;
858 else
859 len = 0;
860 /* FALL THROUGH */
861 case 'x':
862 case 'A':
863 case 'Z':
864 case 'a':
80a13697
NC
865 size = 1;
866 break;
867 case 'B':
868 case 'b':
869 len = (len + 7)/8;
870 size = 1;
871 break;
872 case 'H':
873 case 'h':
874 len = (len + 1)/2;
875 size = 1;
876 break;
78d46eaa 877
80a13697
NC
878 case 'P':
879 len = 1;
880 size = sizeof(char*);
78d46eaa
NC
881 break;
882 }
206947d2
IZ
883 }
884 total += len * size;
885 }
886 return total;
887}
888
49704364
LW
889
890/* locate matching closing parenthesis or bracket
891 * returns char pointer to char after match, or NULL
892 */
f7fe979e
AL
893STATIC const char *
894S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 895{
49704364 896 while (patptr < patend) {
f7fe979e 897 const char c = *patptr++;
49704364
LW
898
899 if (isSPACE(c))
900 continue;
901 else if (c == ender)
902 return patptr-1;
903 else if (c == '#') {
904 while (patptr < patend && *patptr != '\n')
905 patptr++;
906 continue;
907 } else if (c == '(')
908 patptr = group_end(patptr, patend, ')') + 1;
909 else if (c == '[')
910 patptr = group_end(patptr, patend, ']') + 1;
18529408 911 }
49704364
LW
912 Perl_croak(aTHX_ "No group ending character '%c' found in template",
913 ender);
914 return 0;
18529408
IZ
915}
916
49704364
LW
917
918/* Convert unsigned decimal number to binary.
919 * Expects a pointer to the first digit and address of length variable
920 * Advances char pointer to 1st non-digit char and returns number
fc241834 921 */
f7fe979e
AL
922STATIC const char *
923S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
LW
924{
925 I32 len = *patptr++ - '0';
926 while (isDIGIT(*patptr)) {
927 if (len >= 0x7FFFFFFF/10)
928 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929 len = (len * 10) + (*patptr++ - '0');
930 }
931 *lenptr = len;
932 return patptr;
933}
934
935/* The marvellous template parsing routine: Using state stored in *symptr,
936 * locates next template code and count
937 */
938STATIC bool
f337b084 939S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 940{
f7fe979e 941 const char* patptr = symptr->patptr;
0bcc34c2 942 const char* const patend = symptr->patend;
49704364
LW
943
944 symptr->flags &= ~FLAG_SLASH;
945
946 while (patptr < patend) {
947 if (isSPACE(*patptr))
948 patptr++;
949 else if (*patptr == '#') {
950 patptr++;
951 while (patptr < patend && *patptr != '\n')
952 patptr++;
953 if (patptr < patend)
954 patptr++;
955 } else {
fc241834 956 /* We should have found a template code */
49704364 957 I32 code = *patptr++ & 0xFF;
66c611c5 958 U32 inherited_modifiers = 0;
49704364
LW
959
960 if (code == ','){ /* grandfather in commas but with a warning */
961 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962 symptr->flags |= FLAG_COMMA;
963 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 964 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
965 }
966 continue;
967 }
fc241834 968
49704364 969 /* for '(', skip to ')' */
fc241834 970 if (code == '(') {
49704364
LW
971 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 973 _action( symptr ) );
49704364
LW
974 symptr->grpbeg = patptr;
975 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 978 _action( symptr ) );
49704364
LW
979 }
980
66c611c5
MHM
981 /* look for group modifiers to inherit */
982 if (TYPE_ENDIANNESS(symptr->flags)) {
983 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
985 }
986
1109a392
MHM
987 /* look for modifiers */
988 while (patptr < patend) {
b7787f18
AL
989 const char *allowed;
990 I32 modifier;
1109a392
MHM
991 switch (*patptr) {
992 case '!':
993 modifier = TYPE_IS_SHRIEKING;
7212898e 994 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 995 break;
7212898e 996#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
997 case '>':
998 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 999 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1000 break;
1001 case '<':
1002 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1004 break;
f337b084 1005#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1006 default:
b7787f18
AL
1007 allowed = "";
1008 modifier = 0;
1109a392
MHM
1009 break;
1010 }
66c611c5 1011
1109a392
MHM
1012 if (modifier == 0)
1013 break;
66c611c5 1014
1109a392
MHM
1015 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1017 allowed, _action( symptr ) );
66c611c5
MHM
1018
1019 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1020 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1021 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1022 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023 TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1025 *patptr, _action( symptr ) );
66c611c5 1026
041457d9 1027 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1028 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1029 "Duplicate modifier '%c' after '%c' in %s",
1030 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1031 _action( symptr ) );
1109a392 1032 }
66c611c5 1033
1109a392
MHM
1034 code |= modifier;
1035 patptr++;
49704364
LW
1036 }
1037
66c611c5
MHM
1038 /* inherit modifiers */
1039 code |= inherited_modifiers;
1040
fc241834 1041 /* look for count and/or / */
49704364
LW
1042 if (patptr < patend) {
1043 if (isDIGIT(*patptr)) {
1044 patptr = get_num( patptr, &symptr->length );
1045 symptr->howlen = e_number;
1046
1047 } else if (*patptr == '*') {
1048 patptr++;
1049 symptr->howlen = e_star;
1050
1051 } else if (*patptr == '[') {
f7fe979e 1052 const char* lenptr = ++patptr;
49704364
LW
1053 symptr->howlen = e_number;
1054 patptr = group_end( patptr, patend, ']' ) + 1;
1055 /* what kind of [] is it? */
1056 if (isDIGIT(*lenptr)) {
1057 lenptr = get_num( lenptr, &symptr->length );
1058 if( *lenptr != ']' )
1059 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1060 _action( symptr ) );
49704364
LW
1061 } else {
1062 tempsym_t savsym = *symptr;
1063 symptr->patend = patptr-1;
1064 symptr->patptr = lenptr;
1065 savsym.length = measure_struct(symptr);
1066 *symptr = savsym;
1067 }
1068 } else {
1069 symptr->howlen = e_no_len;
1070 symptr->length = 1;
1071 }
1072
1073 /* try to find / */
1074 while (patptr < patend) {
1075 if (isSPACE(*patptr))
1076 patptr++;
1077 else if (*patptr == '#') {
1078 patptr++;
1079 while (patptr < patend && *patptr != '\n')
1080 patptr++;
1081 if (patptr < patend)
1082 patptr++;
1083 } else {
66c611c5 1084 if (*patptr == '/') {
49704364
LW
1085 symptr->flags |= FLAG_SLASH;
1086 patptr++;
66c611c5
MHM
1087 if (patptr < patend &&
1088 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1089 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1090 _action( symptr ) );
49704364
LW
1091 }
1092 break;
1093 }
18529408 1094 }
49704364
LW
1095 } else {
1096 /* at end - no count, no / */
1097 symptr->howlen = e_no_len;
1098 symptr->length = 1;
1099 }
1100
1101 symptr->code = code;
fc241834 1102 symptr->patptr = patptr;
49704364 1103 return TRUE;
18529408 1104 }
49704364 1105 }
fc241834 1106 symptr->patptr = patptr;
49704364 1107 return FALSE;
18529408
IZ
1108}
1109
18529408 1110/*
fc241834 1111 There is no way to cleanly handle the case where we should process the
08ca2aa3 1112 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1113 (e.g. estimates like strend-s as an upper bound for the number of
1114 characters left wouldn't work). So if we foresee the need of this
1115 (pattern starts with U or contains U0), we want to work on the encoded
1116 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1117 themselves if they need to do a lot of unpacks like this on it
1118*/
fc241834 1119STATIC bool
08ca2aa3
TH
1120need_utf8(const char *pat, const char *patend)
1121{
1122 bool first = TRUE;
1123 while (pat < patend) {
1124 if (pat[0] == '#') {
1125 pat++;
f7fe979e 1126 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1127 if (!pat) return FALSE;
1128 } else if (pat[0] == 'U') {
1129 if (first || pat[1] == '0') return TRUE;
1130 } else first = FALSE;
1131 pat++;
1132 }
1133 return FALSE;
1134}
1135
1136STATIC char
1137first_symbol(const char *pat, const char *patend) {
1138 while (pat < patend) {
1139 if (pat[0] != '#') return pat[0];
1140 pat++;
f7fe979e 1141 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1142 if (!pat) return 0;
1143 pat++;
1144 }
1145 return 0;
1146}
1147
1148/*
7accc089
JH
1149=for apidoc unpackstring
1150
608d3aed
LW
1151The engine implementing unpack() Perl function. C<unpackstring> puts the
1152extracted list items on the stack and returns the number of elements.
1153Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1154
1155=cut */
1156
1157I32
f7fe979e 1158Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1159{
f7fe979e 1160 tempsym_t sym;
08ca2aa3 1161
f337b084 1162 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1163 else if (need_utf8(pat, patend)) {
1164 /* We probably should try to avoid this in case a scalar context call
1165 wouldn't get to the "U0" */
1166 STRLEN len = strend - s;
230e1fce 1167 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1168 SAVEFREEPV(s);
1169 strend = s + len;
f337b084 1170 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1171 }
1172
f337b084
TH
1173 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1174 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1175
f7fe979e 1176 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1177
1178 return unpack_rec(&sym, s, s, strend, NULL );
1179}
1180
4136a0f7 1181STATIC I32
f7fe979e 1182S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1183{
27da23d5 1184 dVAR; dSP;
18529408 1185 SV *sv;
f7fe979e 1186 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1187 howlen_t howlen;
a6ec74c1 1188
a6ec74c1 1189 I32 checksum = 0;
92d41999 1190 UV cuv = 0;
a6ec74c1 1191 NV cdouble = 0.0;
f337b084 1192 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1193 bool beyond = FALSE;
21c16052 1194 bool explicit_length;
9e27e96a 1195 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1196 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1197 symptr->strbeg = s - strbeg;
49704364 1198
49704364 1199 while (next_symbol(symptr)) {
a7a3cfaa 1200 packprops_t props;
9e27e96a 1201 I32 len;
f337b084 1202 I32 datumtype = symptr->code;
206947d2 1203 /* do first one only unless in list context
08ca2aa3 1204 / is implemented by unpacking the count, then popping it from the
206947d2 1205 stack, so must check that we're not in the middle of a / */
49704364 1206 if ( unpack_only_one
206947d2 1207 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1208 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1209 break;
49704364 1210
f337b084 1211 switch (howlen = symptr->howlen) {
fc241834
RGS
1212 case e_star:
1213 len = strend - strbeg; /* long enough */
49704364 1214 break;
f337b084
TH
1215 default:
1216 /* e_no_len and e_number */
1217 len = symptr->length;
1218 break;
49704364 1219 }
18529408 1220
21c16052 1221 explicit_length = TRUE;
a6ec74c1 1222 redo_switch:
49704364 1223 beyond = s >= strend;
a7a3cfaa
TH
1224
1225 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1226 if (props) {
1227 /* props nonzero means we can process this letter. */
9e27e96a
AL
1228 const long size = props & PACK_SIZE_MASK;
1229 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1230 if (len > howmany)
1231 len = howmany;
1232
1233 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1234 if (len && unpack_only_one) len = 1;
1235 EXTEND(SP, len);
1236 EXTEND_MORTAL(len);
78d46eaa
NC
1237 }
1238 }
a7a3cfaa 1239
1109a392 1240 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1241 default:
1109a392 1242 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1243
a6ec74c1 1244 case '%':
49704364 1245 if (howlen == e_no_len)
18529408 1246 len = 16; /* len is not specified */
a6ec74c1 1247 checksum = len;
92d41999 1248 cuv = 0;
a6ec74c1 1249 cdouble = 0;
18529408 1250 continue;
a6ec74c1 1251 break;
18529408
IZ
1252 case '(':
1253 {
49704364 1254 tempsym_t savsym = *symptr;
9e27e96a 1255 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1256 symptr->flags |= group_modifiers;
49704364 1257 symptr->patend = savsym.grpend;
28be1210 1258 symptr->previous = &savsym;
49704364 1259 symptr->level++;
18529408 1260 PUTBACK;
c6f750d1 1261 if (len && unpack_only_one) len = 1;
18529408 1262 while (len--) {
49704364 1263 symptr->patptr = savsym.grpbeg;
f337b084
TH
1264 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1265 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1266 unpack_rec(symptr, s, strbeg, strend, &s);
1267 if (s == strend && savsym.howlen == e_star)
49704364 1268 break; /* No way to continue */
18529408
IZ
1269 }
1270 SPAGAIN;
28be1210 1271 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1272 *symptr = savsym;
18529408
IZ
1273 break;
1274 }
28be1210
TH
1275#ifdef PERL_PACK_CAN_SHRIEKSIGN
1276 case '.' | TYPE_IS_SHRIEKING:
1277#endif
1278 case '.': {
9e27e96a 1279 const char *from;
28be1210
TH
1280 SV *sv;
1281#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1282 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1283#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1284 const bool u8 = utf8;
28be1210
TH
1285#endif
1286 if (howlen == e_star) from = strbeg;
1287 else if (len <= 0) from = s;
1288 else {
1289 tempsym_t *group = symptr;
1290
1291 while (--len && group) group = group->previous;
1292 from = group ? strbeg + group->strbeg : strbeg;
1293 }
1294 sv = from <= s ?
00646304
CB
1295 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1296 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1297 mXPUSHs(sv);
28be1210
TH
1298 break;
1299 }
1300#ifdef PERL_PACK_CAN_SHRIEKSIGN
1301 case '@' | TYPE_IS_SHRIEKING:
1302#endif
a6ec74c1 1303 case '@':
28be1210
TH
1304 s = strbeg + symptr->strbeg;
1305#ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1307#else /* PERL_PACK_CAN_SHRIEKSIGN */
1308 if (utf8)
1309#endif
1310 {
08ca2aa3
TH
1311 while (len > 0) {
1312 if (s >= strend)
1313 Perl_croak(aTHX_ "'@' outside of string in unpack");
1314 s += UTF8SKIP(s);
1315 len--;
1316 }
1317 if (s > strend)
1318 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1319 } else {
28be1210 1320 if (strend-s < len)
fc241834 1321 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1322 s += len;
08ca2aa3 1323 }
a6ec74c1 1324 break;
62f95557
IZ
1325 case 'X' | TYPE_IS_SHRIEKING:
1326 if (!len) /* Avoid division by 0 */
1327 len = 1;
08ca2aa3 1328 if (utf8) {
f7fe979e 1329 const char *hop, *last;
f337b084
TH
1330 I32 l = len;
1331 hop = last = strbeg;
1332 while (hop < s) {
1333 hop += UTF8SKIP(hop);
1334 if (--l == 0) {
08ca2aa3 1335 last = hop;
f337b084
TH
1336 l = len;
1337 }
fc241834 1338 }
f337b084
TH
1339 if (last > s)
1340 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1341 s = last;
1342 break;
f337b084
TH
1343 }
1344 len = (s - strbeg) % len;
62f95557 1345 /* FALL THROUGH */
a6ec74c1 1346 case 'X':
08ca2aa3
TH
1347 if (utf8) {
1348 while (len > 0) {
1349 if (s <= strbeg)
1350 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1351 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1352 if (s <= strbeg)
1353 Perl_croak(aTHX_ "'X' outside of string in unpack");
1354 }
1355 len--;
1356 }
1357 } else {
fc241834
RGS
1358 if (len > s - strbeg)
1359 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1360 s -= len;
08ca2aa3 1361 }
a6ec74c1 1362 break;
9e27e96a
AL
1363 case 'x' | TYPE_IS_SHRIEKING: {
1364 I32 ai32;
62f95557
IZ
1365 if (!len) /* Avoid division by 0 */
1366 len = 1;
230e1fce
NC
1367 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1368 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1369 if (ai32 == 0) break;
1370 len -= ai32;
9e27e96a 1371 }
62f95557 1372 /* FALL THROUGH */
a6ec74c1 1373 case 'x':
08ca2aa3
TH
1374 if (utf8) {
1375 while (len>0) {
1376 if (s >= strend)
1377 Perl_croak(aTHX_ "'x' outside of string in unpack");
1378 s += UTF8SKIP(s);
1379 len--;
1380 }
1381 } else {
fc241834
RGS
1382 if (len > strend - s)
1383 Perl_croak(aTHX_ "'x' outside of string in unpack");
1384 s += len;
f337b084 1385 }
a6ec74c1
JH
1386 break;
1387 case '/':
49704364
LW
1388 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1389 break;
a6ec74c1
JH
1390 case 'A':
1391 case 'Z':
1392 case 'a':
08ca2aa3
TH
1393 if (checksum) {
1394 /* Preliminary length estimate is assumed done in 'W' */
1395 if (len > strend - s) len = strend - s;
1396 goto W_checksum;
1397 }
1398 if (utf8) {
1399 I32 l;
f7fe979e 1400 const char *hop;
08ca2aa3
TH
1401 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1402 if (hop >= strend) {
1403 if (hop > strend)
1404 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1405 break;
fc241834 1406 }
a6ec74c1 1407 }
08ca2aa3
TH
1408 if (hop > strend)
1409 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1410 len = hop - s;
1411 } else if (len > strend - s)
1412 len = strend - s;
1413
1414 if (datumtype == 'Z') {
1415 /* 'Z' strips stuff after first null */
f7fe979e 1416 const char *ptr, *end;
f337b084
TH
1417 end = s + len;
1418 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1419 sv = newSVpvn(s, ptr-s);
1420 if (howlen == e_star) /* exact for 'Z*' */
1421 len = ptr-s + (ptr != strend ? 1 : 0);
1422 } else if (datumtype == 'A') {
1423 /* 'A' strips both nulls and spaces */
f7fe979e 1424 const char *ptr;
18bdf90a
TH
1425 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1426 for (ptr = s+len-1; ptr >= s; ptr--)
1427 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1428 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1429 if (ptr >= s) ptr += UTF8SKIP(ptr);
1430 else ptr++;
28be1210 1431 if (ptr > s+len)
18bdf90a
TH
1432 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1433 } else {
1434 for (ptr = s+len-1; ptr >= s; ptr--)
1435 if (*ptr != 0 && !isSPACE(*ptr)) break;
1436 ptr++;
1437 }
08ca2aa3
TH
1438 sv = newSVpvn(s, ptr-s);
1439 } else sv = newSVpvn(s, len);
1440
1441 if (utf8) {
1442 SvUTF8_on(sv);
1443 /* Undo any upgrade done due to need_utf8() */
f337b084 1444 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1445 sv_utf8_downgrade(sv, 0);
a6ec74c1 1446 }
6e449a3a 1447 mXPUSHs(sv);
08ca2aa3 1448 s += len;
a6ec74c1
JH
1449 break;
1450 case 'B':
08ca2aa3
TH
1451 case 'b': {
1452 char *str;
49704364 1453 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1454 len = (strend - s) * 8;
1455 if (checksum) {
1456 if (!PL_bitcount) {
08ca2aa3 1457 int bits;
a02a5408 1458 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1459 for (bits = 1; bits < 256; bits++) {
1460 if (bits & 1) PL_bitcount[bits]++;
1461 if (bits & 2) PL_bitcount[bits]++;
1462 if (bits & 4) PL_bitcount[bits]++;
1463 if (bits & 8) PL_bitcount[bits]++;
1464 if (bits & 16) PL_bitcount[bits]++;
1465 if (bits & 32) PL_bitcount[bits]++;
1466 if (bits & 64) PL_bitcount[bits]++;
1467 if (bits & 128) PL_bitcount[bits]++;
1468 }
1469 }
f337b084 1470 if (utf8)
08ca2aa3 1471 while (len >= 8 && s < strend) {
f337b084 1472 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1473 len -= 8;
1474 }
f337b084 1475 else
fc241834 1476 while (len >= 8) {
08ca2aa3 1477 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1478 len -= 8;
1479 }
08ca2aa3
TH
1480 if (len && s < strend) {
1481 U8 bits;
f337b084
TH
1482 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1483 if (datumtype == 'b')
a6ec74c1 1484 while (len-- > 0) {
92d41999 1485 if (bits & 1) cuv++;
a6ec74c1
JH
1486 bits >>= 1;
1487 }
f337b084 1488 else
a6ec74c1 1489 while (len-- > 0) {
08ca2aa3 1490 if (bits & 0x80) cuv++;
a6ec74c1
JH
1491 bits <<= 1;
1492 }
fc241834 1493 }
a6ec74c1
JH
1494 break;
1495 }
08ca2aa3 1496
561b68a9 1497 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1498 SvPOK_on(sv);
1499 str = SvPVX(sv);
1500 if (datumtype == 'b') {
f337b084 1501 U8 bits = 0;
f7fe979e 1502 const I32 ai32 = len;
08ca2aa3
TH
1503 for (len = 0; len < ai32; len++) {
1504 if (len & 7) bits >>= 1;
1505 else if (utf8) {
1506 if (s >= strend) break;
f337b084 1507 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1508 } else bits = *(U8 *) s++;
1509 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1510 }
08ca2aa3 1511 } else {
f337b084 1512 U8 bits = 0;
f7fe979e 1513 const I32 ai32 = len;
08ca2aa3
TH
1514 for (len = 0; len < ai32; len++) {
1515 if (len & 7) bits <<= 1;
1516 else if (utf8) {
1517 if (s >= strend) break;
f337b084 1518 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1519 } else bits = *(U8 *) s++;
1520 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1521 }
1522 }
1523 *str = '\0';
aa07b2f6 1524 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1525 XPUSHs(sv);
a6ec74c1 1526 break;
08ca2aa3 1527 }
a6ec74c1 1528 case 'H':
08ca2aa3
TH
1529 case 'h': {
1530 char *str;
fc241834 1531 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1532 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1533 len = (strend - s) * 2;
561b68a9 1534 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1535 SvPOK_on(sv);
1536 str = SvPVX(sv);
1537 if (datumtype == 'h') {
f337b084 1538 U8 bits = 0;
9e27e96a 1539 I32 ai32 = len;
fc241834
RGS
1540 for (len = 0; len < ai32; len++) {
1541 if (len & 1) bits >>= 4;
1542 else if (utf8) {
1543 if (s >= strend) break;
f337b084 1544 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1545 } else bits = * (U8 *) s++;
a6ec74c1
JH
1546 *str++ = PL_hexdigit[bits & 15];
1547 }
08ca2aa3 1548 } else {
f337b084 1549 U8 bits = 0;
f7fe979e 1550 const I32 ai32 = len;
08ca2aa3
TH
1551 for (len = 0; len < ai32; len++) {
1552 if (len & 1) bits <<= 4;
1553 else if (utf8) {
1554 if (s >= strend) break;
f337b084 1555 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1556 } else bits = *(U8 *) s++;
a6ec74c1
JH
1557 *str++ = PL_hexdigit[(bits >> 4) & 15];
1558 }
1559 }
1560 *str = '\0';
aa07b2f6 1561 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1562 XPUSHs(sv);
a6ec74c1 1563 break;
08ca2aa3 1564 }
1651fc44
ML
1565 case 'C':
1566 if (len == 0) {
1567 if (explicit_length)
1568 /* Switch to "character" mode */
1569 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1570 break;
1571 }
1572 /* FALL THROUGH */
a6ec74c1 1573 case 'c':
1651fc44
ML
1574 while (len-- > 0 && s < strend) {
1575 int aint;
1576 if (utf8)
1577 {
1578 STRLEN retlen;
1579 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1580 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1581 if (retlen == (STRLEN) -1 || retlen == 0)
1582 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1583 s += retlen;
1584 }
1585 else
1586 aint = *(U8 *)(s)++;
1587 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1588 aint -= 256;
08ca2aa3 1589 if (!checksum)
6e449a3a 1590 mPUSHi(aint);
73cb7263
NC
1591 else if (checksum > bits_in_uv)
1592 cdouble += (NV)aint;
1593 else
1594 cuv += aint;
a6ec74c1
JH
1595 }
1596 break;
08ca2aa3
TH
1597 case 'W':
1598 W_checksum:
1651fc44 1599 if (utf8) {
08ca2aa3 1600 while (len-- > 0 && s < strend) {
08ca2aa3 1601 STRLEN retlen;
f7fe979e 1602 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1603 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1604 if (retlen == (STRLEN) -1 || retlen == 0)
1605 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1606 s += retlen;
1607 if (!checksum)
6e449a3a 1608 mPUSHu(val);
08ca2aa3
TH
1609 else if (checksum > bits_in_uv)
1610 cdouble += (NV) val;
d6d3e8bd 1611 else
08ca2aa3 1612 cuv += val;
fc241834 1613 }
08ca2aa3 1614 } else if (!checksum)
a6ec74c1 1615 while (len-- > 0) {
f7fe979e 1616 const U8 ch = *(U8 *) s++;
6e449a3a 1617 mPUSHu(ch);
a6ec74c1 1618 }
08ca2aa3
TH
1619 else if (checksum > bits_in_uv)
1620 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1621 else
1622 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1623 break;
1624 case 'U':
35bcd338 1625 if (len == 0) {
08ca2aa3
TH
1626 if (explicit_length) {
1627 /* Switch to "bytes in UTF-8" mode */
f337b084 1628 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1629 else
1630 /* Should be impossible due to the need_utf8() test */
1631 Perl_croak(aTHX_ "U0 mode on a byte string");
1632 }
35bcd338
JH
1633 break;
1634 }
08ca2aa3 1635 if (len > strend - s) len = strend - s;
fc241834 1636 if (!checksum) {
08ca2aa3
TH
1637 if (len && unpack_only_one) len = 1;
1638 EXTEND(SP, len);
1639 EXTEND_MORTAL(len);
fc241834 1640 }
08ca2aa3
TH
1641 while (len-- > 0 && s < strend) {
1642 STRLEN retlen;
1643 UV auv;
1644 if (utf8) {
1645 U8 result[UTF8_MAXLEN];
f7fe979e 1646 const char *ptr = s;
08ca2aa3 1647 STRLEN len;
08ca2aa3
TH
1648 /* Bug: warns about bad utf8 even if we are short on bytes
1649 and will break out of the loop */
230e1fce
NC
1650 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1651 'U'))
08ca2aa3
TH
1652 break;
1653 len = UTF8SKIP(result);
fc241834 1654 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1655 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1656 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1657 s = ptr;
1658 } else {
1659 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1660 if (retlen == (STRLEN) -1 || retlen == 0)
1661 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1662 s += retlen;
1663 }
1664 if (!checksum)
6e449a3a 1665 mPUSHu(auv);
73cb7263 1666 else if (checksum > bits_in_uv)
08ca2aa3 1667 cdouble += (NV) auv;
73cb7263 1668 else
08ca2aa3 1669 cuv += auv;
a6ec74c1
JH
1670 }
1671 break;
49704364
LW
1672 case 's' | TYPE_IS_SHRIEKING:
1673#if SHORTSIZE != SIZE16
73cb7263 1674 while (len-- > 0) {
08ca2aa3 1675 short ashort;
f337b084
TH
1676 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1677 DO_BO_UNPACK(ashort, s);
08ca2aa3 1678 if (!checksum)
6e449a3a 1679 mPUSHi(ashort);
73cb7263
NC
1680 else if (checksum > bits_in_uv)
1681 cdouble += (NV)ashort;
1682 else
1683 cuv += ashort;
49704364
LW
1684 }
1685 break;
1686#else
1687 /* Fallthrough! */
a6ec74c1 1688#endif
49704364 1689 case 's':
73cb7263 1690 while (len-- > 0) {
08ca2aa3
TH
1691 I16 ai16;
1692
1693#if U16SIZE > SIZE16
1694 ai16 = 0;
1695#endif
f337b084 1696 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1697 DO_BO_UNPACK(ai16, 16);
1109a392 1698#if U16SIZE > SIZE16
73cb7263
NC
1699 if (ai16 > 32767)
1700 ai16 -= 65536;
a6ec74c1 1701#endif
08ca2aa3 1702 if (!checksum)
6e449a3a 1703 mPUSHi(ai16);
73cb7263
NC
1704 else if (checksum > bits_in_uv)
1705 cdouble += (NV)ai16;
1706 else
1707 cuv += ai16;
a6ec74c1
JH
1708 }
1709 break;
49704364
LW
1710 case 'S' | TYPE_IS_SHRIEKING:
1711#if SHORTSIZE != SIZE16
73cb7263 1712 while (len-- > 0) {
08ca2aa3 1713 unsigned short aushort;
f337b084
TH
1714 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1715 DO_BO_UNPACK(aushort, s);
08ca2aa3 1716 if (!checksum)
6e449a3a 1717 mPUSHu(aushort);
73cb7263
NC
1718 else if (checksum > bits_in_uv)
1719 cdouble += (NV)aushort;
1720 else
1721 cuv += aushort;
49704364
LW
1722 }
1723 break;
1724#else
1725 /* Fallhrough! */
1726#endif
a6ec74c1
JH
1727 case 'v':
1728 case 'n':
1729 case 'S':
73cb7263 1730 while (len-- > 0) {
08ca2aa3
TH
1731 U16 au16;
1732#if U16SIZE > SIZE16
1733 au16 = 0;
1734#endif
f337b084 1735 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1736 DO_BO_UNPACK(au16, 16);
a6ec74c1 1737#ifdef HAS_NTOHS
73cb7263
NC
1738 if (datumtype == 'n')
1739 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1740#endif
1741#ifdef HAS_VTOHS
73cb7263
NC
1742 if (datumtype == 'v')
1743 au16 = vtohs(au16);
a6ec74c1 1744#endif
08ca2aa3 1745 if (!checksum)
6e449a3a 1746 mPUSHu(au16);
73cb7263 1747 else if (checksum > bits_in_uv)
f337b084 1748 cdouble += (NV) au16;
73cb7263
NC
1749 else
1750 cuv += au16;
a6ec74c1
JH
1751 }
1752 break;
7212898e 1753#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1754 case 'v' | TYPE_IS_SHRIEKING:
1755 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1756 while (len-- > 0) {
08ca2aa3
TH
1757 I16 ai16;
1758# if U16SIZE > SIZE16
1759 ai16 = 0;
1760# endif
f337b084 1761 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1762# ifdef HAS_NTOHS
73cb7263 1763 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1764 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1765# endif /* HAS_NTOHS */
1766# ifdef HAS_VTOHS
73cb7263 1767 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1768 ai16 = (I16) vtohs((U16) ai16);
1769# endif /* HAS_VTOHS */
1770 if (!checksum)
6e449a3a 1771 mPUSHi(ai16);
73cb7263 1772 else if (checksum > bits_in_uv)
08ca2aa3 1773 cdouble += (NV) ai16;
73cb7263
NC
1774 else
1775 cuv += ai16;
068bd2e7
MHM
1776 }
1777 break;
08ca2aa3 1778#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1779 case 'i':
49704364 1780 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1781 while (len-- > 0) {
08ca2aa3 1782 int aint;
f337b084
TH
1783 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1784 DO_BO_UNPACK(aint, i);
08ca2aa3 1785 if (!checksum)
6e449a3a 1786 mPUSHi(aint);
73cb7263
NC
1787 else if (checksum > bits_in_uv)
1788 cdouble += (NV)aint;
1789 else
1790 cuv += aint;
a6ec74c1
JH
1791 }
1792 break;
1793 case 'I':
49704364 1794 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1795 while (len-- > 0) {
08ca2aa3 1796 unsigned int auint;
f337b084
TH
1797 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1798 DO_BO_UNPACK(auint, i);
08ca2aa3 1799 if (!checksum)
6e449a3a 1800 mPUSHu(auint);
73cb7263
NC
1801 else if (checksum > bits_in_uv)
1802 cdouble += (NV)auint;
1803 else
1804 cuv += auint;
a6ec74c1
JH
1805 }
1806 break;
92d41999 1807 case 'j':
73cb7263 1808 while (len-- > 0) {
08ca2aa3 1809 IV aiv;
f337b084 1810 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1811#if IVSIZE == INTSIZE
f337b084 1812 DO_BO_UNPACK(aiv, i);
1109a392 1813#elif IVSIZE == LONGSIZE
f337b084 1814 DO_BO_UNPACK(aiv, l);
1109a392 1815#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1816 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1817#else
1818 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1819#endif
08ca2aa3 1820 if (!checksum)
6e449a3a 1821 mPUSHi(aiv);
73cb7263
NC
1822 else if (checksum > bits_in_uv)
1823 cdouble += (NV)aiv;
1824 else
1825 cuv += aiv;
92d41999
JH
1826 }
1827 break;
1828 case 'J':
73cb7263 1829 while (len-- > 0) {
08ca2aa3 1830 UV auv;
f337b084 1831 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1832#if IVSIZE == INTSIZE
f337b084 1833 DO_BO_UNPACK(auv, i);
08ca2aa3 1834#elif IVSIZE == LONGSIZE
f337b084 1835 DO_BO_UNPACK(auv, l);
08ca2aa3 1836#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1837 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1838#else
1839 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1840#endif
08ca2aa3 1841 if (!checksum)
6e449a3a 1842 mPUSHu(auv);
73cb7263
NC
1843 else if (checksum > bits_in_uv)
1844 cdouble += (NV)auv;
1845 else
1846 cuv += auv;
92d41999
JH
1847 }
1848 break;
49704364
LW
1849 case 'l' | TYPE_IS_SHRIEKING:
1850#if LONGSIZE != SIZE32
73cb7263 1851 while (len-- > 0) {
08ca2aa3 1852 long along;
f337b084
TH
1853 SHIFT_VAR(utf8, s, strend, along, datumtype);
1854 DO_BO_UNPACK(along, l);
08ca2aa3 1855 if (!checksum)
6e449a3a 1856 mPUSHi(along);
73cb7263
NC
1857 else if (checksum > bits_in_uv)
1858 cdouble += (NV)along;
1859 else
1860 cuv += along;
49704364
LW
1861 }
1862 break;
1863#else
1864 /* Fallthrough! */
a6ec74c1 1865#endif
49704364 1866 case 'l':
73cb7263 1867 while (len-- > 0) {
08ca2aa3
TH
1868 I32 ai32;
1869#if U32SIZE > SIZE32
1870 ai32 = 0;
1871#endif
f337b084 1872 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1873 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1874#if U32SIZE > SIZE32
08ca2aa3 1875 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1876#endif
08ca2aa3 1877 if (!checksum)
6e449a3a 1878 mPUSHi(ai32);
73cb7263
NC
1879 else if (checksum > bits_in_uv)
1880 cdouble += (NV)ai32;
1881 else
1882 cuv += ai32;
a6ec74c1
JH
1883 }
1884 break;
49704364
LW
1885 case 'L' | TYPE_IS_SHRIEKING:
1886#if LONGSIZE != SIZE32
73cb7263 1887 while (len-- > 0) {
08ca2aa3 1888 unsigned long aulong;
f337b084
TH
1889 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1890 DO_BO_UNPACK(aulong, l);
08ca2aa3 1891 if (!checksum)
6e449a3a 1892 mPUSHu(aulong);
73cb7263
NC
1893 else if (checksum > bits_in_uv)
1894 cdouble += (NV)aulong;
1895 else
1896 cuv += aulong;
49704364
LW
1897 }
1898 break;
1899#else
1900 /* Fall through! */
1901#endif
a6ec74c1
JH
1902 case 'V':
1903 case 'N':
1904 case 'L':
73cb7263 1905 while (len-- > 0) {
08ca2aa3
TH
1906 U32 au32;
1907#if U32SIZE > SIZE32
1908 au32 = 0;
1909#endif
f337b084 1910 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1911 DO_BO_UNPACK(au32, 32);
a6ec74c1 1912#ifdef HAS_NTOHL
73cb7263
NC
1913 if (datumtype == 'N')
1914 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1915#endif
1916#ifdef HAS_VTOHL
73cb7263
NC
1917 if (datumtype == 'V')
1918 au32 = vtohl(au32);
a6ec74c1 1919#endif
08ca2aa3 1920 if (!checksum)
6e449a3a 1921 mPUSHu(au32);
fc241834
RGS
1922 else if (checksum > bits_in_uv)
1923 cdouble += (NV)au32;
1924 else
1925 cuv += au32;
a6ec74c1
JH
1926 }
1927 break;
7212898e 1928#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1929 case 'V' | TYPE_IS_SHRIEKING:
1930 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1931 while (len-- > 0) {
08ca2aa3
TH
1932 I32 ai32;
1933# if U32SIZE > SIZE32
1934 ai32 = 0;
1935# endif
f337b084 1936 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1937# ifdef HAS_NTOHL
73cb7263
NC
1938 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1939 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1940# endif
1941# ifdef HAS_VTOHL
73cb7263
NC
1942 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1943 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1944# endif
1945 if (!checksum)
6e449a3a 1946 mPUSHi(ai32);
73cb7263
NC
1947 else if (checksum > bits_in_uv)
1948 cdouble += (NV)ai32;
1949 else
1950 cuv += ai32;
068bd2e7
MHM
1951 }
1952 break;
08ca2aa3 1953#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1954 case 'p':
a6ec74c1 1955 while (len-- > 0) {
f7fe979e 1956 const char *aptr;
f337b084 1957 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1958 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1959 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1960 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1961 }
1962 break;
1963 case 'w':
a6ec74c1
JH
1964 {
1965 UV auv = 0;
1966 U32 bytes = 0;
fc241834 1967
08ca2aa3
TH
1968 while (len > 0 && s < strend) {
1969 U8 ch;
f337b084 1970 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1971 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1972 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1973 if (ch < 0x80) {
a6ec74c1 1974 bytes = 0;
6e449a3a 1975 mPUSHu(auv);
a6ec74c1
JH
1976 len--;
1977 auv = 0;
08ca2aa3 1978 continue;
a6ec74c1 1979 }
08ca2aa3 1980 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1981 const char *t;
a6ec74c1 1982
f5992bc4 1983 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1984 while (s < strend) {
f337b084 1985 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1986 sv = mul128(sv, (U8)(ch & 0x7f));
1987 if (!(ch & 0x80)) {
a6ec74c1
JH
1988 bytes = 0;
1989 break;
1990 }
1991 }
10516c54 1992 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1993 while (*t == '0')
1994 t++;
1995 sv_chop(sv, t);
6e449a3a 1996 mPUSHs(sv);
a6ec74c1
JH
1997 len--;
1998 auv = 0;
1999 }
2000 }
2001 if ((s >= strend) && bytes)
49704364 2002 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2003 }
2004 break;
2005 case 'P':
49704364
LW
2006 if (symptr->howlen == e_star)
2007 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2008 EXTEND(SP, 1);
2d3e0934 2009 if (s + sizeof(char*) <= strend) {
08ca2aa3 2010 char *aptr;
f337b084 2011 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2012 DO_BO_UNPACK_PC(aptr);
fc241834 2013 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2014 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2015 }
a6ec74c1
JH
2016 break;
2017#ifdef HAS_QUAD
2018 case 'q':
73cb7263 2019 while (len-- > 0) {
08ca2aa3 2020 Quad_t aquad;
f337b084
TH
2021 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2022 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2023 if (!checksum)
6e449a3a
MHM
2024 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2025 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2026 else if (checksum > bits_in_uv)
2027 cdouble += (NV)aquad;
2028 else
2029 cuv += aquad;
2030 }
a6ec74c1
JH
2031 break;
2032 case 'Q':
73cb7263 2033 while (len-- > 0) {
08ca2aa3 2034 Uquad_t auquad;
f337b084
TH
2035 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2036 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2037 if (!checksum)
6e449a3a
MHM
2038 mPUSHs(auquad <= UV_MAX ?
2039 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2040 else if (checksum > bits_in_uv)
2041 cdouble += (NV)auquad;
2042 else
2043 cuv += auquad;
a6ec74c1
JH
2044 }
2045 break;
08ca2aa3 2046#endif /* HAS_QUAD */
a6ec74c1
JH
2047 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2048 case 'f':
73cb7263 2049 while (len-- > 0) {
08ca2aa3 2050 float afloat;
f337b084 2051 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2052 DO_BO_UNPACK_N(afloat, float);
2053 if (!checksum)
6e449a3a 2054 mPUSHn(afloat);
08ca2aa3 2055 else
73cb7263 2056 cdouble += afloat;
fc241834 2057 }
a6ec74c1
JH
2058 break;
2059 case 'd':
73cb7263 2060 while (len-- > 0) {
08ca2aa3 2061 double adouble;
f337b084 2062 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2063 DO_BO_UNPACK_N(adouble, double);
2064 if (!checksum)
6e449a3a 2065 mPUSHn(adouble);
08ca2aa3 2066 else
73cb7263 2067 cdouble += adouble;
fc241834 2068 }
a6ec74c1 2069 break;
92d41999 2070 case 'F':
73cb7263 2071 while (len-- > 0) {
08ca2aa3 2072 NV anv;
f337b084 2073 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2074 DO_BO_UNPACK_N(anv, NV);
2075 if (!checksum)
6e449a3a 2076 mPUSHn(anv);
08ca2aa3 2077 else
73cb7263 2078 cdouble += anv;
fc241834 2079 }
92d41999
JH
2080 break;
2081#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2082 case 'D':
73cb7263 2083 while (len-- > 0) {
08ca2aa3 2084 long double aldouble;
f337b084 2085 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2086 DO_BO_UNPACK_N(aldouble, long double);
2087 if (!checksum)
6e449a3a 2088 mPUSHn(aldouble);
08ca2aa3
TH
2089 else
2090 cdouble += aldouble;
92d41999
JH
2091 }
2092 break;
2093#endif
a6ec74c1 2094 case 'u':
08ca2aa3 2095 {
f7fe979e 2096 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2097 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2098 if (l) SvPOK_on(sv);
2099 }
2100 if (utf8) {
2101 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2102 I32 a, b, c, d;
db187877 2103 char hunk[3];
08ca2aa3 2104
08ca2aa3
TH
2105 while (len > 0) {
2106 next_uni_uu(aTHX_ &s, strend, &a);
2107 next_uni_uu(aTHX_ &s, strend, &b);
2108 next_uni_uu(aTHX_ &s, strend, &c);
2109 next_uni_uu(aTHX_ &s, strend, &d);
2110 hunk[0] = (char)((a << 2) | (b >> 4));
2111 hunk[1] = (char)((b << 4) | (c >> 2));
2112 hunk[2] = (char)((c << 6) | d);
2113 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2114 len -= 3;
2115 }
2116 if (s < strend) {
f7fe979e
AL
2117 if (*s == '\n') {
2118 s++;
2119 }
08ca2aa3
TH
2120 else {
2121 /* possible checksum byte */
f7fe979e
AL
2122 const char *skip = s+UTF8SKIP(s);
2123 if (skip < strend && *skip == '\n')
2124 s = skip+1;
08ca2aa3
TH
2125 }
2126 }
2127 }
2128 } else {
fc241834
RGS
2129 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2130 I32 a, b, c, d;
db187877 2131 char hunk[3];
a6ec74c1 2132
fc241834
RGS
2133 len = PL_uudmap[*(U8*)s++] & 077;
2134 while (len > 0) {
2135 if (s < strend && ISUUCHAR(*s))
2136 a = PL_uudmap[*(U8*)s++] & 077;
2137 else
2138 a = 0;
2139 if (s < strend && ISUUCHAR(*s))
2140 b = PL_uudmap[*(U8*)s++] & 077;
2141 else
2142 b = 0;
2143 if (s < strend && ISUUCHAR(*s))
2144 c = PL_uudmap[*(U8*)s++] & 077;
2145 else
2146 c = 0;
2147 if (s < strend && ISUUCHAR(*s))
2148 d = PL_uudmap[*(U8*)s++] & 077;
2149 else
2150 d = 0;
2151 hunk[0] = (char)((a << 2) | (b >> 4));
2152 hunk[1] = (char)((b << 4) | (c >> 2));
2153 hunk[2] = (char)((c << 6) | d);
2154 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2155 len -= 3;
2156 }
2157 if (*s == '\n')
2158 s++;
2159 else /* possible checksum byte */
2160 if (s + 1 < strend && s[1] == '\n')
2161 s += 2;
a6ec74c1 2162 }
08ca2aa3
TH
2163 }
2164 XPUSHs(sv);
a6ec74c1
JH
2165 break;
2166 }
49704364 2167
a6ec74c1 2168 if (checksum) {
1109a392 2169 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2170 (checksum > bits_in_uv &&
08ca2aa3
TH
2171 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2172 NV trouble, anv;
a6ec74c1 2173
08ca2aa3 2174 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2175 while (checksum >= 16) {
2176 checksum -= 16;
08ca2aa3 2177 anv *= 65536.0;
a6ec74c1 2178 }
a6ec74c1 2179 while (cdouble < 0.0)
08ca2aa3
TH
2180 cdouble += anv;
2181 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2182 sv = newSVnv(cdouble);
a6ec74c1
JH
2183 }
2184 else {
fa8ec7c1
NC
2185 if (checksum < bits_in_uv) {
2186 UV mask = ((UV)1 << checksum) - 1;
92d41999 2187 cuv &= mask;
a6ec74c1 2188 }
c4c5f44a 2189 sv = newSVuv(cuv);
a6ec74c1 2190 }
6e449a3a 2191 mXPUSHs(sv);
a6ec74c1
JH
2192 checksum = 0;
2193 }
fc241834 2194
49704364
LW
2195 if (symptr->flags & FLAG_SLASH){
2196 if (SP - PL_stack_base - start_sp_offset <= 0)
2197 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2198 if( next_symbol(symptr) ){
2199 if( symptr->howlen == e_number )
2200 Perl_croak(aTHX_ "Count after length/code in unpack" );
2201 if( beyond ){
2202 /* ...end of char buffer then no decent length available */
2203 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2204 } else {
2205 /* take top of stack (hope it's numeric) */
2206 len = POPi;
2207 if( len < 0 )
2208 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2209 }
2210 } else {
2211 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2212 }
2213 datumtype = symptr->code;
21c16052 2214 explicit_length = FALSE;
49704364
LW
2215 goto redo_switch;
2216 }
a6ec74c1 2217 }
49704364 2218
18529408
IZ
2219 if (new_s)
2220 *new_s = s;
2221 PUTBACK;
2222 return SP - PL_stack_base - start_sp_offset;
2223}
2224
2225PP(pp_unpack)
2226{
97aff369 2227 dVAR;
18529408 2228 dSP;
bab9c0ac 2229 dPOPPOPssrl;
18529408
IZ
2230 I32 gimme = GIMME_V;
2231 STRLEN llen;
2232 STRLEN rlen;
5c144d81
NC
2233 const char *pat = SvPV_const(left, llen);
2234 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2235 const char *strend = s + rlen;
2236 const char *patend = pat + llen;
08ca2aa3 2237 I32 cnt;
18529408
IZ
2238
2239 PUTBACK;
7accc089 2240 cnt = unpackstring(pat, patend, s, strend,
49704364 2241 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2242 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2243
18529408
IZ
2244 SPAGAIN;
2245 if ( !cnt && gimme == G_SCALAR )
2246 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2247 RETURN;
2248}
2249
f337b084 2250STATIC U8 *
f7fe979e 2251doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2252{
f337b084 2253 *h++ = PL_uuemap[len];
a6ec74c1 2254 while (len > 2) {
f337b084
TH
2255 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2256 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2257 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2258 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2259 s += 3;
2260 len -= 3;
2261 }
2262 if (len > 0) {
f7fe979e 2263 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2264 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2265 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2266 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2267 *h++ = PL_uuemap[0];
a6ec74c1 2268 }
f337b084
TH
2269 *h++ = '\n';
2270 return h;
a6ec74c1
JH
2271}
2272
2273STATIC SV *
f7fe979e 2274S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2275{
8b6e33c7
AL
2276 SV *result = newSVpvn(s, l);
2277 char *const result_c = SvPV_nolen(result); /* convenience */
2278 char *out = result_c;
2279 bool skip = 1;
2280 bool ignore = 0;
a6ec74c1
JH
2281
2282 while (*s) {
2283 switch (*s) {
2284 case ' ':
2285 break;
2286 case '+':
2287 if (!skip) {
2288 SvREFCNT_dec(result);
2289 return (NULL);
2290 }
2291 break;
2292 case '0':
2293 case '1':
2294 case '2':
2295 case '3':
2296 case '4':
2297 case '5':
2298 case '6':
2299 case '7':
2300 case '8':
2301 case '9':
2302 skip = 0;
2303 if (!ignore) {
2304 *(out++) = *s;
2305 }
2306 break;
2307 case '.':
2308 ignore = 1;
2309 break;
2310 default:
2311 SvREFCNT_dec(result);
2312 return (NULL);
2313 }
2314 s++;
2315 }
2316 *(out++) = '\0';
2317 SvCUR_set(result, out - result_c);
2318 return (result);
2319}
2320
2321/* pnum must be '\0' terminated */
2322STATIC int
2323S_div128(pTHX_ SV *pnum, bool *done)
2324{
8b6e33c7
AL
2325 STRLEN len;
2326 char * const s = SvPV(pnum, len);
2327 char *t = s;
2328 int m = 0;
2329
2330 *done = 1;
2331 while (*t) {
2332 const int i = m * 10 + (*t - '0');
2333 const int r = (i >> 7); /* r < 10 */
2334 m = i & 0x7F;
2335 if (r) {
2336 *done = 0;
2337 }
2338 *(t++) = '0' + r;
a6ec74c1 2339 }
8b6e33c7
AL
2340 *(t++) = '\0';
2341 SvCUR_set(pnum, (STRLEN) (t - s));
2342 return (m);
a6ec74c1
JH
2343}
2344
18529408 2345/*
7accc089
JH
2346=for apidoc packlist
2347
2348The engine implementing pack() Perl function.
2349
bfce84ec
AL
2350=cut
2351*/
7accc089
JH
2352
2353void
f7fe979e 2354Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2355{
97aff369 2356 dVAR;
aadb217d
JH
2357 tempsym_t sym;
2358
f7fe979e 2359 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2360
f337b084
TH
2361 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2362 Also make sure any UTF8 flag is loaded */
56eb0262 2363 SvPV_force_nolen(cat);
bfce84ec
AL
2364 if (DO_UTF8(cat))
2365 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2366
49704364
LW
2367 (void)pack_rec( cat, &sym, beglist, endlist );
2368}
2369
f337b084
TH
2370/* like sv_utf8_upgrade, but also repoint the group start markers */
2371STATIC void
2372marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2373 STRLEN len;
2374 tempsym_t *group;
f7fe979e
AL
2375 const char *from_ptr, *from_start, *from_end, **marks, **m;
2376 char *to_start, *to_ptr;
f337b084
TH
2377
2378 if (SvUTF8(sv)) return;
2379
aa07b2f6 2380 from_start = SvPVX_const(sv);
f337b084
TH
2381 from_end = from_start + SvCUR(sv);
2382 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2383 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2384 if (from_ptr == from_end) {
2385 /* Simple case: no character needs to be changed */
2386 SvUTF8_on(sv);
2387 return;
2388 }
2389
3473cf63 2390 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2391 Newx(to_start, len, char);
f337b084
TH
2392 Copy(from_start, to_start, from_ptr-from_start, char);
2393 to_ptr = to_start + (from_ptr-from_start);
2394
a02a5408 2395 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2396 for (group=sym_ptr; group; group = group->previous)
2397 marks[group->level] = from_start + group->strbeg;
2398 marks[sym_ptr->level+1] = from_end+1;
2399 for (m = marks; *m < from_ptr; m++)
2400 *m = to_start + (*m-from_start);
2401
2402 for (;from_ptr < from_end; from_ptr++) {
2403 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2404 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2405 }
2406 *to_ptr = 0;
2407
2408 while (*m == from_ptr) *m++ = to_ptr;
2409 if (m != marks + sym_ptr->level+1) {
2410 Safefree(marks);
2411 Safefree(to_start);
2412 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2413 }
2414 for (group=sym_ptr; group; group = group->previous)
2415 group->strbeg = marks[group->level] - to_start;
2416 Safefree(marks);
2417
2418 if (SvOOK(sv)) {
2419 if (SvIVX(sv)) {
b162af07 2420 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2421 from_start -= SvIVX(sv);
2422 SvIV_set(sv, 0);
2423 }
2424 SvFLAGS(sv) &= ~SVf_OOK;
2425 }
2426 if (SvLEN(sv) != 0)
2427 Safefree(from_start);
f880fe2f 2428 SvPV_set(sv, to_start);
b162af07
SP
2429 SvCUR_set(sv, to_ptr - to_start);
2430 SvLEN_set(sv, len);
f337b084
TH
2431 SvUTF8_on(sv);
2432}
2433
2434/* Exponential string grower. Makes string extension effectively O(n)
2435 needed says how many extra bytes we need (not counting the final '\0')
2436 Only grows the string if there is an actual lack of space
2437*/
2438STATIC char *
0bd48802 2439S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2440 const STRLEN cur = SvCUR(sv);
2441 const STRLEN len = SvLEN(sv);
f337b084
TH
2442 STRLEN extend;
2443 if (len - cur > needed) return SvPVX(sv);
2444 extend = needed > len ? needed : len;
2445 return SvGROW(sv, len+extend+1);
2446}
49704364
LW
2447
2448STATIC
2449SV **
f337b084 2450S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2451{
97aff369 2452 dVAR;
49704364 2453 tempsym_t lookahead;
f337b084
TH
2454 I32 items = endlist - beglist;
2455 bool found = next_symbol(symptr);
2456 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2457 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2458
2459 if (symptr->level == 0 && found && symptr->code == 'U') {
2460 marked_upgrade(aTHX_ cat, symptr);
2461 symptr->flags |= FLAG_DO_UTF8;
2462 utf8 = 0;
49704364 2463 }
f337b084 2464 symptr->strbeg = SvCUR(cat);
49704364
LW
2465
2466 while (found) {
f337b084
TH
2467 SV *fromstr;
2468 STRLEN fromlen;
2469 I32 len;
a0714e2c 2470 SV *lengthcode = NULL;
49704364 2471 I32 datumtype = symptr->code;
f337b084
TH
2472 howlen_t howlen = symptr->howlen;
2473 char *start = SvPVX(cat);
2474 char *cur = start + SvCUR(cat);
49704364 2475
f337b084
TH
2476#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2477
2478 switch (howlen) {
fc241834 2479 case e_star:
f337b084
TH
2480 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2481 0 : items;
2482 break;
2483 default:
2484 /* e_no_len and e_number */
2485 len = symptr->length;
49704364
LW
2486 break;
2487 }
2488
f337b084 2489 if (len) {
a7a3cfaa 2490 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2491
a7a3cfaa
TH
2492 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2493 /* We can process this letter. */
2494 STRLEN size = props & PACK_SIZE_MASK;
2495 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2496 }
f337b084
TH
2497 }
2498
49704364
LW
2499 /* Look ahead for next symbol. Do we have code/code? */
2500 lookahead = *symptr;
2501 found = next_symbol(&lookahead);
246f24af
TH
2502 if (symptr->flags & FLAG_SLASH) {
2503 IV count;
f337b084 2504 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2505 if (strchr("aAZ", lookahead.code)) {
2506 if (lookahead.howlen == e_number) count = lookahead.length;
2507 else {
ce399ba6
NC
2508 if (items > 0) {
2509 if (SvGAMAGIC(*beglist)) {
2510 /* Avoid reading the active data more than once
2511 by copying it to a temporary. */
2512 STRLEN len;
2513 const char *const pv = SvPV_const(*beglist, len);
740cce10 2514 SV *const temp
59cd0e26
NC
2515 = newSVpvn_flags(pv, len,
2516 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2517 *beglist = temp;
2518 }
246f24af
TH
2519 count = DO_UTF8(*beglist) ?
2520 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2521 }
246f24af
TH
2522 else count = 0;
2523 if (lookahead.code == 'Z') count++;
2524 }
2525 } else {
2526 if (lookahead.howlen == e_number && lookahead.length < items)
2527 count = lookahead.length;
2528 else count = items;
2529 }
2530 lookahead.howlen = e_number;
2531 lookahead.length = count;
2532 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2533 }
49704364 2534
fc241834
RGS
2535 /* Code inside the switch must take care to properly update
2536 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2537 doesn't simply leave using break */
1109a392 2538 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2539 default:
f337b084
TH
2540 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2541 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2542 case '%':
49704364 2543 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2544 {
2545 char *from;
2546#ifdef PERL_PACK_CAN_SHRIEKSIGN
2547 case '.' | TYPE_IS_SHRIEKING:
2548#endif
2549 case '.':
2550 if (howlen == e_star) from = start;
2551 else if (len == 0) from = cur;
2552 else {
2553 tempsym_t *group = symptr;
2554
2555 while (--len && group) group = group->previous;
2556 from = group ? start + group->strbeg : start;
2557 }
2558 fromstr = NEXTFROM;
2559 len = SvIV(fromstr);
2560 goto resize;
2561#ifdef PERL_PACK_CAN_SHRIEKSIGN
2562 case '@' | TYPE_IS_SHRIEKING:
2563#endif
a6ec74c1 2564 case '@':
28be1210
TH
2565 from = start + symptr->strbeg;
2566 resize:
2567#ifdef PERL_PACK_CAN_SHRIEKSIGN
2568 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2569#else /* PERL_PACK_CAN_SHRIEKSIGN */
2570 if (utf8)
2571#endif
2572 if (len >= 0) {
2573 while (len && from < cur) {
2574 from += UTF8SKIP(from);
2575 len--;
2576 }
2577 if (from > cur)
2578 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2579 if (len) {
2580 /* Here we know from == cur */
2581 grow:
2582 GROWING(0, cat, start, cur, len);
2583 Zero(cur, len, char);
2584 cur += len;
2585 } else if (from < cur) {
2586 len = cur - from;
2587 goto shrink;
2588 } else goto no_change;
2589 } else {
2590 cur = from;
2591 len = -len;
2592 goto utf8_shrink;
f337b084 2593 }
28be1210
TH
2594 else {
2595 len -= cur - from;
f337b084 2596 if (len > 0) goto grow;
28be1210 2597 if (len == 0) goto no_change;
fc241834 2598 len = -len;
28be1210 2599 goto shrink;
f337b084 2600 }
a6ec74c1 2601 break;
28be1210 2602 }
fc241834 2603 case '(': {
49704364 2604 tempsym_t savsym = *symptr;
66c611c5
MHM
2605 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2606 symptr->flags |= group_modifiers;
49704364
LW
2607 symptr->patend = savsym.grpend;
2608 symptr->level++;
f337b084 2609 symptr->previous = &lookahead;
18529408 2610 while (len--) {
f337b084
TH
2611 U32 was_utf8;
2612 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2613 else symptr->flags &= ~FLAG_PARSE_UTF8;
2614 was_utf8 = SvUTF8(cat);
49704364 2615 symptr->patptr = savsym.grpbeg;
f337b084
TH
2616 beglist = pack_rec(cat, symptr, beglist, endlist);
2617 if (SvUTF8(cat) != was_utf8)
2618 /* This had better be an upgrade while in utf8==0 mode */
2619 utf8 = 1;
2620
49704364 2621 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2622 break; /* No way to continue */
2623 }
ee790063 2624 items = endlist - beglist;
f337b084
TH
2625 lookahead.flags = symptr->flags & ~group_modifiers;
2626 goto no_change;
18529408 2627 }
62f95557
IZ
2628 case 'X' | TYPE_IS_SHRIEKING:
2629 if (!len) /* Avoid division by 0 */
2630 len = 1;
f337b084
TH
2631 if (utf8) {
2632 char *hop, *last;
2633 I32 l = len;
2634 hop = last = start;
2635 while (hop < cur) {
2636 hop += UTF8SKIP(hop);
2637 if (--l == 0) {
2638 last = hop;
2639 l = len;
2640 }
2641 }
2642 if (last > cur)
2643 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2644 cur = last;
2645 break;
2646 }
2647 len = (cur-start) % len;
62f95557 2648 /* FALL THROUGH */
a6ec74c1 2649 case 'X':
f337b084
TH
2650 if (utf8) {
2651 if (len < 1) goto no_change;
28be1210 2652 utf8_shrink:
f337b084
TH
2653 while (len > 0) {
2654 if (cur <= start)
28be1210
TH
2655 Perl_croak(aTHX_ "'%c' outside of string in pack",
2656 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2657 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2658 if (cur <= start)
28be1210
TH
2659 Perl_croak(aTHX_ "'%c' outside of string in pack",
2660 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2661 }
2662 len--;
2663 }
2664 } else {
fc241834 2665 shrink:
f337b084 2666 if (cur - start < len)
28be1210
TH
2667 Perl_croak(aTHX_ "'%c' outside of string in pack",
2668 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2669 cur -= len;
2670 }
2671 if (cur < start+symptr->strbeg) {
2672 /* Make sure group starts don't point into the void */
2673 tempsym_t *group;
9e27e96a 2674 const STRLEN length = cur-start;
f337b084
TH
2675 for (group = symptr;
2676 group && length < group->strbeg;
2677 group = group->previous) group->strbeg = length;
2678 lookahead.strbeg = length;
2679 }
a6ec74c1 2680 break;
fc241834
RGS
2681 case 'x' | TYPE_IS_SHRIEKING: {
2682 I32 ai32;
62f95557
IZ
2683 if (!len) /* Avoid division by 0 */
2684 len = 1;
230e1fce 2685 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2686 else ai32 = (cur - start) % len;
2687 if (ai32 == 0) goto no_change;
2688 len -= ai32;
2689 }
2690 /* FALL THROUGH */
a6ec74c1 2691 case 'x':
f337b084 2692 goto grow;
a6ec74c1
JH
2693 case 'A':
2694 case 'Z':
f337b084 2695 case 'a': {
f7fe979e 2696 const char *aptr;
f337b084 2697
a6ec74c1 2698 fromstr = NEXTFROM;
e62f0680 2699 aptr = SvPV_const(fromstr, fromlen);
f337b084 2700 if (DO_UTF8(fromstr)) {
f7fe979e 2701 const char *end, *s;
f337b084
TH
2702
2703 if (!utf8 && !SvUTF8(cat)) {
2704 marked_upgrade(aTHX_ cat, symptr);
2705 lookahead.flags |= FLAG_DO_UTF8;
2706 lookahead.strbeg = symptr->strbeg;
2707 utf8 = 1;
2708 start = SvPVX(cat);
2709 cur = start + SvCUR(cat);
2710 }
fc241834 2711 if (howlen == e_star) {
f337b084
TH
2712 if (utf8) goto string_copy;
2713 len = fromlen+1;
2714 }
2715 s = aptr;
2716 end = aptr + fromlen;
2717 fromlen = datumtype == 'Z' ? len-1 : len;
2718 while ((I32) fromlen > 0 && s < end) {
2719 s += UTF8SKIP(s);
2720 fromlen--;
2721 }
2722 if (s > end)
2723 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2724 if (utf8) {
fc241834 2725 len = fromlen;
f337b084
TH
2726 if (datumtype == 'Z') len++;
2727 fromlen = s-aptr;
2728 len += fromlen;
fc241834 2729
f337b084 2730 goto string_copy;
fc241834 2731 }
f337b084
TH
2732 fromlen = len - fromlen;
2733 if (datumtype == 'Z') fromlen--;
2734 if (howlen == e_star) {
2735 len = fromlen;
2736 if (datumtype == 'Z') len++;
fc241834 2737 }
f337b084 2738 GROWING(0, cat, start, cur, len);
fc241834 2739 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2740 datumtype | TYPE_IS_PACK))
2741 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2742 cur += fromlen;
a6ec74c1 2743 len -= fromlen;
f337b084
TH
2744 } else if (utf8) {
2745 if (howlen == e_star) {
2746 len = fromlen;
2747 if (datumtype == 'Z') len++;
a6ec74c1 2748 }
f337b084
TH
2749 if (len <= (I32) fromlen) {
2750 fromlen = len;
2751 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2752 }
fc241834 2753 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2754 upgrade, so:
2755 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2756 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2757 len -= fromlen;
2758 while (fromlen > 0) {
230e1fce 2759 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2760 aptr++;
2761 fromlen--;
fc241834 2762 }
f337b084
TH
2763 } else {
2764 string_copy:
2765 if (howlen == e_star) {
2766 len = fromlen;
2767 if (datumtype == 'Z') len++;
2768 }
2769 if (len <= (I32) fromlen) {
2770 fromlen = len;
2771 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2772 }
f337b084
TH
2773 GROWING(0, cat, start, cur, len);
2774 Copy(aptr, cur, fromlen, char);
2775 cur += fromlen;
2776 len -= fromlen;
a6ec74c1 2777 }
f337b084
TH
2778 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2779 cur += len;
a6ec74c1 2780 break;
f337b084 2781 }
a6ec74c1 2782 case 'B':
f337b084 2783 case 'b': {
b83604b4 2784 const char *str, *end;
f337b084
TH
2785 I32 l, field_len;
2786 U8 bits;
2787 bool utf8_source;
2788 U32 utf8_flags;
a6ec74c1 2789
fc241834 2790 fromstr = NEXTFROM;
b83604b4 2791 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2792 end = str + fromlen;
2793 if (DO_UTF8(fromstr)) {
2794 utf8_source = TRUE;
041457d9 2795 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2796 } else {
2797 utf8_source = FALSE;
2798 utf8_flags = 0; /* Unused, but keep compilers happy */
2799 }
2800 if (howlen == e_star) len = fromlen;
2801 field_len = (len+7)/8;
2802 GROWING(utf8, cat, start, cur, field_len);
2803 if (len > (I32)fromlen) len = fromlen;
2804 bits = 0;
2805 l = 0;
2806 if (datumtype == 'B')
2807 while (l++ < len) {
2808 if (utf8_source) {
95b63a38 2809 UV val = 0;
f337b084
TH
2810 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2811 bits |= val & 1;
2812 } else bits |= *str++ & 1;
2813 if (l & 7) bits <<= 1;
fc241834 2814 else {
f337b084
TH
2815 PUSH_BYTE(utf8, cur, bits);
2816 bits = 0;
a6ec74c1
JH
2817 }
2818 }
f337b084
TH
2819 else
2820 /* datumtype == 'b' */
2821 while (l++ < len) {
2822 if (utf8_source) {
95b63a38 2823 UV val = 0;
f337b084
TH
2824 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2825 if (val & 1) bits |= 0x80;
2826 } else if (*str++ & 1)
2827 bits |= 0x80;
2828 if (l & 7) bits >>= 1;
fc241834 2829 else {
f337b084
TH
2830 PUSH_BYTE(utf8, cur, bits);
2831 bits = 0;
a6ec74c1
JH
2832 }
2833 }
f337b084
TH
2834 l--;
2835 if (l & 7) {
fc241834 2836 if (datumtype == 'B')
f337b084 2837 bits <<= 7 - (l & 7);
fc241834 2838 else
f337b084
TH
2839 bits >>= 7 - (l & 7);
2840 PUSH_BYTE(utf8, cur, bits);
2841 l += 7;
a6ec74c1 2842 }
f337b084
TH
2843 /* Determine how many chars are left in the requested field */
2844 l /= 8;
2845 if (howlen == e_star) field_len = 0;
2846 else field_len -= l;
2847 Zero(cur, field_len, char);
2848 cur += field_len;
a6ec74c1 2849 break;
f337b084 2850 }
a6ec74c1 2851 case 'H':
f337b084 2852 case 'h': {
10516c54 2853 const char *str, *end;
f337b084
TH
2854 I32 l, field_len;
2855 U8 bits;
2856 bool utf8_source;
2857 U32 utf8_flags;
a6ec74c1 2858
fc241834 2859 fromstr = NEXTFROM;
10516c54 2860 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2861 end = str + fromlen;
2862 if (DO_UTF8(fromstr)) {
2863 utf8_source = TRUE;
041457d9 2864 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2865 } else {
2866 utf8_source = FALSE;
2867 utf8_flags = 0; /* Unused, but keep compilers happy */
2868 }
2869 if (howlen == e_star) len = fromlen;
2870 field_len = (len+1)/2;
2871 GROWING(utf8, cat, start, cur, field_len);
2872 if (!utf8 && len > (I32)fromlen) len = fromlen;
2873 bits = 0;
2874 l = 0;
2875 if (datumtype == 'H')
2876 while (l++ < len) {
2877 if (utf8_source) {
95b63a38 2878 UV val = 0;
f337b084
TH
2879 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2880 if (val < 256 && isALPHA(val))
2881 bits |= (val + 9) & 0xf;
a6ec74c1 2882 else
f337b084
TH
2883 bits |= val & 0xf;
2884 } else if (isALPHA(*str))
2885 bits |= (*str++ + 9) & 0xf;
2886 else
2887 bits |= *str++ & 0xf;
2888 if (l & 1) bits <<= 4;
fc241834 2889 else {
f337b084
TH
2890 PUSH_BYTE(utf8, cur, bits);
2891 bits = 0;
a6ec74c1
JH
2892 }
2893 }
f337b084
TH
2894 else
2895 while (l++ < len) {
2896 if (utf8_source) {
95b63a38 2897 UV val = 0;
f337b084
TH
2898 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2899 if (val < 256 && isALPHA(val))
2900 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2901 else
f337b084
TH
2902 bits |= (val & 0xf) << 4;
2903 } else if (isALPHA(*str))
2904 bits |= ((*str++ + 9) & 0xf) << 4;
2905 else
2906 bits |= (*str++ & 0xf) << 4;
2907 if (l & 1) bits >>= 4;
fc241834 2908 else {
f337b084
TH
2909 PUSH_BYTE(utf8, cur, bits);
2910 bits = 0;
a6ec74c1 2911 }
fc241834 2912 }
f337b084
TH
2913 l--;
2914 if (l & 1) {
2915 PUSH_BYTE(utf8, cur, bits);
2916 l++;
2917 }
2918 /* Determine how many chars are left in the requested field */
2919 l /= 2;
2920 if (howlen == e_star) field_len = 0;
2921 else field_len -= l;
2922 Zero(cur, field_len, char);
2923 cur += field_len;
2924 break;
fc241834
RGS
2925 }
2926 case 'c':
f337b084
TH
2927 while (len-- > 0) {
2928 IV aiv;
2929 fromstr = NEXTFROM;
2930 aiv = SvIV(fromstr);
2931 if ((-128 > aiv || aiv > 127) &&
2932 ckWARN(WARN_PACK))
2933 Perl_warner(aTHX_ packWARN(WARN_PACK),
2934 "Character in 'c' format wrapped in pack");
585ec06d 2935 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2936 }
2937 break;
2938 case 'C':
f337b084
TH
2939 if (len == 0) {
2940 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2941 break;
2942 }
a6ec74c1 2943 while (len-- > 0) {
f337b084 2944 IV aiv;
a6ec74c1 2945 fromstr = NEXTFROM;
f337b084
TH
2946 aiv = SvIV(fromstr);
2947 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2948 ckWARN(WARN_PACK))
2949 Perl_warner(aTHX_ packWARN(WARN_PACK),
2950 "Character in 'C' format wrapped in pack");
1651fc44 2951 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2952 }
fc241834
RGS
2953 break;
2954 case 'W': {
2955 char *end;
670f1322 2956 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2957
2958 end = start+SvLEN(cat)-1;
2959 if (utf8) end -= UTF8_MAXLEN-1;
2960 while (len-- > 0) {
2961 UV auv;
2962 fromstr = NEXTFROM;
2963 auv = SvUV(fromstr);
2964 if (in_bytes) auv = auv % 0x100;
2965 if (utf8) {
2966 W_utf8:
2967 if (cur > end) {
2968 *cur = '\0';
b162af07 2969 SvCUR_set(cat, cur - start);
fc241834
RGS
2970
2971 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2972 end = start+SvLEN(cat)-UTF8_MAXLEN;
2973 }
230e1fce
NC
2974 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2975 NATIVE_TO_UNI(auv),
041457d9 2976 warn_utf8 ?
230e1fce 2977 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2978 } else {
2979 if (auv >= 0x100) {
2980 if (!SvUTF8(cat)) {
2981 *cur = '\0';
b162af07 2982 SvCUR_set(cat, cur - start);
fc241834
RGS
2983 marked_upgrade(aTHX_ cat, symptr);
2984 lookahead.flags |= FLAG_DO_UTF8;
2985 lookahead.strbeg = symptr->strbeg;
2986 utf8 = 1;
2987 start = SvPVX(cat);
2988 cur = start + SvCUR(cat);
2989 end = start+SvLEN(cat)-UTF8_MAXLEN;
2990 goto W_utf8;
2991 }
2992 if (ckWARN(WARN_PACK))
2993 Perl_warner(aTHX_ packWARN(WARN_PACK),
2994 "Character in 'W' format wrapped in pack");
2995 auv &= 0xff;
2996 }
2997 if (cur >= end) {
2998 *cur = '\0';
b162af07 2999 SvCUR_set(cat, cur - start);
fc241834
RGS
3000 GROWING(0, cat, start, cur, len+1);
3001 end = start+SvLEN(cat)-1;
3002 }
fe2774ed 3003 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3004 }
3005 }
3006 break;
fc241834
RGS
3007 }
3008 case 'U': {
3009 char *end;
3010
3011 if (len == 0) {
3012 if (!(symptr->flags & FLAG_DO_UTF8)) {
3013 marked_upgrade(aTHX_ cat, symptr);
3014 lookahead.flags |= FLAG_DO_UTF8;
3015 lookahead.strbeg = symptr->strbeg;
3016 }
3017 utf8 = 0;
3018 goto no_change;
3019 }
3020
3021 end = start+SvLEN(cat);
3022 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3023 while (len-- > 0) {
fc241834 3024 UV auv;
a6ec74c1 3025 fromstr = NEXTFROM;
fc241834
RGS
3026 auv = SvUV(fromstr);
3027 if (utf8) {
230e1fce 3028 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3029 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3030 warn_utf8 ?
fc241834
RGS
3031 0 : UNICODE_ALLOW_ANY);
3032 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3033 *cur = '\0';
b162af07 3034 SvCUR_set(cat, cur - start);
fc241834
RGS
3035 GROWING(0, cat, start, cur,
3036 len+(endb-buffer)*UTF8_EXPAND);
3037 end = start+SvLEN(cat);
3038 }
64844641 3039 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3040 } else {
3041 if (cur >= end) {
3042 *cur = '\0';
b162af07 3043 SvCUR_set(cat, cur - start);
fc241834
RGS
3044 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3045 end = start+SvLEN(cat)-UTF8_MAXLEN;
3046 }
230e1fce 3047 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3048 warn_utf8 ?
230e1fce 3049 0 : UNICODE_ALLOW_ANY);
fc241834 3050 }
a6ec74c1 3051 }
a6ec74c1 3052 break;
fc241834 3053 }
a6ec74c1
JH
3054 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3055 case 'f':
a6ec74c1 3056 while (len-- > 0) {
f337b084
TH
3057 float afloat;
3058 NV anv;
a6ec74c1 3059 fromstr = NEXTFROM;
f337b084 3060 anv = SvNV(fromstr);
5cdb9e01 3061#ifdef __VOS__
f337b084 3062 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3063 during conversion from double to float into infinity, so we
3064 do it by hand. This code should either be generalized for
3065 any OS that needs it, or removed if and when VOS implements
3066 posix-976 (suggestion to support mapping to infinity).
3067 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3068{
3069extern const float _float_constants[];
f337b084 3070 if (anv > FLT_MAX)
fc241834 3071 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3072 else if (anv < -FLT_MAX)
fc241834 3073 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3074 else afloat = (float) anv;
3722f0dc 3075}
f337b084 3076#else /* __VOS__ */
baf3cf9c 3077# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3078 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3079 * on Alpha; fake it if we don't have them.
3080 */
f337b084 3081 if (anv > FLT_MAX)
fc241834 3082 afloat = FLT_MAX;
f337b084 3083 else if (anv < -FLT_MAX)
fc241834 3084 afloat = -FLT_MAX;
f337b084 3085 else afloat = (float)anv;
baf3cf9c 3086# else
f337b084 3087 afloat = (float)anv;
baf3cf9c 3088# endif
f337b084 3089#endif /* __VOS__ */
1109a392 3090 DO_BO_PACK_N(afloat, float);
f337b084 3091 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3092 }
3093 break;
3094 case 'd':
a6ec74c1 3095 while (len-- > 0) {
f337b084
TH
3096 double adouble;
3097 NV anv;
a6ec74c1 3098 fromstr = NEXTFROM;
f337b084 3099 anv = SvNV(fromstr);
5cdb9e01 3100#ifdef __VOS__
f337b084 3101 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3102 during conversion from long double to double into infinity,
3103 so we do it by hand. This code should either be generalized
3104 for any OS that needs it, or removed if and when VOS
3105 implements posix-976 (suggestion to support mapping to
3106 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3107{
3108extern const double _double_constants[];
f337b084 3109 if (anv > DBL_MAX)
fc241834 3110 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3111 else if (anv < -DBL_MAX)
fc241834 3112 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3113 else adouble = (double) anv;
3722f0dc 3114}
f337b084 3115#else /* __VOS__ */
baf3cf9c 3116# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3117 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3118 * on Alpha; fake it if we don't have them.
3119 */
f337b084 3120 if (anv > DBL_MAX)
fc241834 3121 adouble = DBL_MAX;
f337b084 3122 else if (anv < -DBL_MAX)
fc241834 3123 adouble = -DBL_MAX;
f337b084 3124 else adouble = (double)anv;
baf3cf9c 3125# else
f337b084 3126 adouble = (double)anv;
baf3cf9c 3127# endif
f337b084 3128#endif /* __VOS__ */
1109a392 3129 DO_BO_PACK_N(adouble, double);
f337b084 3130 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3131 }
3132 break;
fc241834
RGS
3133 case 'F': {
3134 NV anv;
1109a392 3135 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3136 while (len-- > 0) {
3137 fromstr = NEXTFROM;
3138 anv = SvNV(fromstr);
1109a392 3139 DO_BO_PACK_N(anv, NV);
fc241834 3140 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3141 }
3142 break;
fc241834 3143 }
92d41999 3144#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3145 case 'D': {
3146 long double aldouble;
1109a392
MHM
3147 /* long doubles can have unused bits, which may be nonzero */
3148 Zero(&aldouble, 1, long double);
92d41999
JH
3149 while (len-- > 0) {
3150 fromstr = NEXTFROM;
3151 aldouble = (long double)SvNV(fromstr);
1109a392 3152 DO_BO_PACK_N(aldouble, long double);
fc241834 3153 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3154 }
3155 break;
fc241834 3156 }
92d41999 3157#endif
7212898e 3158#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3159 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3160#endif
a6ec74c1
JH
3161 case 'n':
3162 while (len-- > 0) {
f337b084 3163 I16 ai16;
a6ec74c1 3164 fromstr = NEXTFROM;
ef108786 3165 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3166#ifdef HAS_HTONS
ef108786 3167 ai16 = PerlSock_htons(ai16);
a6ec74c1 3168#endif
f337b084 3169 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3170 }
3171 break;
7212898e 3172#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3173 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3174#endif
a6ec74c1
JH
3175 case 'v':
3176 while (len-- > 0) {
f337b084 3177 I16 ai16;
a6ec74c1 3178 fromstr = NEXTFROM;
ef108786 3179 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3180#ifdef HAS_HTOVS
ef108786 3181 ai16 = htovs(ai16);
a6ec74c1 3182#endif
f337b084 3183 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3184 }
3185 break;
49704364 3186 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3187#if SHORTSIZE != SIZE16
fc241834 3188 while (len-- > 0) {
f337b084 3189 unsigned short aushort;
fc241834
RGS
3190 fromstr = NEXTFROM;
3191 aushort = SvUV(fromstr);
3192 DO_BO_PACK(aushort, s);
f337b084 3193 PUSH_VAR(utf8, cur, aushort);
fc241834 3194 }
49704364
LW
3195 break;
3196#else
3197 /* Fall through! */
a6ec74c1 3198#endif
49704364 3199 case 'S':
fc241834 3200 while (len-- > 0) {
f337b084 3201 U16 au16;
fc241834
RGS
3202 fromstr = NEXTFROM;
3203 au16 = (U16)SvUV(fromstr);
3204 DO_BO_PACK(au16, 16);
f337b084 3205 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3206 }
3207 break;
49704364 3208 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3209#if SHORTSIZE != SIZE16
fc241834 3210 while (len-- > 0) {
f337b084 3211 short ashort;
fc241834
RGS
3212 fromstr = NEXTFROM;
3213 ashort = SvIV(fromstr);
3214 DO_BO_PACK(ashort, s);
f337b084 3215 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3216 }
49704364
LW
3217 break;
3218#else
3219 /* Fall through! */
a6ec74c1 3220#endif
49704364
LW
3221 case 's':
3222 while (len-- > 0) {
f337b084 3223 I16 ai16;
49704364 3224 fromstr = NEXTFROM;
ef108786
MHM
3225 ai16 = (I16)SvIV(fromstr);
3226 DO_BO_PACK(ai16, 16);
f337b084 3227 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3228 }
3229 break;
3230 case 'I':
49704364 3231 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3232 while (len-- > 0) {
f337b084 3233 unsigned int auint;
a6ec74c1
JH
3234 fromstr = NEXTFROM;
3235 auint = SvUV(fromstr);
1109a392 3236 DO_BO_PACK(auint, i);
f337b084 3237 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3238 }
3239 break;
92d41999
JH
3240 case 'j':
3241 while (len-- > 0) {
f337b084 3242 IV aiv;
92d41999
JH
3243 fromstr = NEXTFROM;
3244 aiv = SvIV(fromstr);
1109a392
MHM
3245#if IVSIZE == INTSIZE
3246 DO_BO_PACK(aiv, i);
3247#elif IVSIZE == LONGSIZE
3248 DO_BO_PACK(aiv, l);
3249#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3250 DO_BO_PACK(aiv, 64);
f337b084
TH
3251#else
3252 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3253#endif
f337b084 3254 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3255 }
3256 break;
3257 case 'J':
3258 while (len-- > 0) {
f337b084 3259 UV auv;
92d41999
JH
3260 fromstr = NEXTFROM;
3261 auv = SvUV(fromstr);
1109a392
MHM
3262#if UVSIZE == INTSIZE
3263 DO_BO_PACK(auv, i);
3264#elif UVSIZE == LONGSIZE
3265 DO_BO_PACK(auv, l);
3266#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3267 DO_BO_PACK(auv, 64);
f337b084
TH
3268#else
3269 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3270#endif
f337b084 3271 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3272 }
3273 break;
a6ec74c1
JH
3274 case 'w':
3275 while (len-- > 0) {
f337b084 3276 NV anv;
a6ec74c1 3277 fromstr = NEXTFROM;
15e9f109 3278 anv = SvNV(fromstr);
a6ec74c1 3279
f337b084
TH
3280 if (anv < 0) {
3281 *cur = '\0';
b162af07 3282 SvCUR_set(cat, cur - start);
49704364 3283 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3284 }
a6ec74c1 3285
196b62db
NC
3286 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3287 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3288 any negative IVs will have already been got by the croak()
3289 above. IOK is untrue for fractions, so we test them
3290 against UV_MAX_P1. */
f337b084
TH
3291 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3292 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3293 char *in = buf + sizeof(buf);
196b62db 3294 UV auv = SvUV(fromstr);
a6ec74c1
JH
3295
3296 do {
eb160463 3297 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3298 auv >>= 7;
3299 } while (auv);
3300 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3301 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3302 in, (buf + sizeof(buf)) - in);
3303 } else if (SvPOKp(fromstr))
3304 goto w_string;
a6ec74c1 3305 else if (SvNOKp(fromstr)) {
0258719b
NC
3306 /* 10**NV_MAX_10_EXP is the largest power of 10
3307 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3308 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3309 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3310 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3311 Some C compilers are strict about integral constant
3312 expressions so we conservatively divide by a slightly
3313 smaller integer instead of multiplying by the exact
3314 floating-point value.
0258719b
NC
3315 */
3316#ifdef NV_MAX_10_EXP
f337b084 3317 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3318 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3319#else
f337b084 3320 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3321 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3322#endif
a6ec74c1
JH
3323 char *in = buf + sizeof(buf);
3324
8b6e33c7 3325 anv = Perl_floor(anv);
a6ec74c1 3326 do {
8b6e33c7 3327 const NV next = Perl_floor(anv / 128);
a6ec74c1 3328 if (in <= buf) /* this cannot happen ;-) */
49704364 3329 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3330 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3331 anv = next;
3332 } while (anv > 0);
a6ec74c1 3333 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3334 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3335 in, (buf + sizeof(buf)) - in);
3336 } else {
8b6e33c7
AL
3337 const char *from;
3338 char *result, *in;
735b914b
JH
3339 SV *norm;
3340 STRLEN len;
3341 bool done;
3342
f337b084 3343 w_string:
735b914b 3344 /* Copy string and check for compliance */
349d4f2f 3345 from = SvPV_const(fromstr, len);
735b914b 3346 if ((norm = is_an_int(from, len)) == NULL)
49704364 3347 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3348
a02a5408 3349 Newx(result, len, char);
735b914b
JH
3350 in = result + len;
3351 done = FALSE;
f337b084 3352 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3353 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3354 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3355 in, (result + len) - in);
735b914b
JH
3356 Safefree(result);
3357 SvREFCNT_dec(norm); /* free norm */
fc241834 3358 }
a6ec74c1
JH
3359 }
3360 break;
3361 case 'i':
49704364 3362 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3363 while (len-- > 0) {
f337b084 3364 int aint;
a6ec74c1
JH
3365 fromstr = NEXTFROM;
3366 aint = SvIV(fromstr);
1109a392 3367 DO_BO_PACK(aint, i);
f337b084 3368 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3369 }
3370 break;
7212898e 3371#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3372 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3373#endif
a6ec74c1
JH
3374 case 'N':
3375 while (len-- > 0) {
f337b084 3376 U32 au32;
a6ec74c1 3377 fromstr = NEXTFROM;
ef108786 3378 au32 = SvUV(fromstr);
a6ec74c1 3379#ifdef HAS_HTONL
ef108786 3380 au32 = PerlSock_htonl(au32);
a6ec74c1 3381#endif
f337b084 3382 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3383 }
3384 break;
7212898e 3385#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3386 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3387#endif
a6ec74c1
JH
3388 case 'V':
3389 while (len-- > 0) {
f337b084 3390 U32 au32;
a6ec74c1 3391 fromstr = NEXTFROM;
ef108786 3392 au32 = SvUV(fromstr);
a6ec74c1 3393#ifdef HAS_HTOVL
ef108786 3394 au32 = htovl(au32);
a6ec74c1 3395#endif
f337b084 3396 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3397 }
3398 break;
49704364 3399 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3400#if LONGSIZE != SIZE32
fc241834 3401 while (len-- > 0) {
f337b084 3402 unsigned long aulong;
fc241834
RGS
3403 fromstr = NEXTFROM;
3404 aulong = SvUV(fromstr);
3405 DO_BO_PACK(aulong, l);
f337b084 3406 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3407 }
49704364
LW
3408 break;
3409#else
3410 /* Fall though! */
a6ec74c1 3411#endif
49704364 3412 case 'L':
fc241834 3413 while (len-- > 0) {
f337b084 3414 U32 au32;
fc241834
RGS
3415 fromstr = NEXTFROM;
3416 au32 = SvUV(fromstr);
3417 DO_BO_PACK(au32, 32);
f337b084 3418 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3419 }
3420 break;
49704364 3421 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3422#if LONGSIZE != SIZE32
fc241834 3423 while (len-- > 0) {
f337b084 3424 long along;
fc241834
RGS
3425 fromstr = NEXTFROM;
3426 along = SvIV(fromstr);
3427 DO_BO_PACK(along, l);
f337b084 3428 PUSH_VAR(utf8, cur, along);
a6ec74c1 3429 }
49704364
LW
3430 break;
3431#else
3432 /* Fall though! */
a6ec74c1 3433#endif
49704364
LW
3434 case 'l':
3435 while (len-- > 0) {
f337b084 3436 I32 ai32;
49704364 3437 fromstr = NEXTFROM;
ef108786
MHM
3438 ai32 = SvIV(fromstr);
3439 DO_BO_PACK(ai32, 32);
f337b084 3440 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3441 }
3442 break;
3443#ifdef HAS_QUAD
3444 case 'Q':
3445 while (len-- > 0) {
f337b084 3446 Uquad_t auquad;
a6ec74c1 3447 fromstr = NEXTFROM;
f337b084 3448 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3449 DO_BO_PACK(auquad, 64);
f337b084 3450 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3451 }
3452 break;
3453 case 'q':
3454 while (len-- > 0) {
f337b084 3455 Quad_t aquad;
a6ec74c1
JH
3456 fromstr = NEXTFROM;
3457 aquad = (Quad_t)SvIV(fromstr);
1109a392 3458 DO_BO_PACK(aquad, 64);
f337b084 3459 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3460 }
3461 break;
f337b084 3462#endif /* HAS_QUAD */
a6ec74c1
JH
3463 case 'P':
3464 len = 1; /* assume SV is correct length */
f337b084 3465 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3466 /* Fall through! */
a6ec74c1
JH
3467 case 'p':
3468 while (len-- > 0) {
83003860 3469 const char *aptr;
f337b084 3470
a6ec74c1 3471 fromstr = NEXTFROM;
28a4f200 3472 SvGETMAGIC(fromstr);
3473 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3474 else {
a6ec74c1
JH
3475 /* XXX better yet, could spirit away the string to
3476 * a safe spot and hang on to it until the result
3477 * of pack() (and all copies of the result) are
3478 * gone.
3479 */
041457d9
DM
3480 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3481 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3482 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3483 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3484 }
3485 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3486 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3487 else
2596d9fe 3488 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3489 }
07409e01 3490 DO_BO_PACK_PC(aptr);
f337b084 3491 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3492 }
3493 break;
fc241834 3494 case 'u': {
f7fe979e 3495 const char *aptr, *aend;
fc241834 3496 bool from_utf8;
f337b084 3497
a6ec74c1 3498 fromstr = NEXTFROM;
fc241834
RGS
3499 if (len <= 2) len = 45;
3500 else len = len / 3 * 3;
3501 if (len >= 64) {
0fdc08b6
RGS
3502 if (ckWARN(WARN_PACK))
3503 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS