This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Regression test for the fix in change #25308,
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
WL
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV *tmpNew = newSVpvn("0000000000", 10);
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
66c611c5 243
f337b084 244#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
245
246# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
247# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
248
249# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
250
251# define DO_BO_UNPACK(var, type) \
1109a392 252 STMT_START { \
66c611c5 253 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
254 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
255 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
256 default: break; \
257 } \
258 } STMT_END
259
7212898e 260# define DO_BO_PACK(var, type) \
1109a392 261 STMT_START { \
66c611c5 262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
265 default: break; \
266 } \
267 } STMT_END
268
07409e01 269# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 270 STMT_START { \
66c611c5 271 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 272 case TYPE_IS_BIG_ENDIAN: \
07409e01 273 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
274 break; \
275 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 276 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
277 break; \
278 default: \
279 break; \
280 } \
281 } STMT_END
282
07409e01 283# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 284 STMT_START { \
66c611c5 285 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 286 case TYPE_IS_BIG_ENDIAN: \
07409e01 287 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
288 break; \
289 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 290 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
291 break; \
292 default: \
293 break; \
294 } \
295 } STMT_END
296
7212898e 297# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
298 STMT_START { \
299 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
300 case TYPE_IS_BIG_ENDIAN: \
301 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
302 "platform", #action, #type); \
303 break; \
304 case TYPE_IS_LITTLE_ENDIAN: \
305 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
306 "platform", #action, #type); \
307 break; \
308 default: \
309 break; \
310 } \
311 } STMT_END
312
7212898e 313# if PTRSIZE == INTSIZE
07409e01
NC
314# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
315# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
316# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
317# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 318# elif PTRSIZE == LONGSIZE
07409e01
NC
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
7212898e
NC
323# else
324# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
325# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
326# endif
1109a392 327
7212898e 328# if defined(my_htolen) && defined(my_letohn) && \
1109a392 329 defined(my_htoben) && defined(my_betohn)
7212898e 330# define DO_BO_UNPACK_N(var, type) \
1109a392 331 STMT_START { \
66c611c5 332 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
333 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
334 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
335 default: break; \
336 } \
337 } STMT_END
338
7212898e 339# define DO_BO_PACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
7212898e
NC
347# else
348# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
349# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
350# endif
351
f337b084 352#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 353
78d46eaa 354#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 355#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
356#define PACK_SIZE_MASK 0x3F
357
78d46eaa
NC
358/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
359 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
360
361#if TYPE_IS_SHRIEKING != 0x100
362 ++++shriek offset should be 256
363#endif
364
365typedef U8 packprops_t;
78d46eaa
NC
366#if 'J'-'I' == 1
367/* ASCII */
a7a3cfaa
TH
368const packprops_t packprops[512] = {
369 /* normal */
370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
374 0, 0, 0,
375 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 376#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 377 /* D */ LONG_DOUBLESIZE,
80a13697 378#else
a7a3cfaa 379 0,
80a13697 380#endif
a7a3cfaa
TH
381 0,
382 /* F */ NVSIZE,
383 0, 0,
384 /* I */ sizeof(unsigned int),
385 /* J */ UVSIZE,
386 0,
387 /* L */ SIZE32,
388 0,
389 /* N */ SIZE32,
390 0, 0,
80a13697 391#if defined(HAS_QUAD)
a7a3cfaa 392 /* Q */ sizeof(Uquad_t),
80a13697 393#else
a7a3cfaa 394 0,
80a13697 395#endif
a7a3cfaa
TH
396 0,
397 /* S */ SIZE16,
398 0,
399 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
400 /* V */ SIZE32,
401 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 /* c */ sizeof(char),
404 /* d */ sizeof(double),
405 0,
406 /* f */ sizeof(float),
407 0, 0,
408 /* i */ sizeof(int),
409 /* j */ IVSIZE,
410 0,
411 /* l */ SIZE32,
412 0,
413 /* n */ SIZE16,
414 0,
415 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 416#if defined(HAS_QUAD)
a7a3cfaa 417 /* q */ sizeof(Quad_t),
80a13697 418#else
a7a3cfaa 419 0,
80a13697 420#endif
a7a3cfaa
TH
421 0,
422 /* s */ SIZE16,
423 0, 0,
424 /* v */ SIZE16,
425 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
426 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
427 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
428 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
429 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0,
435 /* shrieking */
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 /* I */ sizeof(unsigned int),
442 0, 0,
443 /* L */ sizeof(unsigned long),
444 0,
7212898e 445#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 446 /* N */ SIZE32,
7212898e 447#else
a7a3cfaa 448 0,
7212898e 449#endif
a7a3cfaa
TH
450 0, 0, 0, 0,
451 /* S */ sizeof(unsigned short),
452 0, 0,
7212898e 453#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 454 /* V */ SIZE32,
7212898e 455#else
a7a3cfaa 456 0,
7212898e 457#endif
a7a3cfaa
TH
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0,
460 /* i */ sizeof(int),
461 0, 0,
462 /* l */ sizeof(long),
463 0,
7212898e 464#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 465 /* n */ SIZE16,
7212898e 466#else
a7a3cfaa 467 0,
7212898e 468#endif
a7a3cfaa
TH
469 0, 0, 0, 0,
470 /* s */ sizeof(short),
471 0, 0,
7212898e 472#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 473 /* v */ SIZE16,
7212898e 474#else
a7a3cfaa 475 0,
7212898e 476#endif
a7a3cfaa
TH
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
486};
487#else
488/* EBCDIC (or bust) */
a7a3cfaa
TH
489const packprops_t packprops[512] = {
490 /* normal */
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0,
500 /* c */ sizeof(char),
501 /* d */ sizeof(double),
502 0,
503 /* f */ sizeof(float),
504 0, 0,
505 /* i */ sizeof(int),
506 0, 0, 0, 0, 0, 0, 0,
507 /* j */ IVSIZE,
508 0,
509 /* l */ SIZE32,
510 0,
511 /* n */ SIZE16,
512 0,
513 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 514#if defined(HAS_QUAD)
a7a3cfaa 515 /* q */ sizeof(Quad_t),
80a13697 516#else
a7a3cfaa 517 0,
80a13697 518#endif
a7a3cfaa
TH
519 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 /* s */ SIZE16,
521 0, 0,
522 /* v */ SIZE16,
523 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 527#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 528 /* D */ LONG_DOUBLESIZE,
80a13697 529#else
a7a3cfaa 530 0,
80a13697 531#endif
a7a3cfaa
TH
532 0,
533 /* F */ NVSIZE,
534 0, 0,
535 /* I */ sizeof(unsigned int),
536 0, 0, 0, 0, 0, 0, 0,
537 /* J */ UVSIZE,
538 0,
539 /* L */ SIZE32,
540 0,
541 /* N */ SIZE32,
542 0, 0,
80a13697 543#if defined(HAS_QUAD)
a7a3cfaa 544 /* Q */ sizeof(Uquad_t),
80a13697 545#else
a7a3cfaa 546 0,
80a13697 547#endif
a7a3cfaa
TH
548 0, 0, 0, 0, 0, 0, 0, 0, 0,
549 /* S */ SIZE16,
550 0,
551 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
552 /* V */ SIZE32,
553 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 0, 0, 0, 0, 0, 0, 0, 0, 0,
556 /* shrieking */
557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 /* i */ sizeof(int),
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* l */ sizeof(long),
569 0,
7212898e 570#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 571 /* n */ SIZE16,
7212898e 572#else
a7a3cfaa 573 0,
7212898e 574#endif
a7a3cfaa
TH
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* s */ sizeof(short),
577 0, 0,
7212898e 578#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 579 /* v */ SIZE16,
7212898e 580#else
a7a3cfaa 581 0,
7212898e 582#endif
a7a3cfaa
TH
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 0, 0, 0,
586 /* I */ sizeof(unsigned int),
587 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* L */ sizeof(unsigned long),
589 0,
7212898e 590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 591 /* N */ SIZE32,
7212898e 592#else
a7a3cfaa 593 0,
7212898e 594#endif
a7a3cfaa
TH
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* S */ sizeof(unsigned short),
597 0, 0,
7212898e 598#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 599 /* V */ SIZE32,
7212898e 600#else
a7a3cfaa 601 0,
7212898e 602#endif
a7a3cfaa
TH
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
605};
606#endif
607
08ca2aa3 608STATIC U8
f7fe979e 609uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3
TH
610{
611 UV val;
612 STRLEN retlen;
f337b084
TH
613 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
614 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
615 /* We try to process malformed UTF-8 as much as possible (preferrably with
616 warnings), but these two mean we make no progress in the string and
617 might enter an infinite loop */
618 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
619 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
620 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 621 if (val >= 0x100) {
f337b084 622 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Character in '%c' format wrapped in unpack",
f337b084 625 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
626 val &= 0xff;
627 }
628 *s += retlen;
fe2774ed 629 return (U8)val;
08ca2aa3
TH
630}
631
f337b084
TH
632#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
633 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
634 *(U8 *)(s)++)
635
636STATIC bool
f7fe979e 637uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
638{
639 UV val;
640 STRLEN retlen;
f7fe979e 641 const char *from = *s;
08ca2aa3 642 int bad = 0;
f7fe979e 643 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
644 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
645 for (;buf_len > 0; buf_len--) {
646 if (from >= end) return FALSE;
f337b084 647 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
648 if (retlen == (STRLEN) -1 || retlen == 0) {
649 from += UTF8SKIP(from);
650 bad |= 1;
651 } else from += retlen;
652 if (val >= 0x100) {
653 bad |= 2;
654 val &= 0xff;
655 }
fe2774ed 656 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
657 }
658 /* We have enough characters for the buffer. Did we have problems ? */
659 if (bad) {
660 if (bad & 1) {
661 /* Rewalk the string fragment while warning */
f7fe979e 662 const char *ptr;
9e27e96a 663 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
664 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
665 if (ptr >= end) break;
f337b084 666 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
667 }
668 if (from > end) from = end;
669 }
670 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 671 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
672 WARN_PACK : WARN_UNPACK),
673 "Character(s) in '%c' format wrapped in %s",
fc241834 674 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 675 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
676 }
677 *s = from;
678 return TRUE;
679}
680
681STATIC bool
f7fe979e 682next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3
TH
683{
684 UV val;
685 STRLEN retlen;
f337b084 686 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
687 if (val >= 0x100 || !ISUUCHAR(val) ||
688 retlen == (STRLEN) -1 || retlen == 0) {
689 *out = 0;
690 return FALSE;
691 }
692 *out = PL_uudmap[val] & 077;
f337b084 693 *s += retlen;
08ca2aa3
TH
694 return TRUE;
695}
78d46eaa 696
f337b084 697STATIC void
f7fe979e 698bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
f337b084 699 U8 buffer[UTF8_MAXLEN];
f7fe979e 700 const U8 *end = start + len;
f337b084
TH
701 char *d = *dest;
702 while (start < end) {
f7fe979e 703 const int length =
f337b084
TH
704 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
705 switch(length) {
706 case 1:
707 *d++ = buffer[0];
708 break;
709 case 2:
710 *d++ = buffer[0];
711 *d++ = buffer[1];
712 break;
713 default:
714 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
715 *start, length);
716 }
717 start++;
718 }
719 *dest = d;
720}
721
230e1fce
NC
722#define PUSH_BYTES(utf8, cur, buf, len) \
723STMT_START { \
724 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
725 else { \
726 Copy(buf, cur, len, char); \
727 (cur) += (len); \
728 } \
f337b084
TH
729} STMT_END
730
731#define GROWING(utf8, cat, start, cur, in_len) \
732STMT_START { \
733 STRLEN glen = (in_len); \
3473cf63 734 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
735 if ((cur) + glen >= (start) + SvLEN(cat)) { \
736 (start) = sv_exp_grow(aTHX_ cat, glen); \
737 (cur) = (start) + SvCUR(cat); \
738 } \
739} STMT_END
740
741#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
742STMT_START { \
f7fe979e 743 const STRLEN glen = (in_len); \
f337b084 744 STRLEN gl = glen; \
3473cf63 745 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
746 if ((cur) + gl >= (start) + SvLEN(cat)) { \
747 *cur = '\0'; \
b162af07 748 SvCUR_set((cat), (cur) - (start)); \
f337b084
TH
749 (start) = sv_exp_grow(aTHX_ cat, gl); \
750 (cur) = (start) + SvCUR(cat); \
751 } \
752 PUSH_BYTES(utf8, cur, buf, glen); \
753} STMT_END
754
755#define PUSH_BYTE(utf8, s, byte) \
756STMT_START { \
757 if (utf8) { \
f7fe979e 758 const U8 au8 = (byte); \
f337b084
TH
759 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
760 } else *(U8 *)(s)++ = (byte); \
761} STMT_END
762
763/* Only to be used inside a loop (see the break) */
764#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
765STMT_START { \
766 STRLEN retlen; \
767 if (str >= end) break; \
768 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
769 if (retlen == (STRLEN) -1 || retlen == 0) { \
770 *cur = '\0'; \
771 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
772 } \
773 str += retlen; \
774} STMT_END
775
f7fe979e
AL
776static const char *_action( const tempsym_t* symptr )
777{
778 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
779}
780
206947d2 781/* Returns the sizeof() struct described by pat */
028d1f6d 782STATIC I32
f337b084 783S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 784{
f337b084 785 I32 total = 0;
206947d2 786
49704364 787 while (next_symbol(symptr)) {
f337b084 788 I32 len;
f7fe979e 789 int size;
f337b084
TH
790
791 switch (symptr->howlen) {
fc241834 792 case e_star:
49704364 793 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 794 _action( symptr ) );
49704364 795 break;
f337b084
TH
796 default:
797 /* e_no_len and e_number */
798 len = symptr->length;
799 break;
49704364
WL
800 }
801
a7a3cfaa 802 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 803 if (!size) {
f7fe979e 804 int star;
80a13697
NC
805 /* endianness doesn't influence the size of a type */
806 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
807 default:
808 Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 810 _action( symptr ) );
28be1210
TH
811#ifdef PERL_PACK_CAN_SHRIEKSIGN
812 case '.' | TYPE_IS_SHRIEKING:
813 case '@' | TYPE_IS_SHRIEKING:
814#endif
80a13697 815 case '@':
28be1210 816 case '.':
80a13697
NC
817 case '/':
818 case 'U': /* XXXX Is it correct? */
819 case 'w':
820 case 'u':
821 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 822 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 823 _action( symptr ) );
80a13697
NC
824 case '%':
825 size = 0;
826 break;
827 case '(':
fc241834
RGS
828 {
829 tempsym_t savsym = *symptr;
830 symptr->patptr = savsym.grpbeg;
831 symptr->patend = savsym.grpend;
832 /* XXXX Theoretically, we need to measure many times at
833 different positions, since the subexpression may contain
834 alignment commands, but be not of aligned length.
835 Need to detect this and croak(). */
836 size = measure_struct(symptr);
837 *symptr = savsym;
838 break;
839 }
80a13697
NC
840 case 'X' | TYPE_IS_SHRIEKING:
841 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
842 */
843 if (!len) /* Avoid division by 0 */
844 len = 1;
845 len = total % len; /* Assumed: the start is aligned. */
846 /* FALL THROUGH */
847 case 'X':
848 size = -1;
849 if (total < len)
f7fe979e 850 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
851 break;
852 case 'x' | TYPE_IS_SHRIEKING:
853 if (!len) /* Avoid division by 0 */
854 len = 1;
855 star = total % len; /* Assumed: the start is aligned. */
856 if (star) /* Other portable ways? */
857 len = len - star;
858 else
859 len = 0;
860 /* FALL THROUGH */
861 case 'x':
862 case 'A':
863 case 'Z':
864 case 'a':
80a13697
NC
865 size = 1;
866 break;
867 case 'B':
868 case 'b':
869 len = (len + 7)/8;
870 size = 1;
871 break;
872 case 'H':
873 case 'h':
874 len = (len + 1)/2;
875 size = 1;
876 break;
78d46eaa 877
80a13697
NC
878 case 'P':
879 len = 1;
880 size = sizeof(char*);
78d46eaa
NC
881 break;
882 }
206947d2
IZ
883 }
884 total += len * size;
885 }
886 return total;
887}
888
49704364
WL
889
890/* locate matching closing parenthesis or bracket
891 * returns char pointer to char after match, or NULL
892 */
f7fe979e
AL
893STATIC const char *
894S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 895{
49704364 896 while (patptr < patend) {
f7fe979e 897 const char c = *patptr++;
49704364
WL
898
899 if (isSPACE(c))
900 continue;
901 else if (c == ender)
902 return patptr-1;
903 else if (c == '#') {
904 while (patptr < patend && *patptr != '\n')
905 patptr++;
906 continue;
907 } else if (c == '(')
908 patptr = group_end(patptr, patend, ')') + 1;
909 else if (c == '[')
910 patptr = group_end(patptr, patend, ']') + 1;
18529408 911 }
49704364
WL
912 Perl_croak(aTHX_ "No group ending character '%c' found in template",
913 ender);
914 return 0;
18529408
IZ
915}
916
49704364
WL
917
918/* Convert unsigned decimal number to binary.
919 * Expects a pointer to the first digit and address of length variable
920 * Advances char pointer to 1st non-digit char and returns number
fc241834 921 */
f7fe979e
AL
922STATIC const char *
923S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
924{
925 I32 len = *patptr++ - '0';
926 while (isDIGIT(*patptr)) {
927 if (len >= 0x7FFFFFFF/10)
928 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929 len = (len * 10) + (*patptr++ - '0');
930 }
931 *lenptr = len;
932 return patptr;
933}
934
935/* The marvellous template parsing routine: Using state stored in *symptr,
936 * locates next template code and count
937 */
938STATIC bool
f337b084 939S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 940{
f7fe979e
AL
941 const char* patptr = symptr->patptr;
942 const char* patend = symptr->patend;
49704364
WL
943
944 symptr->flags &= ~FLAG_SLASH;
945
946 while (patptr < patend) {
947 if (isSPACE(*patptr))
948 patptr++;
949 else if (*patptr == '#') {
950 patptr++;
951 while (patptr < patend && *patptr != '\n')
952 patptr++;
953 if (patptr < patend)
954 patptr++;
955 } else {
fc241834 956 /* We should have found a template code */
49704364 957 I32 code = *patptr++ & 0xFF;
66c611c5 958 U32 inherited_modifiers = 0;
49704364
WL
959
960 if (code == ','){ /* grandfather in commas but with a warning */
961 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962 symptr->flags |= FLAG_COMMA;
963 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 964 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
965 }
966 continue;
967 }
fc241834 968
49704364 969 /* for '(', skip to ')' */
fc241834 970 if (code == '(') {
49704364
WL
971 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 973 _action( symptr ) );
49704364
WL
974 symptr->grpbeg = patptr;
975 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 978 _action( symptr ) );
49704364
WL
979 }
980
66c611c5
MHM
981 /* look for group modifiers to inherit */
982 if (TYPE_ENDIANNESS(symptr->flags)) {
983 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
985 }
986
1109a392
MHM
987 /* look for modifiers */
988 while (patptr < patend) {
b7787f18
AL
989 const char *allowed;
990 I32 modifier;
1109a392
MHM
991 switch (*patptr) {
992 case '!':
993 modifier = TYPE_IS_SHRIEKING;
7212898e 994 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 995 break;
7212898e 996#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
997 case '>':
998 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 999 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1000 break;
1001 case '<':
1002 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1004 break;
f337b084 1005#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1006 default:
b7787f18
AL
1007 allowed = "";
1008 modifier = 0;
1109a392
MHM
1009 break;
1010 }
66c611c5 1011
1109a392
MHM
1012 if (modifier == 0)
1013 break;
66c611c5 1014
1109a392
MHM
1015 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1017 allowed, _action( symptr ) );
66c611c5
MHM
1018
1019 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1020 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1021 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1022 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023 TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1025 *patptr, _action( symptr ) );
66c611c5 1026
041457d9 1027 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1028 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1029 "Duplicate modifier '%c' after '%c' in %s",
1030 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1031 _action( symptr ) );
1109a392 1032 }
66c611c5 1033
1109a392
MHM
1034 code |= modifier;
1035 patptr++;
49704364
WL
1036 }
1037
66c611c5
MHM
1038 /* inherit modifiers */
1039 code |= inherited_modifiers;
1040
fc241834 1041 /* look for count and/or / */
49704364
WL
1042 if (patptr < patend) {
1043 if (isDIGIT(*patptr)) {
1044 patptr = get_num( patptr, &symptr->length );
1045 symptr->howlen = e_number;
1046
1047 } else if (*patptr == '*') {
1048 patptr++;
1049 symptr->howlen = e_star;
1050
1051 } else if (*patptr == '[') {
f7fe979e 1052 const char* lenptr = ++patptr;
49704364
WL
1053 symptr->howlen = e_number;
1054 patptr = group_end( patptr, patend, ']' ) + 1;
1055 /* what kind of [] is it? */
1056 if (isDIGIT(*lenptr)) {
1057 lenptr = get_num( lenptr, &symptr->length );
1058 if( *lenptr != ']' )
1059 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1060 _action( symptr ) );
49704364
WL
1061 } else {
1062 tempsym_t savsym = *symptr;
1063 symptr->patend = patptr-1;
1064 symptr->patptr = lenptr;
1065 savsym.length = measure_struct(symptr);
1066 *symptr = savsym;
1067 }
1068 } else {
1069 symptr->howlen = e_no_len;
1070 symptr->length = 1;
1071 }
1072
1073 /* try to find / */
1074 while (patptr < patend) {
1075 if (isSPACE(*patptr))
1076 patptr++;
1077 else if (*patptr == '#') {
1078 patptr++;
1079 while (patptr < patend && *patptr != '\n')
1080 patptr++;
1081 if (patptr < patend)
1082 patptr++;
1083 } else {
66c611c5 1084 if (*patptr == '/') {
49704364
WL
1085 symptr->flags |= FLAG_SLASH;
1086 patptr++;
66c611c5
MHM
1087 if (patptr < patend &&
1088 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1089 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1090 _action( symptr ) );
49704364
WL
1091 }
1092 break;
1093 }
18529408 1094 }
49704364
WL
1095 } else {
1096 /* at end - no count, no / */
1097 symptr->howlen = e_no_len;
1098 symptr->length = 1;
1099 }
1100
1101 symptr->code = code;
fc241834 1102 symptr->patptr = patptr;
49704364 1103 return TRUE;
18529408 1104 }
49704364 1105 }
fc241834 1106 symptr->patptr = patptr;
49704364 1107 return FALSE;
18529408
IZ
1108}
1109
18529408 1110/*
fc241834 1111 There is no way to cleanly handle the case where we should process the
08ca2aa3 1112 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1113 (e.g. estimates like strend-s as an upper bound for the number of
1114 characters left wouldn't work). So if we foresee the need of this
1115 (pattern starts with U or contains U0), we want to work on the encoded
1116 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1117 themselves if they need to do a lot of unpacks like this on it
1118*/
fc241834 1119STATIC bool
08ca2aa3
TH
1120need_utf8(const char *pat, const char *patend)
1121{
1122 bool first = TRUE;
1123 while (pat < patend) {
1124 if (pat[0] == '#') {
1125 pat++;
f7fe979e 1126 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1127 if (!pat) return FALSE;
1128 } else if (pat[0] == 'U') {
1129 if (first || pat[1] == '0') return TRUE;
1130 } else first = FALSE;
1131 pat++;
1132 }
1133 return FALSE;
1134}
1135
1136STATIC char
1137first_symbol(const char *pat, const char *patend) {
1138 while (pat < patend) {
1139 if (pat[0] != '#') return pat[0];
1140 pat++;
f7fe979e 1141 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1142 if (!pat) return 0;
1143 pat++;
1144 }
1145 return 0;
1146}
1147
1148/*
18529408
IZ
1149=for apidoc unpack_str
1150
7accc089
JH
1151The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1152and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1153
1154=cut */
1155
1156I32
f7fe979e 1157Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1158{
f7fe979e 1159 tempsym_t sym;
8772537c
AL
1160 PERL_UNUSED_ARG(strbeg);
1161 PERL_UNUSED_ARG(new_s);
1162 PERL_UNUSED_ARG(ocnt);
08ca2aa3 1163
f337b084 1164 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1165 else if (need_utf8(pat, patend)) {
1166 /* We probably should try to avoid this in case a scalar context call
1167 wouldn't get to the "U0" */
1168 STRLEN len = strend - s;
230e1fce 1169 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1170 SAVEFREEPV(s);
1171 strend = s + len;
f337b084 1172 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1173 }
1174
f337b084
TH
1175 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1176 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1177
f7fe979e 1178 TEMPSYM_INIT(&sym, pat, patend, flags);
49704364
WL
1179
1180 return unpack_rec(&sym, s, s, strend, NULL );
1181}
1182
7accc089
JH
1183/*
1184=for apidoc unpackstring
1185
608d3aed
WL
1186The engine implementing unpack() Perl function. C<unpackstring> puts the
1187extracted list items on the stack and returns the number of elements.
1188Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1189
1190=cut */
1191
1192I32
f7fe979e 1193Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1194{
f7fe979e 1195 tempsym_t sym;
08ca2aa3 1196
f337b084 1197 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1198 else if (need_utf8(pat, patend)) {
1199 /* We probably should try to avoid this in case a scalar context call
1200 wouldn't get to the "U0" */
1201 STRLEN len = strend - s;
230e1fce 1202 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1203 SAVEFREEPV(s);
1204 strend = s + len;
f337b084 1205 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1206 }
1207
f337b084
TH
1208 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1209 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1210
f7fe979e 1211 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1212
1213 return unpack_rec(&sym, s, s, strend, NULL );
1214}
1215
49704364
WL
1216STATIC
1217I32
f7fe979e 1218S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1219{
27da23d5 1220 dVAR; dSP;
18529408 1221 SV *sv;
f7fe979e 1222 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1223 howlen_t howlen;
a6ec74c1 1224
a6ec74c1 1225 I32 checksum = 0;
92d41999 1226 UV cuv = 0;
a6ec74c1 1227 NV cdouble = 0.0;
f337b084 1228 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1229 bool beyond = FALSE;
21c16052 1230 bool explicit_length;
9e27e96a 1231 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1232 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1233 symptr->strbeg = s - strbeg;
49704364 1234
49704364 1235 while (next_symbol(symptr)) {
a7a3cfaa 1236 packprops_t props;
9e27e96a 1237 I32 len;
f337b084 1238 I32 datumtype = symptr->code;
206947d2 1239 /* do first one only unless in list context
08ca2aa3 1240 / is implemented by unpacking the count, then popping it from the
206947d2 1241 stack, so must check that we're not in the middle of a / */
49704364 1242 if ( unpack_only_one
206947d2 1243 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1244 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1245 break;
49704364 1246
f337b084 1247 switch (howlen = symptr->howlen) {
fc241834
RGS
1248 case e_star:
1249 len = strend - strbeg; /* long enough */
49704364 1250 break;
f337b084
TH
1251 default:
1252 /* e_no_len and e_number */
1253 len = symptr->length;
1254 break;
49704364 1255 }
18529408 1256
21c16052 1257 explicit_length = TRUE;
a6ec74c1 1258 redo_switch:
49704364 1259 beyond = s >= strend;
a7a3cfaa
TH
1260
1261 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1262 if (props) {
1263 /* props nonzero means we can process this letter. */
9e27e96a
AL
1264 const long size = props & PACK_SIZE_MASK;
1265 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1266 if (len > howmany)
1267 len = howmany;
1268
1269 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1270 if (len && unpack_only_one) len = 1;
1271 EXTEND(SP, len);
1272 EXTEND_MORTAL(len);
78d46eaa
NC
1273 }
1274 }
a7a3cfaa 1275
1109a392 1276 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1277 default:
1109a392 1278 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1279
a6ec74c1 1280 case '%':
49704364 1281 if (howlen == e_no_len)
18529408 1282 len = 16; /* len is not specified */
a6ec74c1 1283 checksum = len;
92d41999 1284 cuv = 0;
a6ec74c1 1285 cdouble = 0;
18529408 1286 continue;
a6ec74c1 1287 break;
18529408
IZ
1288 case '(':
1289 {
49704364 1290 tempsym_t savsym = *symptr;
9e27e96a 1291 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1292 symptr->flags |= group_modifiers;
49704364 1293 symptr->patend = savsym.grpend;
28be1210 1294 symptr->previous = &savsym;
49704364 1295 symptr->level++;
18529408
IZ
1296 PUTBACK;
1297 while (len--) {
49704364 1298 symptr->patptr = savsym.grpbeg;
f337b084
TH
1299 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1300 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1301 unpack_rec(symptr, s, strbeg, strend, &s);
1302 if (s == strend && savsym.howlen == e_star)
49704364 1303 break; /* No way to continue */
18529408
IZ
1304 }
1305 SPAGAIN;
28be1210 1306 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1307 *symptr = savsym;
18529408
IZ
1308 break;
1309 }
28be1210
TH
1310#ifdef PERL_PACK_CAN_SHRIEKSIGN
1311 case '.' | TYPE_IS_SHRIEKING:
1312#endif
1313 case '.': {
9e27e96a 1314 const char *from;
28be1210
TH
1315 SV *sv;
1316#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1317 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1318#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1319 const bool u8 = utf8;
28be1210
TH
1320#endif
1321 if (howlen == e_star) from = strbeg;
1322 else if (len <= 0) from = s;
1323 else {
1324 tempsym_t *group = symptr;
1325
1326 while (--len && group) group = group->previous;
1327 from = group ? strbeg + group->strbeg : strbeg;
1328 }
1329 sv = from <= s ?
00646304
CB
1330 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1331 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1332 XPUSHs(sv_2mortal(sv));
1333 break;
1334 }
1335#ifdef PERL_PACK_CAN_SHRIEKSIGN
1336 case '@' | TYPE_IS_SHRIEKING:
1337#endif
a6ec74c1 1338 case '@':
28be1210
TH
1339 s = strbeg + symptr->strbeg;
1340#ifdef PERL_PACK_CAN_SHRIEKSIGN
1341 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1342#else /* PERL_PACK_CAN_SHRIEKSIGN */
1343 if (utf8)
1344#endif
1345 {
08ca2aa3
TH
1346 while (len > 0) {
1347 if (s >= strend)
1348 Perl_croak(aTHX_ "'@' outside of string in unpack");
1349 s += UTF8SKIP(s);
1350 len--;
1351 }
1352 if (s > strend)
1353 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1354 } else {
28be1210 1355 if (strend-s < len)
fc241834 1356 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1357 s += len;
08ca2aa3 1358 }
a6ec74c1 1359 break;
62f95557
IZ
1360 case 'X' | TYPE_IS_SHRIEKING:
1361 if (!len) /* Avoid division by 0 */
1362 len = 1;
08ca2aa3 1363 if (utf8) {
f7fe979e 1364 const char *hop, *last;
f337b084
TH
1365 I32 l = len;
1366 hop = last = strbeg;
1367 while (hop < s) {
1368 hop += UTF8SKIP(hop);
1369 if (--l == 0) {
08ca2aa3 1370 last = hop;
f337b084
TH
1371 l = len;
1372 }
fc241834 1373 }
f337b084
TH
1374 if (last > s)
1375 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1376 s = last;
1377 break;
f337b084
TH
1378 }
1379 len = (s - strbeg) % len;
62f95557 1380 /* FALL THROUGH */
a6ec74c1 1381 case 'X':
08ca2aa3
TH
1382 if (utf8) {
1383 while (len > 0) {
1384 if (s <= strbeg)
1385 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1386 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1387 if (s <= strbeg)
1388 Perl_croak(aTHX_ "'X' outside of string in unpack");
1389 }
1390 len--;
1391 }
1392 } else {
fc241834
RGS
1393 if (len > s - strbeg)
1394 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1395 s -= len;
08ca2aa3 1396 }
a6ec74c1 1397 break;
9e27e96a
AL
1398 case 'x' | TYPE_IS_SHRIEKING: {
1399 I32 ai32;
62f95557
IZ
1400 if (!len) /* Avoid division by 0 */
1401 len = 1;
230e1fce
NC
1402 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1403 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1404 if (ai32 == 0) break;
1405 len -= ai32;
9e27e96a 1406 }
62f95557 1407 /* FALL THROUGH */
a6ec74c1 1408 case 'x':
08ca2aa3
TH
1409 if (utf8) {
1410 while (len>0) {
1411 if (s >= strend)
1412 Perl_croak(aTHX_ "'x' outside of string in unpack");
1413 s += UTF8SKIP(s);
1414 len--;
1415 }
1416 } else {
fc241834
RGS
1417 if (len > strend - s)
1418 Perl_croak(aTHX_ "'x' outside of string in unpack");
1419 s += len;
f337b084 1420 }
a6ec74c1
JH
1421 break;
1422 case '/':
49704364
WL
1423 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1424 break;
a6ec74c1
JH
1425 case 'A':
1426 case 'Z':
1427 case 'a':
08ca2aa3
TH
1428 if (checksum) {
1429 /* Preliminary length estimate is assumed done in 'W' */
1430 if (len > strend - s) len = strend - s;
1431 goto W_checksum;
1432 }
1433 if (utf8) {
1434 I32 l;
f7fe979e 1435 const char *hop;
08ca2aa3
TH
1436 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1437 if (hop >= strend) {
1438 if (hop > strend)
1439 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1440 break;
fc241834 1441 }
a6ec74c1 1442 }
08ca2aa3
TH
1443 if (hop > strend)
1444 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1445 len = hop - s;
1446 } else if (len > strend - s)
1447 len = strend - s;
1448
1449 if (datumtype == 'Z') {
1450 /* 'Z' strips stuff after first null */
f7fe979e 1451 const char *ptr, *end;
f337b084
TH
1452 end = s + len;
1453 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1454 sv = newSVpvn(s, ptr-s);
1455 if (howlen == e_star) /* exact for 'Z*' */
1456 len = ptr-s + (ptr != strend ? 1 : 0);
1457 } else if (datumtype == 'A') {
1458 /* 'A' strips both nulls and spaces */
f7fe979e 1459 const char *ptr;
18bdf90a
TH
1460 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1461 for (ptr = s+len-1; ptr >= s; ptr--)
1462 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1463 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1464 if (ptr >= s) ptr += UTF8SKIP(ptr);
1465 else ptr++;
28be1210 1466 if (ptr > s+len)
18bdf90a
TH
1467 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1468 } else {
1469 for (ptr = s+len-1; ptr >= s; ptr--)
1470 if (*ptr != 0 && !isSPACE(*ptr)) break;
1471 ptr++;
1472 }
08ca2aa3
TH
1473 sv = newSVpvn(s, ptr-s);
1474 } else sv = newSVpvn(s, len);
1475
1476 if (utf8) {
1477 SvUTF8_on(sv);
1478 /* Undo any upgrade done due to need_utf8() */
f337b084 1479 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1480 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1481 }
1482 XPUSHs(sv_2mortal(sv));
08ca2aa3 1483 s += len;
a6ec74c1
JH
1484 break;
1485 case 'B':
08ca2aa3
TH
1486 case 'b': {
1487 char *str;
49704364 1488 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1489 len = (strend - s) * 8;
1490 if (checksum) {
1491 if (!PL_bitcount) {
08ca2aa3 1492 int bits;
a02a5408 1493 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1494 for (bits = 1; bits < 256; bits++) {
1495 if (bits & 1) PL_bitcount[bits]++;
1496 if (bits & 2) PL_bitcount[bits]++;
1497 if (bits & 4) PL_bitcount[bits]++;
1498 if (bits & 8) PL_bitcount[bits]++;
1499 if (bits & 16) PL_bitcount[bits]++;
1500 if (bits & 32) PL_bitcount[bits]++;
1501 if (bits & 64) PL_bitcount[bits]++;
1502 if (bits & 128) PL_bitcount[bits]++;
1503 }
1504 }
f337b084 1505 if (utf8)
08ca2aa3 1506 while (len >= 8 && s < strend) {
f337b084 1507 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1508 len -= 8;
1509 }
f337b084 1510 else
fc241834 1511 while (len >= 8) {
08ca2aa3 1512 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1513 len -= 8;
1514 }
08ca2aa3
TH
1515 if (len && s < strend) {
1516 U8 bits;
f337b084
TH
1517 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1518 if (datumtype == 'b')
a6ec74c1 1519 while (len-- > 0) {
92d41999 1520 if (bits & 1) cuv++;
a6ec74c1
JH
1521 bits >>= 1;
1522 }
f337b084 1523 else
a6ec74c1 1524 while (len-- > 0) {
08ca2aa3 1525 if (bits & 0x80) cuv++;
a6ec74c1
JH
1526 bits <<= 1;
1527 }
fc241834 1528 }
a6ec74c1
JH
1529 break;
1530 }
08ca2aa3
TH
1531
1532 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1533 SvPOK_on(sv);
1534 str = SvPVX(sv);
1535 if (datumtype == 'b') {
f337b084 1536 U8 bits = 0;
f7fe979e 1537 const I32 ai32 = len;
08ca2aa3
TH
1538 for (len = 0; len < ai32; len++) {
1539 if (len & 7) bits >>= 1;
1540 else if (utf8) {
1541 if (s >= strend) break;
f337b084 1542 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1543 } else bits = *(U8 *) s++;
1544 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1545 }
08ca2aa3 1546 } else {
f337b084 1547 U8 bits = 0;
f7fe979e 1548 const I32 ai32 = len;
08ca2aa3
TH
1549 for (len = 0; len < ai32; len++) {
1550 if (len & 7) bits <<= 1;
1551 else if (utf8) {
1552 if (s >= strend) break;
f337b084 1553 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1554 } else bits = *(U8 *) s++;
1555 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1556 }
1557 }
1558 *str = '\0';
aa07b2f6 1559 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1560 XPUSHs(sv);
a6ec74c1 1561 break;
08ca2aa3 1562 }
a6ec74c1 1563 case 'H':
08ca2aa3
TH
1564 case 'h': {
1565 char *str;
fc241834 1566 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1567 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1568 len = (strend - s) * 2;
fc241834 1569 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1570 SvPOK_on(sv);
1571 str = SvPVX(sv);
1572 if (datumtype == 'h') {
f337b084 1573 U8 bits = 0;
9e27e96a 1574 I32 ai32 = len;
fc241834
RGS
1575 for (len = 0; len < ai32; len++) {
1576 if (len & 1) bits >>= 4;
1577 else if (utf8) {
1578 if (s >= strend) break;
f337b084 1579 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1580 } else bits = * (U8 *) s++;
a6ec74c1
JH
1581 *str++ = PL_hexdigit[bits & 15];
1582 }
08ca2aa3 1583 } else {
f337b084 1584 U8 bits = 0;
f7fe979e 1585 const I32 ai32 = len;
08ca2aa3
TH
1586 for (len = 0; len < ai32; len++) {
1587 if (len & 1) bits <<= 4;
1588 else if (utf8) {
1589 if (s >= strend) break;
f337b084 1590 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1591 } else bits = *(U8 *) s++;
a6ec74c1
JH
1592 *str++ = PL_hexdigit[(bits >> 4) & 15];
1593 }
1594 }
1595 *str = '\0';
aa07b2f6 1596 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1597 XPUSHs(sv);
a6ec74c1 1598 break;
08ca2aa3 1599 }
a6ec74c1 1600 case 'c':
73cb7263 1601 while (len-- > 0) {
f337b084 1602 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1603 if (aint >= 128) /* fake up signed chars */
1604 aint -= 256;
08ca2aa3 1605 if (!checksum)
ac7f3b1b 1606 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV)aint;
1609 else
1610 cuv += aint;
a6ec74c1
JH
1611 }
1612 break;
1613 case 'C':
08ca2aa3
TH
1614 case 'W':
1615 W_checksum:
35bcd338 1616 if (len == 0) {
fc241834 1617 if (explicit_length && datumtype == 'C')
08ca2aa3 1618 /* Switch to "character" mode */
f337b084 1619 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1620 break;
1621 }
fc241834 1622 if (datumtype == 'C' ?
f337b084
TH
1623 (symptr->flags & FLAG_DO_UTF8) &&
1624 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1625 while (len-- > 0 && s < strend) {
08ca2aa3 1626 STRLEN retlen;
f7fe979e 1627 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1628 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1629 if (retlen == (STRLEN) -1 || retlen == 0)
1630 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1631 s += retlen;
1632 if (!checksum)
1633 PUSHs(sv_2mortal(newSVuv((UV) val)));
1634 else if (checksum > bits_in_uv)
1635 cdouble += (NV) val;
d6d3e8bd 1636 else
08ca2aa3 1637 cuv += val;
fc241834 1638 }
08ca2aa3 1639 } else if (!checksum)
a6ec74c1 1640 while (len-- > 0) {
f7fe979e 1641 const U8 ch = *(U8 *) s++;
08ca2aa3 1642 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1643 }
08ca2aa3
TH
1644 else if (checksum > bits_in_uv)
1645 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1646 else
1647 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1648 break;
1649 case 'U':
35bcd338 1650 if (len == 0) {
08ca2aa3
TH
1651 if (explicit_length) {
1652 /* Switch to "bytes in UTF-8" mode */
f337b084 1653 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1654 else
1655 /* Should be impossible due to the need_utf8() test */
1656 Perl_croak(aTHX_ "U0 mode on a byte string");
1657 }
35bcd338
JH
1658 break;
1659 }
08ca2aa3 1660 if (len > strend - s) len = strend - s;
fc241834 1661 if (!checksum) {
08ca2aa3
TH
1662 if (len && unpack_only_one) len = 1;
1663 EXTEND(SP, len);
1664 EXTEND_MORTAL(len);
fc241834 1665 }
08ca2aa3
TH
1666 while (len-- > 0 && s < strend) {
1667 STRLEN retlen;
1668 UV auv;
1669 if (utf8) {
1670 U8 result[UTF8_MAXLEN];
f7fe979e 1671 const char *ptr = s;
08ca2aa3 1672 STRLEN len;
08ca2aa3
TH
1673 /* Bug: warns about bad utf8 even if we are short on bytes
1674 and will break out of the loop */
230e1fce
NC
1675 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1676 'U'))
08ca2aa3
TH
1677 break;
1678 len = UTF8SKIP(result);
fc241834 1679 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1680 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1681 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1682 s = ptr;
1683 } else {
1684 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1685 if (retlen == (STRLEN) -1 || retlen == 0)
1686 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1687 s += retlen;
1688 }
1689 if (!checksum)
1690 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1691 else if (checksum > bits_in_uv)
08ca2aa3 1692 cdouble += (NV) auv;
73cb7263 1693 else
08ca2aa3 1694 cuv += auv;
a6ec74c1
JH
1695 }
1696 break;
49704364
WL
1697 case 's' | TYPE_IS_SHRIEKING:
1698#if SHORTSIZE != SIZE16
73cb7263 1699 while (len-- > 0) {
08ca2aa3 1700 short ashort;
f337b084
TH
1701 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1702 DO_BO_UNPACK(ashort, s);
08ca2aa3 1703 if (!checksum)
ac7f3b1b 1704 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1705 else if (checksum > bits_in_uv)
1706 cdouble += (NV)ashort;
1707 else
1708 cuv += ashort;
49704364
WL
1709 }
1710 break;
1711#else
1712 /* Fallthrough! */
a6ec74c1 1713#endif
49704364 1714 case 's':
73cb7263 1715 while (len-- > 0) {
08ca2aa3
TH
1716 I16 ai16;
1717
1718#if U16SIZE > SIZE16
1719 ai16 = 0;
1720#endif
f337b084 1721 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1722 DO_BO_UNPACK(ai16, 16);
1109a392 1723#if U16SIZE > SIZE16
73cb7263
NC
1724 if (ai16 > 32767)
1725 ai16 -= 65536;
a6ec74c1 1726#endif
08ca2aa3 1727 if (!checksum)
ac7f3b1b 1728 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1729 else if (checksum > bits_in_uv)
1730 cdouble += (NV)ai16;
1731 else
1732 cuv += ai16;
a6ec74c1
JH
1733 }
1734 break;
49704364
WL
1735 case 'S' | TYPE_IS_SHRIEKING:
1736#if SHORTSIZE != SIZE16
73cb7263 1737 while (len-- > 0) {
08ca2aa3 1738 unsigned short aushort;
f337b084
TH
1739 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1740 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1741 if (!checksum)
1742 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1743 else if (checksum > bits_in_uv)
1744 cdouble += (NV)aushort;
1745 else
1746 cuv += aushort;
49704364
WL
1747 }
1748 break;
1749#else
1750 /* Fallhrough! */
1751#endif
a6ec74c1
JH
1752 case 'v':
1753 case 'n':
1754 case 'S':
73cb7263 1755 while (len-- > 0) {
08ca2aa3
TH
1756 U16 au16;
1757#if U16SIZE > SIZE16
1758 au16 = 0;
1759#endif
f337b084 1760 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1761 DO_BO_UNPACK(au16, 16);
a6ec74c1 1762#ifdef HAS_NTOHS
73cb7263
NC
1763 if (datumtype == 'n')
1764 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1765#endif
1766#ifdef HAS_VTOHS
73cb7263
NC
1767 if (datumtype == 'v')
1768 au16 = vtohs(au16);
a6ec74c1 1769#endif
08ca2aa3
TH
1770 if (!checksum)
1771 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1772 else if (checksum > bits_in_uv)
f337b084 1773 cdouble += (NV) au16;
73cb7263
NC
1774 else
1775 cuv += au16;
a6ec74c1
JH
1776 }
1777 break;
7212898e 1778#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1779 case 'v' | TYPE_IS_SHRIEKING:
1780 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1781 while (len-- > 0) {
08ca2aa3
TH
1782 I16 ai16;
1783# if U16SIZE > SIZE16
1784 ai16 = 0;
1785# endif
f337b084 1786 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1787# ifdef HAS_NTOHS
73cb7263 1788 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1789 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1790# endif /* HAS_NTOHS */
1791# ifdef HAS_VTOHS
73cb7263 1792 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1793 ai16 = (I16) vtohs((U16) ai16);
1794# endif /* HAS_VTOHS */
1795 if (!checksum)
ac7f3b1b 1796 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1797 else if (checksum > bits_in_uv)
08ca2aa3 1798 cdouble += (NV) ai16;
73cb7263
NC
1799 else
1800 cuv += ai16;
068bd2e7
MHM
1801 }
1802 break;
08ca2aa3 1803#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1804 case 'i':
49704364 1805 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1806 while (len-- > 0) {
08ca2aa3 1807 int aint;
f337b084
TH
1808 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1809 DO_BO_UNPACK(aint, i);
08ca2aa3 1810 if (!checksum)
ac7f3b1b 1811 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1812 else if (checksum > bits_in_uv)
1813 cdouble += (NV)aint;
1814 else
1815 cuv += aint;
a6ec74c1
JH
1816 }
1817 break;
1818 case 'I':
49704364 1819 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1820 while (len-- > 0) {
08ca2aa3 1821 unsigned int auint;
f337b084
TH
1822 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1823 DO_BO_UNPACK(auint, i);
08ca2aa3 1824 if (!checksum)
ac7f3b1b 1825 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1826 else if (checksum > bits_in_uv)
1827 cdouble += (NV)auint;
1828 else
1829 cuv += auint;
a6ec74c1
JH
1830 }
1831 break;
92d41999 1832 case 'j':
73cb7263 1833 while (len-- > 0) {
08ca2aa3 1834 IV aiv;
f337b084 1835 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1836#if IVSIZE == INTSIZE
f337b084 1837 DO_BO_UNPACK(aiv, i);
1109a392 1838#elif IVSIZE == LONGSIZE
f337b084 1839 DO_BO_UNPACK(aiv, l);
1109a392 1840#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1841 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1842#else
1843 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1844#endif
08ca2aa3 1845 if (!checksum)
ac7f3b1b 1846 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1847 else if (checksum > bits_in_uv)
1848 cdouble += (NV)aiv;
1849 else
1850 cuv += aiv;
92d41999
JH
1851 }
1852 break;
1853 case 'J':
73cb7263 1854 while (len-- > 0) {
08ca2aa3 1855 UV auv;
f337b084 1856 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1857#if IVSIZE == INTSIZE
f337b084 1858 DO_BO_UNPACK(auv, i);
08ca2aa3 1859#elif IVSIZE == LONGSIZE
f337b084 1860 DO_BO_UNPACK(auv, l);
08ca2aa3 1861#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1862 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1863#else
1864 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1865#endif
08ca2aa3 1866 if (!checksum)
ac7f3b1b 1867 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1868 else if (checksum > bits_in_uv)
1869 cdouble += (NV)auv;
1870 else
1871 cuv += auv;
92d41999
JH
1872 }
1873 break;
49704364
WL
1874 case 'l' | TYPE_IS_SHRIEKING:
1875#if LONGSIZE != SIZE32
73cb7263 1876 while (len-- > 0) {
08ca2aa3 1877 long along;
f337b084
TH
1878 SHIFT_VAR(utf8, s, strend, along, datumtype);
1879 DO_BO_UNPACK(along, l);
08ca2aa3 1880 if (!checksum)
ac7f3b1b 1881 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1882 else if (checksum > bits_in_uv)
1883 cdouble += (NV)along;
1884 else
1885 cuv += along;
49704364
WL
1886 }
1887 break;
1888#else
1889 /* Fallthrough! */
a6ec74c1 1890#endif
49704364 1891 case 'l':
73cb7263 1892 while (len-- > 0) {
08ca2aa3
TH
1893 I32 ai32;
1894#if U32SIZE > SIZE32
1895 ai32 = 0;
1896#endif
f337b084 1897 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1898 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1899#if U32SIZE > SIZE32
08ca2aa3 1900 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1901#endif
08ca2aa3 1902 if (!checksum)
ac7f3b1b 1903 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1904 else if (checksum > bits_in_uv)
1905 cdouble += (NV)ai32;
1906 else
1907 cuv += ai32;
a6ec74c1
JH
1908 }
1909 break;
49704364
WL
1910 case 'L' | TYPE_IS_SHRIEKING:
1911#if LONGSIZE != SIZE32
73cb7263 1912 while (len-- > 0) {
08ca2aa3 1913 unsigned long aulong;
f337b084
TH
1914 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1915 DO_BO_UNPACK(aulong, l);
08ca2aa3 1916 if (!checksum)
ac7f3b1b 1917 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1918 else if (checksum > bits_in_uv)
1919 cdouble += (NV)aulong;
1920 else
1921 cuv += aulong;
49704364
WL
1922 }
1923 break;
1924#else
1925 /* Fall through! */
1926#endif
a6ec74c1
JH
1927 case 'V':
1928 case 'N':
1929 case 'L':
73cb7263 1930 while (len-- > 0) {
08ca2aa3
TH
1931 U32 au32;
1932#if U32SIZE > SIZE32
1933 au32 = 0;
1934#endif
f337b084 1935 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1936 DO_BO_UNPACK(au32, 32);
a6ec74c1 1937#ifdef HAS_NTOHL
73cb7263
NC
1938 if (datumtype == 'N')
1939 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1940#endif
1941#ifdef HAS_VTOHL
73cb7263
NC
1942 if (datumtype == 'V')
1943 au32 = vtohl(au32);
a6ec74c1 1944#endif
08ca2aa3 1945 if (!checksum)
fc241834
RGS
1946 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1947 else if (checksum > bits_in_uv)
1948 cdouble += (NV)au32;
1949 else
1950 cuv += au32;
a6ec74c1
JH
1951 }
1952 break;
7212898e 1953#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1954 case 'V' | TYPE_IS_SHRIEKING:
1955 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1956 while (len-- > 0) {
08ca2aa3
TH
1957 I32 ai32;
1958# if U32SIZE > SIZE32
1959 ai32 = 0;
1960# endif
f337b084 1961 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1962# ifdef HAS_NTOHL
73cb7263
NC
1963 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1964 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1965# endif
1966# ifdef HAS_VTOHL
73cb7263
NC
1967 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1968 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1969# endif
1970 if (!checksum)
ac7f3b1b 1971 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1972 else if (checksum > bits_in_uv)
1973 cdouble += (NV)ai32;
1974 else
1975 cuv += ai32;
068bd2e7
MHM
1976 }
1977 break;
08ca2aa3 1978#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1979 case 'p':
a6ec74c1 1980 while (len-- > 0) {
f7fe979e 1981 const char *aptr;
f337b084 1982 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1983 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1984 /* newSVpv generates undef if aptr is NULL */
1985 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1986 }
1987 break;
1988 case 'w':
a6ec74c1
JH
1989 {
1990 UV auv = 0;
1991 U32 bytes = 0;
fc241834 1992
08ca2aa3
TH
1993 while (len > 0 && s < strend) {
1994 U8 ch;
f337b084 1995 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1996 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1997 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1998 if (ch < 0x80) {
a6ec74c1 1999 bytes = 0;
ac7f3b1b 2000 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
2001 len--;
2002 auv = 0;
08ca2aa3 2003 continue;
a6ec74c1 2004 }
08ca2aa3 2005 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2006 const char *t;
a6ec74c1
JH
2007
2008 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2009 while (s < strend) {
f337b084 2010 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2011 sv = mul128(sv, (U8)(ch & 0x7f));
2012 if (!(ch & 0x80)) {
a6ec74c1
JH
2013 bytes = 0;
2014 break;
2015 }
2016 }
10516c54 2017 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2018 while (*t == '0')
2019 t++;
2020 sv_chop(sv, t);
2021 PUSHs(sv_2mortal(sv));
2022 len--;
2023 auv = 0;
2024 }
2025 }
2026 if ((s >= strend) && bytes)
49704364 2027 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2028 }
2029 break;
2030 case 'P':
49704364
WL
2031 if (symptr->howlen == e_star)
2032 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2033 EXTEND(SP, 1);
08ca2aa3
TH
2034 if (sizeof(char*) <= strend - s) {
2035 char *aptr;
f337b084 2036 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2037 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2038 /* newSVpvn generates undef if aptr is NULL */
2039 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2040 }
a6ec74c1
JH
2041 break;
2042#ifdef HAS_QUAD
2043 case 'q':
73cb7263 2044 while (len-- > 0) {
08ca2aa3 2045 Quad_t aquad;
f337b084
TH
2046 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2047 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2048 if (!checksum)
2049 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2050 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2051 else if (checksum > bits_in_uv)
2052 cdouble += (NV)aquad;
2053 else
2054 cuv += aquad;
2055 }
a6ec74c1
JH
2056 break;
2057 case 'Q':
73cb7263 2058 while (len-- > 0) {
08ca2aa3 2059 Uquad_t auquad;
f337b084
TH
2060 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2061 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2062 if (!checksum)
2063 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2064 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2065 else if (checksum > bits_in_uv)
2066 cdouble += (NV)auquad;
2067 else
2068 cuv += auquad;
a6ec74c1
JH
2069 }
2070 break;
08ca2aa3 2071#endif /* HAS_QUAD */
a6ec74c1
JH
2072 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2073 case 'f':
73cb7263 2074 while (len-- > 0) {
08ca2aa3 2075 float afloat;
f337b084 2076 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2077 DO_BO_UNPACK_N(afloat, float);
2078 if (!checksum)
2079 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2080 else
73cb7263 2081 cdouble += afloat;
fc241834 2082 }
a6ec74c1
JH
2083 break;
2084 case 'd':
73cb7263 2085 while (len-- > 0) {
08ca2aa3 2086 double adouble;
f337b084 2087 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2088 DO_BO_UNPACK_N(adouble, double);
2089 if (!checksum)
2090 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2091 else
73cb7263 2092 cdouble += adouble;
fc241834 2093 }
a6ec74c1 2094 break;
92d41999 2095 case 'F':
73cb7263 2096 while (len-- > 0) {
08ca2aa3 2097 NV anv;
f337b084 2098 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2099 DO_BO_UNPACK_N(anv, NV);
2100 if (!checksum)
2101 PUSHs(sv_2mortal(newSVnv(anv)));
2102 else
73cb7263 2103 cdouble += anv;
fc241834 2104 }
92d41999
JH
2105 break;
2106#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2107 case 'D':
73cb7263 2108 while (len-- > 0) {
08ca2aa3 2109 long double aldouble;
f337b084 2110 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2111 DO_BO_UNPACK_N(aldouble, long double);
2112 if (!checksum)
2113 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2114 else
2115 cdouble += aldouble;
92d41999
JH
2116 }
2117 break;
2118#endif
a6ec74c1
JH
2119 case 'u':
2120 /* MKS:
2121 * Initialise the decode mapping. By using a table driven
2122 * algorithm, the code will be character-set independent
2123 * (and just as fast as doing character arithmetic)
2124 */
2125 if (PL_uudmap['M'] == 0) {
2126 int i;
2127
2128 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2129 PL_uudmap[(U8)PL_uuemap[i]] = i;
2130 /*
2131 * Because ' ' and '`' map to the same value,
2132 * we need to decode them both the same.
2133 */
2134 PL_uudmap[' '] = 0;
2135 }
08ca2aa3 2136 {
f7fe979e 2137 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
08ca2aa3
TH
2138 sv = sv_2mortal(NEWSV(42, l));
2139 if (l) SvPOK_on(sv);
2140 }
2141 if (utf8) {
2142 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2143 I32 a, b, c, d;
2144 char hunk[4];
2145
2146 hunk[3] = '\0';
2147 while (len > 0) {
2148 next_uni_uu(aTHX_ &s, strend, &a);
2149 next_uni_uu(aTHX_ &s, strend, &b);
2150 next_uni_uu(aTHX_ &s, strend, &c);
2151 next_uni_uu(aTHX_ &s, strend, &d);
2152 hunk[0] = (char)((a << 2) | (b >> 4));
2153 hunk[1] = (char)((b << 4) | (c >> 2));
2154 hunk[2] = (char)((c << 6) | d);
2155 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2156 len -= 3;
2157 }
2158 if (s < strend) {
f7fe979e
AL
2159 if (*s == '\n') {
2160 s++;
2161 }
08ca2aa3
TH
2162 else {
2163 /* possible checksum byte */
f7fe979e
AL
2164 const char *skip = s+UTF8SKIP(s);
2165 if (skip < strend && *skip == '\n')
2166 s = skip+1;
08ca2aa3
TH
2167 }
2168 }
2169 }
2170 } else {
fc241834
RGS
2171 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2172 I32 a, b, c, d;
2173 char hunk[4];
a6ec74c1 2174
fc241834
RGS
2175 hunk[3] = '\0';
2176 len = PL_uudmap[*(U8*)s++] & 077;
2177 while (len > 0) {
2178 if (s < strend && ISUUCHAR(*s))
2179 a = PL_uudmap[*(U8*)s++] & 077;
2180 else
2181 a = 0;
2182 if (s < strend && ISUUCHAR(*s))
2183 b = PL_uudmap[*(U8*)s++] & 077;
2184 else
2185 b = 0;
2186 if (s < strend && ISUUCHAR(*s))
2187 c = PL_uudmap[*(U8*)s++] & 077;
2188 else
2189 c = 0;
2190 if (s < strend && ISUUCHAR(*s))
2191 d = PL_uudmap[*(U8*)s++] & 077;
2192 else
2193 d = 0;
2194 hunk[0] = (char)((a << 2) | (b >> 4));
2195 hunk[1] = (char)((b << 4) | (c >> 2));
2196 hunk[2] = (char)((c << 6) | d);
2197 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2198 len -= 3;
2199 }
2200 if (*s == '\n')
2201 s++;
2202 else /* possible checksum byte */
2203 if (s + 1 < strend && s[1] == '\n')
2204 s += 2;
a6ec74c1 2205 }
08ca2aa3
TH
2206 }
2207 XPUSHs(sv);
a6ec74c1
JH
2208 break;
2209 }
49704364 2210
a6ec74c1 2211 if (checksum) {
1109a392 2212 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2213 (checksum > bits_in_uv &&
08ca2aa3
TH
2214 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2215 NV trouble, anv;
a6ec74c1 2216
08ca2aa3 2217 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2218 while (checksum >= 16) {
2219 checksum -= 16;
08ca2aa3 2220 anv *= 65536.0;
a6ec74c1 2221 }
a6ec74c1 2222 while (cdouble < 0.0)
08ca2aa3
TH
2223 cdouble += anv;
2224 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2225 sv = newSVnv(cdouble);
a6ec74c1
JH
2226 }
2227 else {
fa8ec7c1
NC
2228 if (checksum < bits_in_uv) {
2229 UV mask = ((UV)1 << checksum) - 1;
92d41999 2230 cuv &= mask;
a6ec74c1 2231 }
c4c5f44a 2232 sv = newSVuv(cuv);
a6ec74c1
JH
2233 }
2234 XPUSHs(sv_2mortal(sv));
2235 checksum = 0;
2236 }
fc241834 2237
49704364
WL
2238 if (symptr->flags & FLAG_SLASH){
2239 if (SP - PL_stack_base - start_sp_offset <= 0)
2240 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2241 if( next_symbol(symptr) ){
2242 if( symptr->howlen == e_number )
2243 Perl_croak(aTHX_ "Count after length/code in unpack" );
2244 if( beyond ){
2245 /* ...end of char buffer then no decent length available */
2246 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2247 } else {
2248 /* take top of stack (hope it's numeric) */
2249 len = POPi;
2250 if( len < 0 )
2251 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2252 }
2253 } else {
2254 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2255 }
2256 datumtype = symptr->code;
21c16052 2257 explicit_length = FALSE;
49704364
WL
2258 goto redo_switch;
2259 }
a6ec74c1 2260 }
49704364 2261
18529408
IZ
2262 if (new_s)
2263 *new_s = s;
2264 PUTBACK;
2265 return SP - PL_stack_base - start_sp_offset;
2266}
2267
2268PP(pp_unpack)
2269{
2270 dSP;
bab9c0ac 2271 dPOPPOPssrl;
18529408
IZ
2272 I32 gimme = GIMME_V;
2273 STRLEN llen;
2274 STRLEN rlen;
5c144d81
NC
2275 const char *pat = SvPV_const(left, llen);
2276 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2277 const char *strend = s + rlen;
2278 const char *patend = pat + llen;
08ca2aa3 2279 I32 cnt;
18529408
IZ
2280
2281 PUTBACK;
7accc089 2282 cnt = unpackstring(pat, patend, s, strend,
49704364 2283 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2284 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2285
18529408
IZ
2286 SPAGAIN;
2287 if ( !cnt && gimme == G_SCALAR )
2288 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2289 RETURN;
2290}
2291
f337b084 2292STATIC U8 *
f7fe979e 2293doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2294{
f337b084 2295 *h++ = PL_uuemap[len];
a6ec74c1 2296 while (len > 2) {
f337b084
TH
2297 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2298 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2299 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2300 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2301 s += 3;
2302 len -= 3;
2303 }
2304 if (len > 0) {
f7fe979e 2305 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2306 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2307 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2308 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2309 *h++ = PL_uuemap[0];
a6ec74c1 2310 }
f337b084
TH
2311 *h++ = '\n';
2312 return h;
a6ec74c1
JH
2313}
2314
2315STATIC SV *
f7fe979e 2316S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2317{
8b6e33c7
AL
2318 SV *result = newSVpvn(s, l);
2319 char *const result_c = SvPV_nolen(result); /* convenience */
2320 char *out = result_c;
2321 bool skip = 1;
2322 bool ignore = 0;
a6ec74c1
JH
2323
2324 while (*s) {
2325 switch (*s) {
2326 case ' ':
2327 break;
2328 case '+':
2329 if (!skip) {
2330 SvREFCNT_dec(result);
2331 return (NULL);
2332 }
2333 break;
2334 case '0':
2335 case '1':
2336 case '2':
2337 case '3':
2338 case '4':
2339 case '5':
2340 case '6':
2341 case '7':
2342 case '8':
2343 case '9':
2344 skip = 0;
2345 if (!ignore) {
2346 *(out++) = *s;
2347 }
2348 break;
2349 case '.':
2350 ignore = 1;
2351 break;
2352 default:
2353 SvREFCNT_dec(result);
2354 return (NULL);
2355 }
2356 s++;
2357 }
2358 *(out++) = '\0';
2359 SvCUR_set(result, out - result_c);
2360 return (result);
2361}
2362
2363/* pnum must be '\0' terminated */
2364STATIC int
2365S_div128(pTHX_ SV *pnum, bool *done)
2366{
8b6e33c7
AL
2367 STRLEN len;
2368 char * const s = SvPV(pnum, len);
2369 char *t = s;
2370 int m = 0;
2371
2372 *done = 1;
2373 while (*t) {
2374 const int i = m * 10 + (*t - '0');
2375 const int r = (i >> 7); /* r < 10 */
2376 m = i & 0x7F;
2377 if (r) {
2378 *done = 0;
2379 }
2380 *(t++) = '0' + r;
a6ec74c1 2381 }
8b6e33c7
AL
2382 *(t++) = '\0';
2383 SvCUR_set(pnum, (STRLEN) (t - s));
2384 return (m);
a6ec74c1
JH
2385}
2386
18529408
IZ
2387/*
2388=for apidoc pack_cat
2389
7accc089
JH
2390The engine implementing pack() Perl function. Note: parameters next_in_list and
2391flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2392
2393=cut */
2394
49704364 2395
18529408 2396void
f7fe979e 2397Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2398{
aadb217d 2399 tempsym_t sym;
8772537c
AL
2400 PERL_UNUSED_ARG(next_in_list);
2401 PERL_UNUSED_ARG(flags);
7accc089 2402
f7fe979e 2403 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
aadb217d 2404
7accc089
JH
2405 (void)pack_rec( cat, &sym, beglist, endlist );
2406}
2407
2408
2409/*
2410=for apidoc packlist
2411
2412The engine implementing pack() Perl function.
2413
2414=cut */
2415
2416
2417void
f7fe979e 2418Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2419{
f337b084 2420 STRLEN no_len;
aadb217d
JH
2421 tempsym_t sym;
2422
f7fe979e 2423 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2424
f337b084
TH
2425 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2426 Also make sure any UTF8 flag is loaded */
2427 SvPV_force(cat, no_len);
2428 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2429
49704364
WL
2430 (void)pack_rec( cat, &sym, beglist, endlist );
2431}
2432
f337b084
TH
2433/* like sv_utf8_upgrade, but also repoint the group start markers */
2434STATIC void
2435marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2436 STRLEN len;
2437 tempsym_t *group;
f7fe979e
AL
2438 const char *from_ptr, *from_start, *from_end, **marks, **m;
2439 char *to_start, *to_ptr;
f337b084
TH
2440
2441 if (SvUTF8(sv)) return;
2442
aa07b2f6 2443 from_start = SvPVX_const(sv);
f337b084
TH
2444 from_end = from_start + SvCUR(sv);
2445 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2446 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2447 if (from_ptr == from_end) {
2448 /* Simple case: no character needs to be changed */
2449 SvUTF8_on(sv);
2450 return;
2451 }
2452
3473cf63 2453 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2454 Newx(to_start, len, char);
f337b084
TH
2455 Copy(from_start, to_start, from_ptr-from_start, char);
2456 to_ptr = to_start + (from_ptr-from_start);
2457
a02a5408 2458 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2459 for (group=sym_ptr; group; group = group->previous)
2460 marks[group->level] = from_start + group->strbeg;
2461 marks[sym_ptr->level+1] = from_end+1;
2462 for (m = marks; *m < from_ptr; m++)
2463 *m = to_start + (*m-from_start);
2464
2465 for (;from_ptr < from_end; from_ptr++) {
2466 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2467 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2468 }
2469 *to_ptr = 0;
2470
2471 while (*m == from_ptr) *m++ = to_ptr;
2472 if (m != marks + sym_ptr->level+1) {
2473 Safefree(marks);
2474 Safefree(to_start);
2475 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2476 }
2477 for (group=sym_ptr; group; group = group->previous)
2478 group->strbeg = marks[group->level] - to_start;
2479 Safefree(marks);
2480
2481 if (SvOOK(sv)) {
2482 if (SvIVX(sv)) {
b162af07 2483 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2484 from_start -= SvIVX(sv);
2485 SvIV_set(sv, 0);
2486 }
2487 SvFLAGS(sv) &= ~SVf_OOK;
2488 }
2489 if (SvLEN(sv) != 0)
2490 Safefree(from_start);
f880fe2f 2491 SvPV_set(sv, to_start);
b162af07
SP
2492 SvCUR_set(sv, to_ptr - to_start);
2493 SvLEN_set(sv, len);
f337b084
TH
2494 SvUTF8_on(sv);
2495}
2496
2497/* Exponential string grower. Makes string extension effectively O(n)
2498 needed says how many extra bytes we need (not counting the final '\0')
2499 Only grows the string if there is an actual lack of space
2500*/
2501STATIC char *
2502sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2503 const STRLEN cur = SvCUR(sv);
2504 const STRLEN len = SvLEN(sv);
f337b084
TH
2505 STRLEN extend;
2506 if (len - cur > needed) return SvPVX(sv);
2507 extend = needed > len ? needed : len;
2508 return SvGROW(sv, len+extend+1);
2509}
49704364
WL
2510
2511STATIC
2512SV **
f337b084 2513S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2514{
49704364 2515 tempsym_t lookahead;
f337b084
TH
2516 I32 items = endlist - beglist;
2517 bool found = next_symbol(symptr);
2518 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2519 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2520
2521 if (symptr->level == 0 && found && symptr->code == 'U') {
2522 marked_upgrade(aTHX_ cat, symptr);
2523 symptr->flags |= FLAG_DO_UTF8;
2524 utf8 = 0;
49704364 2525 }
f337b084 2526 symptr->strbeg = SvCUR(cat);
49704364
WL
2527
2528 while (found) {
f337b084
TH
2529 SV *fromstr;
2530 STRLEN fromlen;
2531 I32 len;
a6ec74c1 2532 SV *lengthcode = Nullsv;
49704364 2533 I32 datumtype = symptr->code;
f337b084
TH
2534 howlen_t howlen = symptr->howlen;
2535 char *start = SvPVX(cat);
2536 char *cur = start + SvCUR(cat);
49704364 2537
f337b084
TH
2538#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2539
2540 switch (howlen) {
fc241834 2541 case e_star:
f337b084
TH
2542 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2543 0 : items;
2544 break;
2545 default:
2546 /* e_no_len and e_number */
2547 len = symptr->length;
49704364
WL
2548 break;
2549 }
2550
f337b084 2551 if (len) {
a7a3cfaa 2552 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2553
a7a3cfaa
TH
2554 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2555 /* We can process this letter. */
2556 STRLEN size = props & PACK_SIZE_MASK;
2557 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2558 }
f337b084
TH
2559 }
2560
49704364
WL
2561 /* Look ahead for next symbol. Do we have code/code? */
2562 lookahead = *symptr;
2563 found = next_symbol(&lookahead);
246f24af
TH
2564 if (symptr->flags & FLAG_SLASH) {
2565 IV count;
f337b084 2566 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2567 if (strchr("aAZ", lookahead.code)) {
2568 if (lookahead.howlen == e_number) count = lookahead.length;
2569 else {
2570 if (items > 0)
2571 count = DO_UTF8(*beglist) ?
2572 sv_len_utf8(*beglist) : sv_len(*beglist);
2573 else count = 0;
2574 if (lookahead.code == 'Z') count++;
2575 }
2576 } else {
2577 if (lookahead.howlen == e_number && lookahead.length < items)
2578 count = lookahead.length;
2579 else count = items;
2580 }
2581 lookahead.howlen = e_number;
2582 lookahead.length = count;
2583 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2584 }
49704364 2585
fc241834
RGS
2586 /* Code inside the switch must take care to properly update
2587 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2588 doesn't simply leave using break */
1109a392 2589 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2590 default:
f337b084
TH
2591 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2592 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2593 case '%':
49704364 2594 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2595 {
2596 char *from;
2597#ifdef PERL_PACK_CAN_SHRIEKSIGN
2598 case '.' | TYPE_IS_SHRIEKING:
2599#endif
2600 case '.':
2601 if (howlen == e_star) from = start;
2602 else if (len == 0) from = cur;
2603 else {
2604 tempsym_t *group = symptr;
2605
2606 while (--len && group) group = group->previous;
2607 from = group ? start + group->strbeg : start;
2608 }
2609 fromstr = NEXTFROM;
2610 len = SvIV(fromstr);
2611 goto resize;
2612#ifdef PERL_PACK_CAN_SHRIEKSIGN
2613 case '@' | TYPE_IS_SHRIEKING:
2614#endif
a6ec74c1 2615 case '@':
28be1210
TH
2616 from = start + symptr->strbeg;
2617 resize:
2618#ifdef PERL_PACK_CAN_SHRIEKSIGN
2619 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2620#else /* PERL_PACK_CAN_SHRIEKSIGN */
2621 if (utf8)
2622#endif
2623 if (len >= 0) {
2624 while (len && from < cur) {
2625 from += UTF8SKIP(from);
2626 len--;
2627 }
2628 if (from > cur)
2629 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2630 if (len) {
2631 /* Here we know from == cur */
2632 grow:
2633 GROWING(0, cat, start, cur, len);
2634 Zero(cur, len, char);
2635 cur += len;
2636 } else if (from < cur) {
2637 len = cur - from;
2638 goto shrink;
2639 } else goto no_change;
2640 } else {
2641 cur = from;
2642 len = -len;
2643 goto utf8_shrink;
f337b084 2644 }
28be1210
TH
2645 else {
2646 len -= cur - from;
f337b084 2647 if (len > 0) goto grow;
28be1210 2648 if (len == 0) goto no_change;
fc241834 2649 len = -len;
28be1210 2650 goto shrink;
f337b084 2651 }
a6ec74c1 2652 break;
28be1210 2653 }
fc241834 2654 case '(': {
49704364 2655 tempsym_t savsym = *symptr;
66c611c5
MHM
2656 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2657 symptr->flags |= group_modifiers;
49704364
WL
2658 symptr->patend = savsym.grpend;
2659 symptr->level++;
f337b084 2660 symptr->previous = &lookahead;
18529408 2661 while (len--) {
f337b084
TH
2662 U32 was_utf8;
2663 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2664 else symptr->flags &= ~FLAG_PARSE_UTF8;
2665 was_utf8 = SvUTF8(cat);
49704364 2666 symptr->patptr = savsym.grpbeg;
f337b084
TH
2667 beglist = pack_rec(cat, symptr, beglist, endlist);
2668 if (SvUTF8(cat) != was_utf8)
2669 /* This had better be an upgrade while in utf8==0 mode */
2670 utf8 = 1;
2671
49704364 2672 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2673 break; /* No way to continue */
2674 }
f337b084
TH
2675 lookahead.flags = symptr->flags & ~group_modifiers;
2676 goto no_change;
18529408 2677 }
62f95557
IZ
2678 case 'X' | TYPE_IS_SHRIEKING:
2679 if (!len) /* Avoid division by 0 */
2680 len = 1;
f337b084
TH
2681 if (utf8) {
2682 char *hop, *last;
2683 I32 l = len;
2684 hop = last = start;
2685 while (hop < cur) {
2686 hop += UTF8SKIP(hop);
2687 if (--l == 0) {
2688 last = hop;
2689 l = len;
2690 }
2691 }
2692 if (last > cur)
2693 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2694 cur = last;
2695 break;
2696 }
2697 len = (cur-start) % len;
62f95557 2698 /* FALL THROUGH */
a6ec74c1 2699 case 'X':
f337b084
TH
2700 if (utf8) {
2701 if (len < 1) goto no_change;
28be1210 2702 utf8_shrink:
f337b084
TH
2703 while (len > 0) {
2704 if (cur <= start)
28be1210
TH
2705 Perl_croak(aTHX_ "'%c' outside of string in pack",
2706 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2707 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2708 if (cur <= start)
28be1210
TH
2709 Perl_croak(aTHX_ "'%c' outside of string in pack",
2710 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2711 }
2712 len--;
2713 }
2714 } else {
fc241834 2715 shrink:
f337b084 2716 if (cur - start < len)
28be1210
TH
2717 Perl_croak(aTHX_ "'%c' outside of string in pack",
2718 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2719 cur -= len;
2720 }
2721 if (cur < start+symptr->strbeg) {
2722 /* Make sure group starts don't point into the void */
2723 tempsym_t *group;
9e27e96a 2724 const STRLEN length = cur-start;
f337b084
TH
2725 for (group = symptr;
2726 group && length < group->strbeg;
2727 group = group->previous) group->strbeg = length;
2728 lookahead.strbeg = length;
2729 }
a6ec74c1 2730 break;
fc241834
RGS
2731 case 'x' | TYPE_IS_SHRIEKING: {
2732 I32 ai32;
62f95557
IZ
2733 if (!len) /* Avoid division by 0 */
2734 len = 1;
230e1fce 2735 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2736 else ai32 = (cur - start) % len;
2737 if (ai32 == 0) goto no_change;
2738 len -= ai32;
2739 }
2740 /* FALL THROUGH */
a6ec74c1 2741 case 'x':
f337b084 2742 goto grow;
a6ec74c1
JH
2743 case 'A':
2744 case 'Z':
f337b084 2745 case 'a': {
f7fe979e 2746 const char *aptr;
f337b084 2747
a6ec74c1 2748 fromstr = NEXTFROM;
e62f0680 2749 aptr = SvPV_const(fromstr, fromlen);
f337b084 2750 if (DO_UTF8(fromstr)) {
f7fe979e 2751 const char *end, *s;
f337b084
TH
2752
2753 if (!utf8 && !SvUTF8(cat)) {
2754 marked_upgrade(aTHX_ cat, symptr);
2755 lookahead.flags |= FLAG_DO_UTF8;
2756 lookahead.strbeg = symptr->strbeg;
2757 utf8 = 1;
2758 start = SvPVX(cat);
2759 cur = start + SvCUR(cat);
2760 }
fc241834 2761 if (howlen == e_star) {
f337b084
TH
2762 if (utf8) goto string_copy;
2763 len = fromlen+1;
2764 }
2765 s = aptr;
2766 end = aptr + fromlen;
2767 fromlen = datumtype == 'Z' ? len-1 : len;
2768 while ((I32) fromlen > 0 && s < end) {
2769 s += UTF8SKIP(s);
2770 fromlen--;
2771 }
2772 if (s > end)
2773 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2774 if (utf8) {
fc241834 2775 len = fromlen;
f337b084
TH
2776 if (datumtype == 'Z') len++;
2777 fromlen = s-aptr;
2778 len += fromlen;
fc241834 2779
f337b084 2780 goto string_copy;
fc241834 2781 }
f337b084
TH
2782 fromlen = len - fromlen;
2783 if (datumtype == 'Z') fromlen--;
2784 if (howlen == e_star) {
2785 len = fromlen;
2786 if (datumtype == 'Z') len++;
fc241834 2787 }
f337b084 2788 GROWING(0, cat, start, cur, len);
fc241834 2789 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2790 datumtype | TYPE_IS_PACK))
2791 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2792 cur += fromlen;
a6ec74c1 2793 len -= fromlen;
f337b084
TH
2794 } else if (utf8) {
2795 if (howlen == e_star) {
2796 len = fromlen;
2797 if (datumtype == 'Z') len++;
a6ec74c1 2798 }
f337b084
TH
2799 if (len <= (I32) fromlen) {
2800 fromlen = len;
2801 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2802 }
fc241834 2803 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2804 upgrade, so:
2805 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2806 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2807 len -= fromlen;
2808 while (fromlen > 0) {
230e1fce 2809 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2810 aptr++;
2811 fromlen--;
fc241834 2812 }
f337b084
TH
2813 } else {
2814 string_copy:
2815 if (howlen == e_star) {
2816 len = fromlen;
2817 if (datumtype == 'Z') len++;
2818 }
2819 if (len <= (I32) fromlen) {
2820 fromlen = len;
2821 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2822 }
f337b084
TH
2823 GROWING(0, cat, start, cur, len);
2824 Copy(aptr, cur, fromlen, char);
2825 cur += fromlen;
2826 len -= fromlen;
a6ec74c1 2827 }
f337b084
TH
2828 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2829 cur += len;
a6ec74c1 2830 break;
f337b084 2831 }
a6ec74c1 2832 case 'B':
f337b084 2833 case 'b': {
b83604b4 2834 const char *str, *end;
f337b084
TH
2835 I32 l, field_len;
2836 U8 bits;
2837 bool utf8_source;
2838 U32 utf8_flags;
a6ec74c1 2839
fc241834 2840 fromstr = NEXTFROM;
b83604b4 2841 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2842 end = str + fromlen;
2843 if (DO_UTF8(fromstr)) {
2844 utf8_source = TRUE;
041457d9 2845 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2846 } else {
2847 utf8_source = FALSE;
2848 utf8_flags = 0; /* Unused, but keep compilers happy */
2849 }
2850 if (howlen == e_star) len = fromlen;
2851 field_len = (len+7)/8;
2852 GROWING(utf8, cat, start, cur, field_len);
2853 if (len > (I32)fromlen) len = fromlen;
2854 bits = 0;
2855 l = 0;
2856 if (datumtype == 'B')
2857 while (l++ < len) {
2858 if (utf8_source) {
2859 UV val;
2860 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2861 bits |= val & 1;
2862 } else bits |= *str++ & 1;
2863 if (l & 7) bits <<= 1;
fc241834 2864 else {
f337b084
TH
2865 PUSH_BYTE(utf8, cur, bits);
2866 bits = 0;
a6ec74c1
JH
2867 }
2868 }
f337b084
TH
2869 else
2870 /* datumtype == 'b' */
2871 while (l++ < len) {
2872 if (utf8_source) {
2873 UV val;
2874 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2875 if (val & 1) bits |= 0x80;
2876 } else if (*str++ & 1)
2877 bits |= 0x80;
2878 if (l & 7) bits >>= 1;
fc241834 2879 else {
f337b084
TH
2880 PUSH_BYTE(utf8, cur, bits);
2881 bits = 0;
a6ec74c1
JH
2882 }
2883 }
f337b084
TH
2884 l--;
2885 if (l & 7) {
fc241834 2886 if (datumtype == 'B')
f337b084 2887 bits <<= 7 - (l & 7);
fc241834 2888 else
f337b084
TH
2889 bits >>= 7 - (l & 7);
2890 PUSH_BYTE(utf8, cur, bits);
2891 l += 7;
a6ec74c1 2892 }
f337b084
TH
2893 /* Determine how many chars are left in the requested field */
2894 l /= 8;
2895 if (howlen == e_star) field_len = 0;
2896 else field_len -= l;
2897 Zero(cur, field_len, char);
2898 cur += field_len;
a6ec74c1 2899 break;
f337b084 2900 }
a6ec74c1 2901 case 'H':
f337b084 2902 case 'h': {
10516c54 2903 const char *str, *end;
f337b084
TH
2904 I32 l, field_len;
2905 U8 bits;
2906 bool utf8_source;
2907 U32 utf8_flags;
a6ec74c1 2908
fc241834 2909 fromstr = NEXTFROM;
10516c54 2910 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2911 end = str + fromlen;
2912 if (DO_UTF8(fromstr)) {
2913 utf8_source = TRUE;
041457d9 2914 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2915 } else {
2916 utf8_source = FALSE;
2917 utf8_flags = 0; /* Unused, but keep compilers happy */
2918 }
2919 if (howlen == e_star) len = fromlen;
2920 field_len = (len+1)/2;
2921 GROWING(utf8, cat, start, cur, field_len);
2922 if (!utf8 && len > (I32)fromlen) len = fromlen;
2923 bits = 0;
2924 l = 0;
2925 if (datumtype == 'H')
2926 while (l++ < len) {
2927 if (utf8_source) {
2928 UV val;
2929 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2930 if (val < 256 && isALPHA(val))
2931 bits |= (val + 9) & 0xf;
a6ec74c1 2932 else
f337b084
TH
2933 bits |= val & 0xf;
2934 } else if (isALPHA(*str))
2935 bits |= (*str++ + 9) & 0xf;
2936 else
2937 bits |= *str++ & 0xf;
2938 if (l & 1) bits <<= 4;
fc241834 2939 else {
f337b084
TH
2940 PUSH_BYTE(utf8, cur, bits);
2941 bits = 0;
a6ec74c1
JH
2942 }
2943 }
f337b084
TH
2944 else
2945 while (l++ < len) {
2946 if (utf8_source) {
2947 UV val;
2948 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2949 if (val < 256 && isALPHA(val))
2950 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2951 else
f337b084
TH
2952 bits |= (val & 0xf) << 4;
2953 } else if (isALPHA(*str))
2954 bits |= ((*str++ + 9) & 0xf) << 4;
2955 else
2956 bits |= (*str++ & 0xf) << 4;
2957 if (l & 1) bits >>= 4;
fc241834 2958 else {
f337b084
TH
2959 PUSH_BYTE(utf8, cur, bits);
2960 bits = 0;
a6ec74c1 2961 }
fc241834 2962 }
f337b084
TH
2963 l--;
2964 if (l & 1) {
2965 PUSH_BYTE(utf8, cur, bits);
2966 l++;
2967 }
2968 /* Determine how many chars are left in the requested field */
2969 l /= 2;
2970 if (howlen == e_star) field_len = 0;
2971 else field_len -= l;
2972 Zero(cur, field_len, char);
2973 cur += field_len;
2974 break;
fc241834
RGS
2975 }
2976 case 'c':
f337b084
TH
2977 while (len-- > 0) {
2978 IV aiv;
2979 fromstr = NEXTFROM;
2980 aiv = SvIV(fromstr);
2981 if ((-128 > aiv || aiv > 127) &&
2982 ckWARN(WARN_PACK))
2983 Perl_warner(aTHX_ packWARN(WARN_PACK),
2984 "Character in 'c' format wrapped in pack");
2985 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2986 }
2987 break;
2988 case 'C':
f337b084
TH
2989 if (len == 0) {
2990 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2991 break;
2992 }
2993 GROWING(0, cat, start, cur, len);
a6ec74c1 2994 while (len-- > 0) {
f337b084 2995 IV aiv;
a6ec74c1 2996 fromstr = NEXTFROM;
f337b084
TH
2997 aiv = SvIV(fromstr);
2998 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2999 ckWARN(WARN_PACK))
3000 Perl_warner(aTHX_ packWARN(WARN_PACK),
3001 "Character in 'C' format wrapped in pack");
f337b084
TH
3002 *cur++ = aiv & 0xff;
3003 }
fc241834
RGS
3004 break;
3005 case 'W': {
3006 char *end;
3007 U8 in_bytes = IN_BYTES;
3008
3009 end = start+SvLEN(cat)-1;
3010 if (utf8) end -= UTF8_MAXLEN-1;
3011 while (len-- > 0) {
3012 UV auv;
3013 fromstr = NEXTFROM;
3014 auv = SvUV(fromstr);
3015 if (in_bytes) auv = auv % 0x100;
3016 if (utf8) {
3017 W_utf8:
3018 if (cur > end) {
3019 *cur = '\0';
b162af07 3020 SvCUR_set(cat, cur - start);
fc241834
RGS
3021
3022 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3023 end = start+SvLEN(cat)-UTF8_MAXLEN;
3024 }
230e1fce
NC
3025 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3026 NATIVE_TO_UNI(auv),
041457d9 3027 warn_utf8 ?
230e1fce 3028 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3029 } else {
3030 if (auv >= 0x100) {
3031 if (!SvUTF8(cat)) {
3032 *cur = '\0';
b162af07 3033 SvCUR_set(cat, cur - start);
fc241834
RGS
3034 marked_upgrade(aTHX_ cat, symptr);
3035 lookahead.flags |= FLAG_DO_UTF8;
3036 lookahead.strbeg = symptr->strbeg;
3037 utf8 = 1;
3038 start = SvPVX(cat);
3039 cur = start + SvCUR(cat);
3040 end = start+SvLEN(cat)-UTF8_MAXLEN;
3041 goto W_utf8;
3042 }
3043 if (ckWARN(WARN_PACK))
3044 Perl_warner(aTHX_ packWARN(WARN_PACK),
3045 "Character in 'W' format wrapped in pack");
3046 auv &= 0xff;
3047 }
3048 if (cur >= end) {
3049 *cur = '\0';
b162af07 3050 SvCUR_set(cat, cur - start);
fc241834
RGS
3051 GROWING(0, cat, start, cur, len+1);
3052 end = start+SvLEN(cat)-1;
3053 }
fe2774ed 3054 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3055 }
3056 }
3057 break;
fc241834
RGS
3058 }
3059 case 'U': {
3060 char *end;
3061
3062 if (len == 0) {
3063 if (!(symptr->flags & FLAG_DO_UTF8)) {
3064 marked_upgrade(aTHX_ cat, symptr);
3065 lookahead.flags |= FLAG_DO_UTF8;
3066 lookahead.strbeg = symptr->strbeg;
3067 }
3068 utf8 = 0;
3069 goto no_change;
3070 }
3071
3072 end = start+SvLEN(cat);
3073 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3074 while (len-- > 0) {
fc241834 3075 UV auv;
a6ec74c1 3076 fromstr = NEXTFROM;
fc241834
RGS
3077 auv = SvUV(fromstr);
3078 if (utf8) {
230e1fce 3079 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3080 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3081 warn_utf8 ?
fc241834
RGS
3082 0 : UNICODE_ALLOW_ANY);
3083 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3084 *cur = '\0';
b162af07 3085 SvCUR_set(cat, cur - start);
fc241834
RGS
3086 GROWING(0, cat, start, cur,
3087 len+(endb-buffer)*UTF8_EXPAND);
3088 end = start+SvLEN(cat);
3089 }
3090 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3091 } else {
3092 if (cur >= end) {
3093 *cur = '\0';
b162af07 3094 SvCUR_set(cat, cur - start);
fc241834
RGS
3095 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3096 end = start+SvLEN(cat)-UTF8_MAXLEN;
3097 }
230e1fce 3098 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3099 warn_utf8 ?
230e1fce 3100 0 : UNICODE_ALLOW_ANY);
fc241834 3101 }
a6ec74c1 3102 }
a6ec74c1 3103 break;
fc241834 3104 }
a6ec74c1
JH
3105 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3106 case 'f':
a6ec74c1 3107 while (len-- > 0) {
f337b084
TH
3108 float afloat;
3109 NV anv;
a6ec74c1 3110 fromstr = NEXTFROM;
f337b084 3111 anv = SvNV(fromstr);
5cdb9e01 3112#ifdef __VOS__
f337b084 3113 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3114 during conversion from double to float into infinity, so we
3115 do it by hand. This code should either be generalized for
3116 any OS that needs it, or removed if and when VOS implements
3117 posix-976 (suggestion to support mapping to infinity).
3118 Paul.Green@stratus.com 02-04-02. */
f337b084 3119 if (anv > FLT_MAX)
fc241834 3120 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3121 else if (anv < -FLT_MAX)
fc241834 3122 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3123 else afloat = (float) anv;
3124#else /* __VOS__ */
baf3cf9c 3125# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3126 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3127 * on Alpha; fake it if we don't have them.
3128 */
f337b084 3129 if (anv > FLT_MAX)
fc241834 3130 afloat = FLT_MAX;
f337b084 3131 else if (anv < -FLT_MAX)
fc241834 3132 afloat = -FLT_MAX;
f337b084 3133 else afloat = (float)anv;
baf3cf9c 3134# else
f337b084 3135 afloat = (float)anv;
baf3cf9c 3136# endif
f337b084 3137#endif /* __VOS__ */
1109a392 3138 DO_BO_PACK_N(afloat, float);
f337b084 3139 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3140 }
3141 break;
3142 case 'd':
a6ec74c1 3143 while (len-- > 0) {
f337b084
TH
3144 double adouble;
3145 NV anv;
a6ec74c1 3146 fromstr = NEXTFROM;
f337b084 3147 anv = SvNV(fromstr);
5cdb9e01 3148#ifdef __VOS__
f337b084 3149 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3150 during conversion from long double to double into infinity,
3151 so we do it by hand. This code should either be generalized
3152 for any OS that needs it, or removed if and when VOS
3153 implements posix-976 (suggestion to support mapping to
3154 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3155 if (anv > DBL_MAX)
fc241834 3156 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3157 else if (anv < -DBL_MAX)
fc241834 3158 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3159 else adouble = (double) anv;
3160#else /* __VOS__ */
baf3cf9c 3161# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3162 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3163 * on Alpha; fake it if we don't have them.
3164 */
f337b084 3165 if (anv > DBL_MAX)
fc241834 3166 adouble = DBL_MAX;
f337b084 3167 else if (anv < -DBL_MAX)
fc241834 3168 adouble = -DBL_MAX;
f337b084 3169 else adouble = (double)anv;
baf3cf9c 3170# else
f337b084 3171 adouble = (double)anv;
baf3cf9c 3172# endif
f337b084 3173#endif /* __VOS__ */
1109a392 3174 DO_BO_PACK_N(adouble, double);
f337b084 3175 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3176 }
3177 break;
fc241834
RGS
3178 case 'F': {
3179 NV anv;
1109a392 3180 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3181 while (len-- > 0) {
3182 fromstr = NEXTFROM;
3183 anv = SvNV(fromstr);
1109a392 3184 DO_BO_PACK_N(anv, NV);
fc241834 3185 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3186 }
3187 break;
fc241834 3188 }
92d41999 3189#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3190 case 'D': {
3191 long double aldouble;
1109a392
MHM
3192 /* long doubles can have unused bits, which may be nonzero */
3193 Zero(&aldouble, 1, long double);
92d41999
JH
3194 while (len-- > 0) {
3195 fromstr = NEXTFROM;
3196 aldouble = (long double)SvNV(fromstr);
1109a392 3197 DO_BO_PACK_N(aldouble, long double);
fc241834 3198 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3199 }
3200 break;
fc241834 3201 }
92d41999 3202#endif
7212898e 3203#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3204 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3205#endif
a6ec74c1
JH
3206 case 'n':
3207 while (len-- > 0) {
f337b084 3208 I16 ai16;
a6ec74c1 3209 fromstr = NEXTFROM;
ef108786 3210 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3211#ifdef HAS_HTONS
ef108786 3212 ai16 = PerlSock_htons(ai16);
a6ec74c1 3213#endif
f337b084 3214 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3215 }
3216 break;
7212898e 3217#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3218 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3219#endif
a6ec74c1
JH
3220 case 'v':
3221 while (len-- > 0) {
f337b084 3222 I16 ai16;
a6ec74c1 3223 fromstr = NEXTFROM;
ef108786 3224 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3225#ifdef HAS_HTOVS
ef108786 3226 ai16 = htovs(ai16);
a6ec74c1 3227#endif
f337b084 3228 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3229 }
3230 break;
49704364 3231 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3232#if SHORTSIZE != SIZE16
fc241834 3233 while (len-- > 0) {
f337b084 3234 unsigned short aushort;
fc241834
RGS
3235 fromstr = NEXTFROM;
3236 aushort = SvUV(fromstr);
3237 DO_BO_PACK(aushort, s);
f337b084 3238 PUSH_VAR(utf8, cur, aushort);
fc241834 3239 }
49704364
WL
3240 break;
3241#else
3242 /* Fall through! */
a6ec74c1 3243#endif
49704364 3244 case 'S':
fc241834 3245 while (len-- > 0) {
f337b084 3246 U16 au16;
fc241834
RGS
3247 fromstr = NEXTFROM;
3248 au16 = (U16)SvUV(fromstr);
3249 DO_BO_PACK(au16, 16);
f337b084 3250 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3251 }
3252 break;
49704364 3253 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3254#if SHORTSIZE != SIZE16
fc241834 3255 while (len-- > 0) {
f337b084 3256 short ashort;
fc241834
RGS
3257 fromstr = NEXTFROM;
3258 ashort = SvIV(fromstr);
3259 DO_BO_PACK(ashort, s);
f337b084 3260 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3261 }
49704364
WL
3262 break;
3263#else
3264 /* Fall through! */
a6ec74c1 3265#endif
49704364
WL
3266 case 's':
3267 while (len-- > 0) {
f337b084 3268 I16 ai16;
49704364 3269 fromstr = NEXTFROM;
ef108786
MHM
3270 ai16 = (I16)SvIV(fromstr);
3271 DO_BO_PACK(ai16, 16);
f337b084 3272 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3273 }
3274 break;
3275 case 'I':
49704364 3276 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3277 while (len-- > 0) {
f337b084 3278 unsigned int auint;
a6ec74c1
JH
3279 fromstr = NEXTFROM;
3280 auint = SvUV(fromstr);
1109a392 3281 DO_BO_PACK(auint, i);
f337b084 3282 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3283 }
3284 break;
92d41999
JH
3285 case 'j':
3286 while (len-- > 0) {
f337b084 3287 IV aiv;
92d41999
JH
3288 fromstr = NEXTFROM;
3289 aiv = SvIV(fromstr);
1109a392
MHM
3290#if IVSIZE == INTSIZE
3291 DO_BO_PACK(aiv, i);
3292#elif IVSIZE == LONGSIZE
3293 DO_BO_PACK(aiv, l);
3294#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3295 DO_BO_PACK(aiv, 64);
f337b084
TH
3296#else
3297 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3298#endif
f337b084 3299 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3300 }
3301 break;
3302 case 'J':
3303 while (len-- > 0) {
f337b084 3304 UV auv;
92d41999
JH
3305 fromstr = NEXTFROM;
3306 auv = SvUV(fromstr);
1109a392
MHM
3307#if UVSIZE == INTSIZE
3308 DO_BO_PACK(auv, i);
3309#elif UVSIZE == LONGSIZE
3310 DO_BO_PACK(auv, l);
3311#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3312 DO_BO_PACK(auv, 64);
f337b084
TH
3313#else
3314 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3315#endif
f337b084 3316 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3317 }
3318 break;
a6ec74c1
JH
3319 case 'w':
3320 while (len-- > 0) {
f337b084 3321 NV anv;
a6ec74c1 3322 fromstr = NEXTFROM;
15e9f109 3323 anv = SvNV(fromstr);
a6ec74c1 3324
f337b084
TH
3325 if (anv < 0) {
3326 *cur = '\0';
b162af07 3327 SvCUR_set(cat, cur - start);
49704364 3328 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3329 }
a6ec74c1 3330
196b62db
NC
3331 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3332 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3333 any negative IVs will have already been got by the croak()
3334 above. IOK is untrue for fractions, so we test them
3335 against UV_MAX_P1. */
f337b084
TH
3336 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3337 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3338 char *in = buf + sizeof(buf);
196b62db 3339 UV auv = SvUV(fromstr);
a6ec74c1
JH
3340
3341 do {
eb160463 3342 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3343 auv >>= 7;
3344 } while (auv);
3345 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3346 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3347 in, (buf + sizeof(buf)) - in);
3348 } else if (SvPOKp(fromstr))
3349 goto w_string;
a6ec74c1 3350 else if (SvNOKp(fromstr)) {
0258719b
NC
3351 /* 10**NV_MAX_10_EXP is the largest power of 10
3352 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3353 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3354 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3355 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3356 Some C compilers are strict about integral constant
3357 expressions so we conservatively divide by a slightly
3358 smaller integer instead of multiplying by the exact
3359 floating-point value.
0258719b
NC
3360 */
3361#ifdef NV_MAX_10_EXP
f337b084 3362 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3363 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3364#else
f337b084 3365 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3366 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3367#endif
a6ec74c1
JH
3368 char *in = buf + sizeof(buf);
3369
8b6e33c7 3370 anv = Perl_floor(anv);
a6ec74c1 3371 do {
8b6e33c7 3372 const NV next = Perl_floor(anv / 128);
a6ec74c1 3373 if (in <= buf) /* this cannot happen ;-) */
49704364 3374 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3375 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3376 anv = next;
3377 } while (anv > 0);
a6ec74c1 3378 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3379 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3380 in, (buf + sizeof(buf)) - in);
3381 } else {
8b6e33c7
AL
3382 const char *from;
3383 char *result, *in;
735b914b
JH
3384 SV *norm;
3385 STRLEN len;
3386 bool done;
3387
f337b084 3388 w_string:
735b914b 3389 /* Copy string and check for compliance */
349d4f2f 3390 from = SvPV_const(fromstr, len);
735b914b 3391 if ((norm = is_an_int(from, len)) == NULL)
49704364 3392 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3393
a02a5408 3394 Newx(result, len, char);
735b914b
JH
3395 in = result + len;
3396 done = FALSE;
f337b084 3397 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3398 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3399 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3400 in, (result + len) - in);
735b914b
JH
3401 Safefree(result);
3402 SvREFCNT_dec(norm); /* free norm */
fc241834 3403 }
a6ec74c1
JH
3404 }
3405 break;
3406 case 'i':
49704364 3407 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3408 while (len-- > 0) {
f337b084 3409 int aint;
a6ec74c1
JH
3410 fromstr = NEXTFROM;
3411 aint = SvIV(fromstr);
1109a392 3412 DO_BO_PACK(aint, i);
f337b084 3413 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3414 }
3415 break;
7212898e 3416#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3417 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3418#endif
a6ec74c1
JH
3419 case 'N':
3420 while (len-- > 0) {
f337b084 3421 U32 au32;
a6ec74c1 3422 fromstr = NEXTFROM;
ef108786 3423 au32 = SvUV(fromstr);
a6ec74c1 3424#ifdef HAS_HTONL
ef108786 3425 au32 = PerlSock_htonl(au32);
a6ec74c1 3426#endif
f337b084 3427 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3428 }
3429 break;
7212898e 3430#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3431 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3432#endif
a6ec74c1
JH
3433 case 'V':
3434 while (len-- > 0) {
f337b084 3435 U32 au32;
a6ec74c1 3436 fromstr = NEXTFROM;
ef108786 3437 au32 = SvUV(fromstr);
a6ec74c1 3438#ifdef HAS_HTOVL
ef108786 3439 au32 = htovl(au32);
a6ec74c1 3440#endif
f337b084 3441 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3442 }
3443 break;
49704364 3444 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3445#if LONGSIZE != SIZE32
fc241834 3446 while (len-- > 0) {
f337b084 3447 unsigned long aulong;
fc241834
RGS
3448 fromstr = NEXTFROM;
3449 aulong = SvUV(fromstr);
3450 DO_BO_PACK(aulong, l);
f337b084 3451 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3452 }
49704364
WL
3453 break;
3454#else
3455 /* Fall though! */
a6ec74c1 3456#endif
49704364 3457 case 'L':
fc241834 3458 while (len-- > 0) {
f337b084 3459 U32 au32;
fc241834
RGS
3460 fromstr = NEXTFROM;
3461 au32 = SvUV(fromstr);
3462 DO_BO_PACK(au32, 32);
f337b084 3463 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3464 }
3465 break;
49704364 3466 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3467#if LONGSIZE != SIZE32
fc241834 3468 while (len-- > 0) {
f337b084 3469 long along;
fc241834
RGS
3470 fromstr = NEXTFROM;
3471 along = SvIV(fromstr);
3472 DO_BO_PACK(along, l);
f337b084 3473 PUSH_VAR(utf8, cur, along);
a6ec74c1 3474 }
49704364
WL
3475 break;
3476#else
3477 /* Fall though! */
a6ec74c1 3478#endif
49704364
WL
3479 case 'l':
3480 while (len-- > 0) {
f337b084 3481 I32 ai32;
49704364 3482 fromstr = NEXTFROM;
ef108786
MHM
3483 ai32 = SvIV(fromstr);
3484 DO_BO_PACK(ai32, 32);
f337b084 3485 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3486 }
3487 break;
3488#ifdef HAS_QUAD
3489 case 'Q':
3490 while (len-- > 0) {
f337b084 3491 Uquad_t auquad;
a6ec74c1 3492 fromstr = NEXTFROM;
f337b084 3493 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3494 DO_BO_PACK(auquad, 64);
f337b084 3495 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3496 }
3497 break;
3498 case 'q':
3499 while (len-- > 0) {
f337b084 3500 Quad_t aquad;
a6ec74c1
JH
3501 fromstr = NEXTFROM;
3502 aquad = (Quad_t)SvIV(fromstr);
1109a392 3503 DO_BO_PACK(aquad, 64);
f337b084 3504 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3505 }
3506 break;
f337b084 3507#endif /* HAS_QUAD */
a6ec74c1
JH
3508 case 'P':
3509 len = 1; /* assume SV is correct length */
f337b084 3510 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3511 /* Fall through! */
a6ec74c1
JH
3512 case 'p':
3513 while (len-- > 0) {
83003860 3514 const char *aptr;
f337b084 3515
a6ec74c1 3516 fromstr = NEXTFROM;
28a4f200
TH
3517 SvGETMAGIC(fromstr);
3518 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3519 else {
3520 STRLEN n_a;
3521 /* XXX better yet, could spirit away the string to
3522 * a safe spot and hang on to it until the result
3523 * of pack() (and all copies of the result) are
3524 * gone.
3525 */
041457d9
DM
3526 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3527 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3528 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3529 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3530 }
3531 if (SvPOK(fromstr) || SvNIOK(fromstr))
83003860 3532 aptr = SvPV_nomg_const(fromstr, n_a);
a6ec74c1 3533 else
28a4f200 3534 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3535 }
07409e01 3536 DO_BO_PACK_PC(aptr);
f337b084 3537 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3538 }
3539 break;
fc241834 3540 case 'u': {
f7fe979e 3541 const char *aptr, *aend;
fc241834 3542 bool from_utf8;
f337b084 3543
a6ec74c1 3544 fromstr = NEXTFROM;
fc241834
RGS
3545 if (len <= 2) len = 45;
3546 else len = len / 3 * 3;
3547 if (len >= 64) {
3548 Perl_warner(aTHX_ packWARN(WARN_PACK),
3549 "Field too wide in 'u' format in pack");
3550 len = 63;
3551 }
83003860 3552 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3553 from_utf8 = DO_UTF8(fromstr);
3554 if (from_utf8) {
3555 aend = aptr + fromlen;
3556 fromlen = sv_len_utf8(fromstr);
3557 } else aend = NULL; /* Unused, but keep compilers happy */
3558 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3559 while (fromlen > 0) {
fc241834 3560 U8 *end;
a6ec74c1 3561 I32 todo;
fc241834 3562 U8 hunk[1+63/3*4+1];
a6ec74c1 3563
eb160463 3564 if ((I32)fromlen > len)
a6ec74c1
JH
3565 todo = len;
3566 else
3567 todo = fromlen;
fc241834
RGS
3568 if (from_utf8) {
3569 char buffer[64];
3570 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3571 'u' | TYPE_IS_PACK)) {
3572 *cur = '\0';
b162af07 3573 SvCUR_set(cat, cur - start);
fc241834
RGS
3574 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3575 }
3576 end = doencodes(hunk, buffer, todo);
3577 } else {
3578 end = doencodes(hunk, aptr, todo);
3579 aptr += todo;
3580 }
3581 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3582 fromlen -= todo;
3583 }
a6ec74c1
JH
3584 break;
3585 }
f337b084
TH
3586 }
3587 *cur = '\0';
b162af07 3588 SvCUR_set(cat, cur - start);
f337b084 3589 no_change:
49704364 3590 *symptr = lookahead;
a6ec74c1 3591 }
49704364 3592 return beglist;
18529408
IZ
3593}
3594#undef NEXTFROM
3595
3596
3597PP(pp_pack)
3598{
3599 dSP; dMARK; dORIGMARK; dTARGET;
3600 register SV *cat = TARG;
3601 STRLEN fromlen;
349d4f2f
NC
3602 SV *pat_sv = *++MARK;
3603 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3604 register const char *patend = pat + fromlen;
18529408
IZ
3605
3606 MARK++;
3607 sv_setpvn(cat, "", 0);
f337b084 3608 SvUTF8_off(cat);
18529408 3609
7accc089 3610 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3611
a6ec74c1
JH
3612 SvSETMAGIC(cat);
3613 SP = ORIGMARK;
3614 PUSHs(cat);
3615 RETURN;
3616}
a6ec74c1 3617
73cb7263
NC
3618/*
3619 * Local variables:
3620 * c-indentation-style: bsd
3621 * c-basic-offset: 4
3622 * indent-tabs-mode: t
3623 * End:
3624 *
37442d52
RGS
3625 * ex: set ts=8 sts=4 sw=4 noet:
3626 */