This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
In Perl_refcounted_he_fetch_pvn(), eliminate nested ? : ternary operators.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
7212898e 85#if PERL_VERSION >= 9
f337b084
TH
86# define PERL_PACK_CAN_BYTEORDER
87# define PERL_PACK_CAN_SHRIEKSIGN
88#endif
89
90#ifndef CHAR_BIT
91# define CHAR_BIT 8
7212898e 92#endif
3473cf63
RGS
93/* Maximum number of bytes to which a byte can grow due to upgrade */
94#define UTF8_EXPAND 2
7212898e 95
a6ec74c1 96/*
a6ec74c1
JH
97 * Offset for integer pack/unpack.
98 *
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
101 */
102
103/*
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
109 */
110/*
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
113 */
114#define SIZE16 2
115#define SIZE32 4
116
117/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
118 --jhi Feb 1999 */
119
1109a392
MHM
120#if U16SIZE > SIZE16 || U32SIZE > SIZE32
121# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
122# define OFF16(p) ((char*)(p))
123# define OFF32(p) ((char*)(p))
a6ec74c1 124# else
1109a392 125# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
126# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
128# else
08ca2aa3 129 ++++ bad cray byte order
a6ec74c1
JH
130# endif
131# endif
a6ec74c1 132#else
08ca2aa3
TH
133# define OFF16(p) ((char *) (p))
134# define OFF32(p) ((char *) (p))
a6ec74c1
JH
135#endif
136
f337b084
TH
137/* Only to be used inside a loop (see the break) */
138#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
139 if (utf8) { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
141 } else { \
142 Copy(s, OFF16(p), SIZE16, char); \
143 (s) += SIZE16; \
144 } \
145} STMT_END
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
149 if (utf8) { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
151 } else { \
152 Copy(s, OFF32(p), SIZE32, char); \
153 (s) += SIZE32; \
154 } \
155} STMT_END
156
157#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
159
160/* Only to be used inside a loop (see the break) */
275663fa 161#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
162STMT_START { \
163 if (utf8) { \
f337b084 164 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 165 (char *) (buf), len, datumtype)) break; \
08ca2aa3 166 } else { \
275663fa
TC
167 Copy(s, (char *) (buf), len, char); \
168 s += len; \
08ca2aa3 169 } \
08ca2aa3
TH
170} STMT_END
171
275663fa
TC
172#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174
f337b084 175#define PUSH_VAR(utf8, aptr, var) \
230e1fce 176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 177
49704364
WL
178/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179#define MAX_SUB_TEMPLATE_LEVEL 100
180
66c611c5 181/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
182#define FLAG_WAS_UTF8 0x40
183#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 184#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 185#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
186#define FLAG_SLASH 0x04
187#define FLAG_COMMA 0x02
188#define FLAG_PACK 0x01
189
a6ec74c1
JH
190STATIC SV *
191S_mul128(pTHX_ SV *sv, U8 m)
192{
193 STRLEN len;
194 char *s = SvPV(sv, len);
195 char *t;
a6ec74c1 196
7918f24d
NC
197 PERL_ARGS_ASSERT_MUL128;
198
a6ec74c1 199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 200 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
201
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
204 sv = tmpNew;
205 s = SvPV(sv, len);
206 }
207 t = s + len - 1;
208 while (!*t) /* trailing '\0'? */
209 t--;
210 while (t > s) {
f7fe979e 211 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
212 *(t--) = '0' + (char)(i % 10);
213 m = (char)(i / 10);
a6ec74c1
JH
214 }
215 return (sv);
216}
217
218/* Explosives and implosives. */
219
220#if 'I' == 73 && 'J' == 74
221/* On an ASCII/ISO kind of system */
222#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
223#else
224/*
225 Some other sort of character set - use memchr() so we don't match
226 the null byte.
227 */
228#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229#endif
230
66c611c5 231/* type modifiers */
62f95557 232#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
233#define TYPE_IS_BIG_ENDIAN 0x200
234#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 235#define TYPE_IS_PACK 0x800
1109a392 236#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 237#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
238#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239
7212898e 240#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 241# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 242#else
28be1210 243# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
244#endif
245
246#ifndef PERL_PACK_CAN_BYTEORDER
247/* Put "can't" first because it is shorter */
248# define TYPE_ENDIANNESS(t) 0
249# define TYPE_NO_ENDIANNESS(t) (t)
250
251# define ENDIANNESS_ALLOWED_TYPES ""
252
253# define DO_BO_UNPACK(var, type)
254# define DO_BO_PACK(var, type)
07409e01
NC
255# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
257# define DO_BO_UNPACK_N(var, type)
258# define DO_BO_PACK_N(var, type)
259# define DO_BO_UNPACK_P(var)
260# define DO_BO_PACK_P(var)
45f723e1
NC
261# define DO_BO_UNPACK_PC(var)
262# define DO_BO_PACK_PC(var)
66c611c5 263
f337b084 264#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
265
266# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
268
269# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
270
271# define DO_BO_UNPACK(var, type) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
276 default: break; \
277 } \
278 } STMT_END
279
7212898e 280# define DO_BO_PACK(var, type) \
1109a392 281 STMT_START { \
66c611c5 282 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
285 default: break; \
286 } \
287 } STMT_END
288
07409e01 289# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 292 case TYPE_IS_BIG_ENDIAN: \
07409e01 293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
294 break; \
295 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
297 break; \
298 default: \
299 break; \
300 } \
301 } STMT_END
302
07409e01 303# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 304 STMT_START { \
66c611c5 305 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 306 case TYPE_IS_BIG_ENDIAN: \
07409e01 307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
311 break; \
312 default: \
313 break; \
314 } \
315 } STMT_END
316
7212898e 317# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
318 STMT_START { \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
323 break; \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
327 break; \
328 default: \
329 break; \
330 } \
331 } STMT_END
332
7212898e 333# if PTRSIZE == INTSIZE
07409e01
NC
334# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 338# elif PTRSIZE == LONGSIZE
fa58a56f
S
339# if LONGSIZE < IVSIZE && IVSIZE == 8
340# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
344# else
345# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
349# endif
2b00a750
JD
350# elif PTRSIZE == IVSIZE
351# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
355# else
356# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
358# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 360# endif
1109a392 361
7212898e 362# if defined(my_htolen) && defined(my_letohn) && \
1109a392 363 defined(my_htoben) && defined(my_betohn)
7212898e 364# define DO_BO_UNPACK_N(var, type) \
1109a392 365 STMT_START { \
66c611c5 366 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
369 default: break; \
370 } \
371 } STMT_END
372
7212898e 373# define DO_BO_PACK_N(var, type) \
1109a392 374 STMT_START { \
66c611c5 375 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
378 default: break; \
379 } \
380 } STMT_END
7212898e
NC
381# else
382# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
384# endif
385
f337b084 386#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 387
78d46eaa 388#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 389#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
390#define PACK_SIZE_MASK 0x3F
391
78d46eaa
NC
392/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
394
395#if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
397#endif
398
399typedef U8 packprops_t;
78d46eaa
NC
400#if 'J'-'I' == 1
401/* ASCII */
f43a9a31 402STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
403 /* normal */
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 0, 0, 0,
1651fc44 409 /* C */ sizeof(unsigned char),
80a13697 410#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 411 /* D */ LONG_DOUBLESIZE,
80a13697 412#else
a7a3cfaa 413 0,
80a13697 414#endif
a7a3cfaa
TH
415 0,
416 /* F */ NVSIZE,
417 0, 0,
418 /* I */ sizeof(unsigned int),
419 /* J */ UVSIZE,
420 0,
421 /* L */ SIZE32,
422 0,
423 /* N */ SIZE32,
424 0, 0,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* Q */ sizeof(Uquad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* S */ SIZE16,
432 0,
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
434 /* V */ SIZE32,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
439 0,
440 /* f */ sizeof(float),
441 0, 0,
442 /* i */ sizeof(int),
443 /* j */ IVSIZE,
444 0,
445 /* l */ SIZE32,
446 0,
447 /* n */ SIZE16,
448 0,
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 450#if defined(HAS_QUAD)
a7a3cfaa 451 /* q */ sizeof(Quad_t),
80a13697 452#else
a7a3cfaa 453 0,
80a13697 454#endif
a7a3cfaa
TH
455 0,
456 /* s */ SIZE16,
457 0, 0,
458 /* v */ SIZE16,
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
469 /* shrieking */
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
476 0, 0,
477 /* L */ sizeof(unsigned long),
478 0,
7212898e 479#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 480 /* N */ SIZE32,
7212898e 481#else
a7a3cfaa 482 0,
7212898e 483#endif
a7a3cfaa
TH
484 0, 0, 0, 0,
485 /* S */ sizeof(unsigned short),
486 0, 0,
7212898e 487#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 488 /* V */ SIZE32,
7212898e 489#else
a7a3cfaa 490 0,
7212898e 491#endif
a7a3cfaa
TH
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0,
494 /* i */ sizeof(int),
495 0, 0,
496 /* l */ sizeof(long),
497 0,
7212898e 498#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 499 /* n */ SIZE16,
7212898e 500#else
a7a3cfaa 501 0,
7212898e 502#endif
a7a3cfaa
TH
503 0, 0, 0, 0,
504 /* s */ sizeof(short),
505 0, 0,
7212898e 506#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 507 /* v */ SIZE16,
7212898e 508#else
a7a3cfaa 509 0,
7212898e 510#endif
a7a3cfaa
TH
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
520};
521#else
522/* EBCDIC (or bust) */
f43a9a31 523STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
524 /* normal */
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
536 0,
537 /* f */ sizeof(float),
538 0, 0,
539 /* i */ sizeof(int),
540 0, 0, 0, 0, 0, 0, 0,
541 /* j */ IVSIZE,
542 0,
543 /* l */ SIZE32,
544 0,
545 /* n */ SIZE16,
546 0,
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 548#if defined(HAS_QUAD)
a7a3cfaa 549 /* q */ sizeof(Quad_t),
80a13697 550#else
a7a3cfaa 551 0,
80a13697 552#endif
a7a3cfaa
TH
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 /* s */ SIZE16,
555 0, 0,
556 /* v */ SIZE16,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 560 /* C */ sizeof(unsigned char),
80a13697 561#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 562 /* D */ LONG_DOUBLESIZE,
80a13697 563#else
a7a3cfaa 564 0,
80a13697 565#endif
a7a3cfaa
TH
566 0,
567 /* F */ NVSIZE,
568 0, 0,
569 /* I */ sizeof(unsigned int),
570 0, 0, 0, 0, 0, 0, 0,
571 /* J */ UVSIZE,
572 0,
573 /* L */ SIZE32,
574 0,
575 /* N */ SIZE32,
576 0, 0,
80a13697 577#if defined(HAS_QUAD)
a7a3cfaa 578 /* Q */ sizeof(Uquad_t),
80a13697 579#else
a7a3cfaa 580 0,
80a13697 581#endif
a7a3cfaa
TH
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 /* S */ SIZE16,
584 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
586 /* V */ SIZE32,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* shrieking */
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* i */ sizeof(int),
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
603 0,
7212898e 604#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 605 /* n */ SIZE16,
7212898e 606#else
a7a3cfaa 607 0,
7212898e 608#endif
a7a3cfaa
TH
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
611 0, 0,
7212898e 612#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 613 /* v */ SIZE16,
7212898e 614#else
a7a3cfaa 615 0,
7212898e 616#endif
a7a3cfaa
TH
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
619 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
623 0,
7212898e 624#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 625 /* N */ SIZE32,
7212898e 626#else
a7a3cfaa 627 0,
7212898e 628#endif
a7a3cfaa
TH
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
631 0, 0,
7212898e 632#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 633 /* V */ SIZE32,
7212898e 634#else
a7a3cfaa 635 0,
7212898e 636#endif
a7a3cfaa
TH
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
639};
640#endif
641
08ca2aa3 642STATIC U8
f7fe979e 643uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 644{
08ca2aa3 645 STRLEN retlen;
0bcc34c2 646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 648 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 654 if (val >= 0x100) {
a2a5de95
NC
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
658 val &= 0xff;
659 }
660 *s += retlen;
fe2774ed 661 return (U8)val;
08ca2aa3
TH
662}
663
f337b084
TH
664#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
666 *(U8 *)(s)++)
667
668STATIC bool
f7fe979e 669uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
670{
671 UV val;
672 STRLEN retlen;
f7fe979e 673 const char *from = *s;
08ca2aa3 674 int bad = 0;
f7fe979e 675 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
f337b084 679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
682 bad |= 1;
683 } else from += retlen;
684 if (val >= 0x100) {
685 bad |= 2;
686 val &= 0xff;
687 }
fe2774ed 688 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
689 }
690 /* We have enough characters for the buffer. Did we have problems ? */
691 if (bad) {
692 if (bad & 1) {
693 /* Rewalk the string fragment while warning */
f7fe979e 694 const char *ptr;
9e27e96a 695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
f337b084 698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
699 }
700 if (from > end) from = end;
701 }
a2a5de95
NC
702 if ((bad & 2))
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 704 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
708 }
709 *s = from;
710 return TRUE;
711}
712
713STATIC bool
f7fe979e 714next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 715{
97aff369 716 dVAR;
08ca2aa3 717 STRLEN retlen;
0bcc34c2 718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
721 *out = 0;
722 return FALSE;
723 }
724 *out = PL_uudmap[val] & 077;
f337b084 725 *s += retlen;
08ca2aa3
TH
726 return TRUE;
727}
78d46eaa 728
64844641 729STATIC char *
14333449 730S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 731 const U8 * const end = start + len;
64844641 732
7918f24d
NC
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
734
f337b084 735 while (start < end) {
48fa4626
TS
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
739 else {
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
742 }
743 start++;
744 }
64844641 745 return dest;
f337b084
TH
746}
747
230e1fce
NC
748#define PUSH_BYTES(utf8, cur, buf, len) \
749STMT_START { \
64844641
AL
750 if (utf8) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
752 else { \
753 Copy(buf, cur, len, char); \
754 (cur) += (len); \
755 } \
f337b084
TH
756} STMT_END
757
758#define GROWING(utf8, cat, start, cur, in_len) \
759STMT_START { \
760 STRLEN glen = (in_len); \
3473cf63 761 if (utf8) glen *= UTF8_EXPAND; \
f337b084 762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 763 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
764 (cur) = (start) + SvCUR(cat); \
765 } \
766} STMT_END
767
768#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
769STMT_START { \
f7fe979e 770 const STRLEN glen = (in_len); \
f337b084 771 STRLEN gl = glen; \
3473cf63 772 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
774 *cur = '\0'; \
b162af07 775 SvCUR_set((cat), (cur) - (start)); \
0bd48802 776 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
777 (cur) = (start) + SvCUR(cat); \
778 } \
779 PUSH_BYTES(utf8, cur, buf, glen); \
780} STMT_END
781
782#define PUSH_BYTE(utf8, s, byte) \
783STMT_START { \
784 if (utf8) { \
f7fe979e 785 const U8 au8 = (byte); \
64844641 786 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
787 } else *(U8 *)(s)++ = (byte); \
788} STMT_END
789
790/* Only to be used inside a loop (see the break) */
791#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
792STMT_START { \
793 STRLEN retlen; \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
797 *cur = '\0'; \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
799 } \
800 str += retlen; \
801} STMT_END
802
f7fe979e
AL
803static const char *_action( const tempsym_t* symptr )
804{
10edeb5d 805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
806}
807
206947d2 808/* Returns the sizeof() struct described by pat */
028d1f6d 809STATIC I32
f337b084 810S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 811{
f337b084 812 I32 total = 0;
206947d2 813
7918f24d
NC
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
815
49704364 816 while (next_symbol(symptr)) {
f337b084 817 I32 len;
f7fe979e 818 int size;
f337b084
TH
819
820 switch (symptr->howlen) {
fc241834 821 case e_star:
49704364 822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 823 _action( symptr ) );
49704364 824 break;
f337b084
TH
825 default:
826 /* e_no_len and e_number */
827 len = symptr->length;
828 break;
49704364
WL
829 }
830
a7a3cfaa 831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 832 if (!size) {
f7fe979e 833 int star;
80a13697
NC
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
836 default:
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 839 _action( symptr ) );
28be1210
TH
840#ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
843#endif
80a13697 844 case '@':
28be1210 845 case '.':
80a13697
NC
846 case '/':
847 case 'U': /* XXXX Is it correct? */
848 case 'w':
849 case 'u':
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 851 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 852 _action( symptr ) );
80a13697
NC
853 case '%':
854 size = 0;
855 break;
856 case '(':
fc241834
RGS
857 {
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
866 *symptr = savsym;
867 break;
868 }
80a13697
NC
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
871 */
872 if (!len) /* Avoid division by 0 */
873 len = 1;
874 len = total % len; /* Assumed: the start is aligned. */
875 /* FALL THROUGH */
876 case 'X':
877 size = -1;
878 if (total < len)
f7fe979e 879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
880 break;
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
883 len = 1;
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
886 len = len - star;
887 else
888 len = 0;
889 /* FALL THROUGH */
890 case 'x':
891 case 'A':
892 case 'Z':
893 case 'a':
80a13697
NC
894 size = 1;
895 break;
896 case 'B':
897 case 'b':
898 len = (len + 7)/8;
899 size = 1;
900 break;
901 case 'H':
902 case 'h':
903 len = (len + 1)/2;
904 size = 1;
905 break;
78d46eaa 906
80a13697
NC
907 case 'P':
908 len = 1;
909 size = sizeof(char*);
78d46eaa
NC
910 break;
911 }
206947d2
IZ
912 }
913 total += len * size;
914 }
915 return total;
916}
917
49704364
WL
918
919/* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
921 */
f7fe979e
AL
922STATIC const char *
923S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 924{
7918f24d
NC
925 PERL_ARGS_ASSERT_GROUP_END;
926
49704364 927 while (patptr < patend) {
f7fe979e 928 const char c = *patptr++;
49704364
WL
929
930 if (isSPACE(c))
931 continue;
932 else if (c == ender)
933 return patptr-1;
934 else if (c == '#') {
935 while (patptr < patend && *patptr != '\n')
936 patptr++;
937 continue;
938 } else if (c == '(')
939 patptr = group_end(patptr, patend, ')') + 1;
940 else if (c == '[')
941 patptr = group_end(patptr, patend, ']') + 1;
18529408 942 }
49704364
WL
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
944 ender);
945 return 0;
18529408
IZ
946}
947
49704364
WL
948
949/* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
fc241834 952 */
f7fe979e
AL
953STATIC const char *
954S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
955{
956 I32 len = *patptr++ - '0';
7918f24d
NC
957
958 PERL_ARGS_ASSERT_GET_NUM;
959
49704364
WL
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
964 }
965 *lenptr = len;
966 return patptr;
967}
968
969/* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
971 */
972STATIC bool
f337b084 973S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 974{
f7fe979e 975 const char* patptr = symptr->patptr;
0bcc34c2 976 const char* const patend = symptr->patend;
49704364 977
7918f24d
NC
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
979
49704364
WL
980 symptr->flags &= ~FLAG_SLASH;
981
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
984 patptr++;
985 else if (*patptr == '#') {
986 patptr++;
987 while (patptr < patend && *patptr != '\n')
988 patptr++;
989 if (patptr < patend)
990 patptr++;
991 } else {
fc241834 992 /* We should have found a template code */
49704364 993 I32 code = *patptr++ & 0xFF;
66c611c5 994 U32 inherited_modifiers = 0;
49704364
WL
995
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 1000 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
1001 }
1002 continue;
1003 }
fc241834 1004
49704364 1005 /* for '(', skip to ')' */
fc241834 1006 if (code == '(') {
49704364
WL
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 1009 _action( symptr ) );
49704364
WL
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 1014 _action( symptr ) );
49704364
WL
1015 }
1016
66c611c5
MHM
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1021 }
1022
1109a392
MHM
1023 /* look for modifiers */
1024 while (patptr < patend) {
b7787f18
AL
1025 const char *allowed;
1026 I32 modifier;
1109a392
MHM
1027 switch (*patptr) {
1028 case '!':
1029 modifier = TYPE_IS_SHRIEKING;
7212898e 1030 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1031 break;
7212898e 1032#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1033 case '>':
1034 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1036 break;
1037 case '<':
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1040 break;
f337b084 1041#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1042 default:
b7787f18
AL
1043 allowed = "";
1044 modifier = 0;
1109a392
MHM
1045 break;
1046 }
66c611c5 1047
1109a392
MHM
1048 if (modifier == 0)
1049 break;
66c611c5 1050
1109a392
MHM
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1053 allowed, _action( symptr ) );
66c611c5
MHM
1054
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1061 *patptr, _action( symptr ) );
66c611c5 1062
a2a5de95
NC
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1109a392 1068 }
66c611c5 1069
1109a392
MHM
1070 code |= modifier;
1071 patptr++;
49704364
WL
1072 }
1073
66c611c5
MHM
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1076
fc241834 1077 /* look for count and/or / */
49704364
WL
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1082
1083 } else if (*patptr == '*') {
1084 patptr++;
1085 symptr->howlen = e_star;
1086
1087 } else if (*patptr == '[') {
f7fe979e 1088 const char* lenptr = ++patptr;
49704364
WL
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1096 _action( symptr ) );
49704364
WL
1097 } else {
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1102 *symptr = savsym;
1103 }
1104 } else {
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 /* try to find / */
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1112 patptr++;
1113 else if (*patptr == '#') {
1114 patptr++;
1115 while (patptr < patend && *patptr != '\n')
1116 patptr++;
1117 if (patptr < patend)
1118 patptr++;
1119 } else {
66c611c5 1120 if (*patptr == '/') {
49704364
WL
1121 symptr->flags |= FLAG_SLASH;
1122 patptr++;
66c611c5
MHM
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1126 _action( symptr ) );
49704364
WL
1127 }
1128 break;
1129 }
18529408 1130 }
49704364
WL
1131 } else {
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1134 symptr->length = 1;
1135 }
1136
1137 symptr->code = code;
fc241834 1138 symptr->patptr = patptr;
49704364 1139 return TRUE;
18529408 1140 }
49704364 1141 }
fc241834 1142 symptr->patptr = patptr;
49704364 1143 return FALSE;
18529408
IZ
1144}
1145
18529408 1146/*
fc241834 1147 There is no way to cleanly handle the case where we should process the
08ca2aa3 1148 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1153 themselves if they need to do a lot of unpacks like this on it
1154*/
fc241834 1155STATIC bool
08ca2aa3
TH
1156need_utf8(const char *pat, const char *patend)
1157{
1158 bool first = TRUE;
7918f24d
NC
1159
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1161
08ca2aa3
TH
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1164 pat++;
f7fe979e 1165 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1170 pat++;
1171 }
1172 return FALSE;
1173}
1174
1175STATIC char
1176first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1178
08ca2aa3
TH
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1181 pat++;
f7fe979e 1182 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1183 if (!pat) return 0;
1184 pat++;
1185 }
1186 return 0;
1187}
1188
1189/*
7accc089
JH
1190=for apidoc unpackstring
1191
608d3aed
WL
1192The engine implementing unpack() Perl function. C<unpackstring> puts the
1193extracted list items on the stack and returns the number of elements.
1194Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1195
1196=cut */
1197
1198I32
f7fe979e 1199Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1200{
f7fe979e 1201 tempsym_t sym;
08ca2aa3 1202
7918f24d
NC
1203 PERL_ARGS_ASSERT_UNPACKSTRING;
1204
f337b084 1205 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1206 else if (need_utf8(pat, patend)) {
1207 /* We probably should try to avoid this in case a scalar context call
1208 wouldn't get to the "U0" */
1209 STRLEN len = strend - s;
230e1fce 1210 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1211 SAVEFREEPV(s);
1212 strend = s + len;
f337b084 1213 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1214 }
1215
f337b084
TH
1216 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1217 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1218
f7fe979e 1219 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1220
1221 return unpack_rec(&sym, s, s, strend, NULL );
1222}
1223
4136a0f7 1224STATIC I32
f7fe979e 1225S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1226{
27da23d5 1227 dVAR; dSP;
3297d27d 1228 SV *sv = NULL;
f7fe979e 1229 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1230 howlen_t howlen;
a6ec74c1 1231 I32 checksum = 0;
92d41999 1232 UV cuv = 0;
a6ec74c1 1233 NV cdouble = 0.0;
f337b084 1234 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1235 bool beyond = FALSE;
21c16052 1236 bool explicit_length;
9e27e96a 1237 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1238 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1239
1240 PERL_ARGS_ASSERT_UNPACK_REC;
1241
28be1210 1242 symptr->strbeg = s - strbeg;
49704364 1243
49704364 1244 while (next_symbol(symptr)) {
a7a3cfaa 1245 packprops_t props;
9e27e96a 1246 I32 len;
f337b084 1247 I32 datumtype = symptr->code;
206947d2 1248 /* do first one only unless in list context
08ca2aa3 1249 / is implemented by unpacking the count, then popping it from the
206947d2 1250 stack, so must check that we're not in the middle of a / */
49704364 1251 if ( unpack_only_one
206947d2 1252 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1253 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1254 break;
49704364 1255
f337b084 1256 switch (howlen = symptr->howlen) {
fc241834
RGS
1257 case e_star:
1258 len = strend - strbeg; /* long enough */
49704364 1259 break;
f337b084
TH
1260 default:
1261 /* e_no_len and e_number */
1262 len = symptr->length;
1263 break;
49704364 1264 }
18529408 1265
21c16052 1266 explicit_length = TRUE;
a6ec74c1 1267 redo_switch:
49704364 1268 beyond = s >= strend;
a7a3cfaa
TH
1269
1270 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1271 if (props) {
1272 /* props nonzero means we can process this letter. */
9e27e96a
AL
1273 const long size = props & PACK_SIZE_MASK;
1274 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1275 if (len > howmany)
1276 len = howmany;
1277
1278 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1279 if (len && unpack_only_one) len = 1;
1280 EXTEND(SP, len);
1281 EXTEND_MORTAL(len);
78d46eaa
NC
1282 }
1283 }
a7a3cfaa 1284
1109a392 1285 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1286 default:
1109a392 1287 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1288
a6ec74c1 1289 case '%':
49704364 1290 if (howlen == e_no_len)
18529408 1291 len = 16; /* len is not specified */
a6ec74c1 1292 checksum = len;
92d41999 1293 cuv = 0;
a6ec74c1 1294 cdouble = 0;
18529408 1295 continue;
a6ec74c1 1296 break;
18529408
IZ
1297 case '(':
1298 {
49704364 1299 tempsym_t savsym = *symptr;
9e27e96a 1300 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1301 symptr->flags |= group_modifiers;
49704364 1302 symptr->patend = savsym.grpend;
28be1210 1303 symptr->previous = &savsym;
49704364 1304 symptr->level++;
18529408 1305 PUTBACK;
c6f750d1 1306 if (len && unpack_only_one) len = 1;
18529408 1307 while (len--) {
49704364 1308 symptr->patptr = savsym.grpbeg;
f337b084
TH
1309 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1310 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1311 unpack_rec(symptr, s, strbeg, strend, &s);
1312 if (s == strend && savsym.howlen == e_star)
49704364 1313 break; /* No way to continue */
18529408
IZ
1314 }
1315 SPAGAIN;
28be1210 1316 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1317 *symptr = savsym;
18529408
IZ
1318 break;
1319 }
28be1210
TH
1320#ifdef PERL_PACK_CAN_SHRIEKSIGN
1321 case '.' | TYPE_IS_SHRIEKING:
1322#endif
1323 case '.': {
9e27e96a 1324 const char *from;
28be1210
TH
1325 SV *sv;
1326#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1327 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1328#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1329 const bool u8 = utf8;
28be1210
TH
1330#endif
1331 if (howlen == e_star) from = strbeg;
1332 else if (len <= 0) from = s;
1333 else {
1334 tempsym_t *group = symptr;
1335
1336 while (--len && group) group = group->previous;
1337 from = group ? strbeg + group->strbeg : strbeg;
1338 }
1339 sv = from <= s ?
00646304
CB
1340 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1341 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1342 mXPUSHs(sv);
28be1210
TH
1343 break;
1344 }
1345#ifdef PERL_PACK_CAN_SHRIEKSIGN
1346 case '@' | TYPE_IS_SHRIEKING:
1347#endif
a6ec74c1 1348 case '@':
28be1210
TH
1349 s = strbeg + symptr->strbeg;
1350#ifdef PERL_PACK_CAN_SHRIEKSIGN
1351 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1352#else /* PERL_PACK_CAN_SHRIEKSIGN */
1353 if (utf8)
1354#endif
1355 {
08ca2aa3
TH
1356 while (len > 0) {
1357 if (s >= strend)
1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
1359 s += UTF8SKIP(s);
1360 len--;
1361 }
1362 if (s > strend)
1363 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1364 } else {
28be1210 1365 if (strend-s < len)
fc241834 1366 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1367 s += len;
08ca2aa3 1368 }
a6ec74c1 1369 break;
62f95557
IZ
1370 case 'X' | TYPE_IS_SHRIEKING:
1371 if (!len) /* Avoid division by 0 */
1372 len = 1;
08ca2aa3 1373 if (utf8) {
f7fe979e 1374 const char *hop, *last;
f337b084
TH
1375 I32 l = len;
1376 hop = last = strbeg;
1377 while (hop < s) {
1378 hop += UTF8SKIP(hop);
1379 if (--l == 0) {
08ca2aa3 1380 last = hop;
f337b084
TH
1381 l = len;
1382 }
fc241834 1383 }
f337b084
TH
1384 if (last > s)
1385 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1386 s = last;
1387 break;
f337b084
TH
1388 }
1389 len = (s - strbeg) % len;
62f95557 1390 /* FALL THROUGH */
a6ec74c1 1391 case 'X':
08ca2aa3
TH
1392 if (utf8) {
1393 while (len > 0) {
1394 if (s <= strbeg)
1395 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1396 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1397 if (s <= strbeg)
1398 Perl_croak(aTHX_ "'X' outside of string in unpack");
1399 }
1400 len--;
1401 }
1402 } else {
fc241834
RGS
1403 if (len > s - strbeg)
1404 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1405 s -= len;
08ca2aa3 1406 }
a6ec74c1 1407 break;
9e27e96a
AL
1408 case 'x' | TYPE_IS_SHRIEKING: {
1409 I32 ai32;
62f95557
IZ
1410 if (!len) /* Avoid division by 0 */
1411 len = 1;
230e1fce
NC
1412 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1413 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1414 if (ai32 == 0) break;
1415 len -= ai32;
9e27e96a 1416 }
62f95557 1417 /* FALL THROUGH */
a6ec74c1 1418 case 'x':
08ca2aa3
TH
1419 if (utf8) {
1420 while (len>0) {
1421 if (s >= strend)
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1423 s += UTF8SKIP(s);
1424 len--;
1425 }
1426 } else {
fc241834
RGS
1427 if (len > strend - s)
1428 Perl_croak(aTHX_ "'x' outside of string in unpack");
1429 s += len;
f337b084 1430 }
a6ec74c1
JH
1431 break;
1432 case '/':
49704364
WL
1433 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1434 break;
a6ec74c1
JH
1435 case 'A':
1436 case 'Z':
1437 case 'a':
08ca2aa3
TH
1438 if (checksum) {
1439 /* Preliminary length estimate is assumed done in 'W' */
1440 if (len > strend - s) len = strend - s;
1441 goto W_checksum;
1442 }
1443 if (utf8) {
1444 I32 l;
f7fe979e 1445 const char *hop;
08ca2aa3
TH
1446 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1447 if (hop >= strend) {
1448 if (hop > strend)
1449 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1450 break;
fc241834 1451 }
a6ec74c1 1452 }
08ca2aa3
TH
1453 if (hop > strend)
1454 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1455 len = hop - s;
1456 } else if (len > strend - s)
1457 len = strend - s;
1458
1459 if (datumtype == 'Z') {
1460 /* 'Z' strips stuff after first null */
f7fe979e 1461 const char *ptr, *end;
f337b084
TH
1462 end = s + len;
1463 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1464 sv = newSVpvn(s, ptr-s);
1465 if (howlen == e_star) /* exact for 'Z*' */
1466 len = ptr-s + (ptr != strend ? 1 : 0);
1467 } else if (datumtype == 'A') {
1468 /* 'A' strips both nulls and spaces */
f7fe979e 1469 const char *ptr;
18bdf90a
TH
1470 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1473 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1474 if (ptr >= s) ptr += UTF8SKIP(ptr);
1475 else ptr++;
28be1210 1476 if (ptr > s+len)
18bdf90a
TH
1477 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1478 } else {
1479 for (ptr = s+len-1; ptr >= s; ptr--)
1480 if (*ptr != 0 && !isSPACE(*ptr)) break;
1481 ptr++;
1482 }
08ca2aa3
TH
1483 sv = newSVpvn(s, ptr-s);
1484 } else sv = newSVpvn(s, len);
1485
1486 if (utf8) {
1487 SvUTF8_on(sv);
1488 /* Undo any upgrade done due to need_utf8() */
f337b084 1489 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1490 sv_utf8_downgrade(sv, 0);
a6ec74c1 1491 }
6e449a3a 1492 mXPUSHs(sv);
08ca2aa3 1493 s += len;
a6ec74c1
JH
1494 break;
1495 case 'B':
08ca2aa3
TH
1496 case 'b': {
1497 char *str;
49704364 1498 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1499 len = (strend - s) * 8;
1500 if (checksum) {
f337b084 1501 if (utf8)
08ca2aa3 1502 while (len >= 8 && s < strend) {
f337b084 1503 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1504 len -= 8;
1505 }
f337b084 1506 else
fc241834 1507 while (len >= 8) {
08ca2aa3 1508 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1509 len -= 8;
1510 }
08ca2aa3
TH
1511 if (len && s < strend) {
1512 U8 bits;
f337b084
TH
1513 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1514 if (datumtype == 'b')
a6ec74c1 1515 while (len-- > 0) {
92d41999 1516 if (bits & 1) cuv++;
a6ec74c1
JH
1517 bits >>= 1;
1518 }
f337b084 1519 else
a6ec74c1 1520 while (len-- > 0) {
08ca2aa3 1521 if (bits & 0x80) cuv++;
a6ec74c1
JH
1522 bits <<= 1;
1523 }
fc241834 1524 }
a6ec74c1
JH
1525 break;
1526 }
08ca2aa3 1527
561b68a9 1528 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1529 SvPOK_on(sv);
1530 str = SvPVX(sv);
1531 if (datumtype == 'b') {
f337b084 1532 U8 bits = 0;
f7fe979e 1533 const I32 ai32 = len;
08ca2aa3
TH
1534 for (len = 0; len < ai32; len++) {
1535 if (len & 7) bits >>= 1;
1536 else if (utf8) {
1537 if (s >= strend) break;
f337b084 1538 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1539 } else bits = *(U8 *) s++;
1540 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1541 }
08ca2aa3 1542 } else {
f337b084 1543 U8 bits = 0;
f7fe979e 1544 const I32 ai32 = len;
08ca2aa3
TH
1545 for (len = 0; len < ai32; len++) {
1546 if (len & 7) bits <<= 1;
1547 else if (utf8) {
1548 if (s >= strend) break;
f337b084 1549 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1550 } else bits = *(U8 *) s++;
1551 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1552 }
1553 }
1554 *str = '\0';
aa07b2f6 1555 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1556 XPUSHs(sv);
a6ec74c1 1557 break;
08ca2aa3 1558 }
a6ec74c1 1559 case 'H':
08ca2aa3 1560 case 'h': {
3297d27d 1561 char *str = NULL;
fc241834 1562 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1563 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1564 len = (strend - s) * 2;
858fe5e1
TC
1565 if (!checksum) {
1566 sv = sv_2mortal(newSV(len ? len : 1));
1567 SvPOK_on(sv);
1568 str = SvPVX(sv);
1569 }
a6ec74c1 1570 if (datumtype == 'h') {
f337b084 1571 U8 bits = 0;
9e27e96a 1572 I32 ai32 = len;
fc241834
RGS
1573 for (len = 0; len < ai32; len++) {
1574 if (len & 1) bits >>= 4;
1575 else if (utf8) {
1576 if (s >= strend) break;
f337b084 1577 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1578 } else bits = * (U8 *) s++;
858fe5e1
TC
1579 if (!checksum)
1580 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1581 }
08ca2aa3 1582 } else {
f337b084 1583 U8 bits = 0;
f7fe979e 1584 const I32 ai32 = len;
08ca2aa3
TH
1585 for (len = 0; len < ai32; len++) {
1586 if (len & 1) bits <<= 4;
1587 else if (utf8) {
1588 if (s >= strend) break;
f337b084 1589 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1590 } else bits = *(U8 *) s++;
858fe5e1
TC
1591 if (!checksum)
1592 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1593 }
1594 }
858fe5e1
TC
1595 if (!checksum) {
1596 *str = '\0';
1597 SvCUR_set(sv, str - SvPVX_const(sv));
1598 XPUSHs(sv);
1599 }
a6ec74c1 1600 break;
08ca2aa3 1601 }
1651fc44
ML
1602 case 'C':
1603 if (len == 0) {
1604 if (explicit_length)
1605 /* Switch to "character" mode */
1606 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1607 break;
1608 }
1609 /* FALL THROUGH */
a6ec74c1 1610 case 'c':
1651fc44
ML
1611 while (len-- > 0 && s < strend) {
1612 int aint;
1613 if (utf8)
1614 {
1615 STRLEN retlen;
1616 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1617 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1618 if (retlen == (STRLEN) -1 || retlen == 0)
1619 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1620 s += retlen;
1621 }
1622 else
1623 aint = *(U8 *)(s)++;
1624 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1625 aint -= 256;
08ca2aa3 1626 if (!checksum)
6e449a3a 1627 mPUSHi(aint);
73cb7263
NC
1628 else if (checksum > bits_in_uv)
1629 cdouble += (NV)aint;
1630 else
1631 cuv += aint;
a6ec74c1
JH
1632 }
1633 break;
08ca2aa3
TH
1634 case 'W':
1635 W_checksum:
1651fc44 1636 if (utf8) {
08ca2aa3 1637 while (len-- > 0 && s < strend) {
08ca2aa3 1638 STRLEN retlen;
f7fe979e 1639 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1640 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1641 if (retlen == (STRLEN) -1 || retlen == 0)
1642 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1643 s += retlen;
1644 if (!checksum)
6e449a3a 1645 mPUSHu(val);
08ca2aa3
TH
1646 else if (checksum > bits_in_uv)
1647 cdouble += (NV) val;
d6d3e8bd 1648 else
08ca2aa3 1649 cuv += val;
fc241834 1650 }
08ca2aa3 1651 } else if (!checksum)
a6ec74c1 1652 while (len-- > 0) {
f7fe979e 1653 const U8 ch = *(U8 *) s++;
6e449a3a 1654 mPUSHu(ch);
a6ec74c1 1655 }
08ca2aa3
TH
1656 else if (checksum > bits_in_uv)
1657 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1658 else
1659 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1660 break;
1661 case 'U':
35bcd338 1662 if (len == 0) {
c5333953 1663 if (explicit_length && howlen != e_star) {
08ca2aa3 1664 /* Switch to "bytes in UTF-8" mode */
f337b084 1665 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1666 else
1667 /* Should be impossible due to the need_utf8() test */
1668 Perl_croak(aTHX_ "U0 mode on a byte string");
1669 }
35bcd338
JH
1670 break;
1671 }
08ca2aa3 1672 if (len > strend - s) len = strend - s;
fc241834 1673 if (!checksum) {
08ca2aa3
TH
1674 if (len && unpack_only_one) len = 1;
1675 EXTEND(SP, len);
1676 EXTEND_MORTAL(len);
fc241834 1677 }
08ca2aa3
TH
1678 while (len-- > 0 && s < strend) {
1679 STRLEN retlen;
1680 UV auv;
1681 if (utf8) {
1682 U8 result[UTF8_MAXLEN];
f7fe979e 1683 const char *ptr = s;
08ca2aa3 1684 STRLEN len;
08ca2aa3
TH
1685 /* Bug: warns about bad utf8 even if we are short on bytes
1686 and will break out of the loop */
230e1fce
NC
1687 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1688 'U'))
08ca2aa3
TH
1689 break;
1690 len = UTF8SKIP(result);
fc241834 1691 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1692 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1693 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1694 s = ptr;
1695 } else {
1696 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1697 if (retlen == (STRLEN) -1 || retlen == 0)
1698 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1699 s += retlen;
1700 }
1701 if (!checksum)
6e449a3a 1702 mPUSHu(auv);
73cb7263 1703 else if (checksum > bits_in_uv)
08ca2aa3 1704 cdouble += (NV) auv;
73cb7263 1705 else
08ca2aa3 1706 cuv += auv;
a6ec74c1
JH
1707 }
1708 break;
49704364
WL
1709 case 's' | TYPE_IS_SHRIEKING:
1710#if SHORTSIZE != SIZE16
73cb7263 1711 while (len-- > 0) {
08ca2aa3 1712 short ashort;
f337b084
TH
1713 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1714 DO_BO_UNPACK(ashort, s);
08ca2aa3 1715 if (!checksum)
6e449a3a 1716 mPUSHi(ashort);
73cb7263
NC
1717 else if (checksum > bits_in_uv)
1718 cdouble += (NV)ashort;
1719 else
1720 cuv += ashort;
49704364
WL
1721 }
1722 break;
1723#else
1724 /* Fallthrough! */
a6ec74c1 1725#endif
49704364 1726 case 's':
73cb7263 1727 while (len-- > 0) {
08ca2aa3
TH
1728 I16 ai16;
1729
1730#if U16SIZE > SIZE16
1731 ai16 = 0;
1732#endif
f337b084 1733 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1734 DO_BO_UNPACK(ai16, 16);
1109a392 1735#if U16SIZE > SIZE16
73cb7263
NC
1736 if (ai16 > 32767)
1737 ai16 -= 65536;
a6ec74c1 1738#endif
08ca2aa3 1739 if (!checksum)
6e449a3a 1740 mPUSHi(ai16);
73cb7263
NC
1741 else if (checksum > bits_in_uv)
1742 cdouble += (NV)ai16;
1743 else
1744 cuv += ai16;
a6ec74c1
JH
1745 }
1746 break;
49704364
WL
1747 case 'S' | TYPE_IS_SHRIEKING:
1748#if SHORTSIZE != SIZE16
73cb7263 1749 while (len-- > 0) {
08ca2aa3 1750 unsigned short aushort;
f337b084
TH
1751 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1752 DO_BO_UNPACK(aushort, s);
08ca2aa3 1753 if (!checksum)
6e449a3a 1754 mPUSHu(aushort);
73cb7263
NC
1755 else if (checksum > bits_in_uv)
1756 cdouble += (NV)aushort;
1757 else
1758 cuv += aushort;
49704364
WL
1759 }
1760 break;
1761#else
486ec47a 1762 /* Fallthrough! */
49704364 1763#endif
a6ec74c1
JH
1764 case 'v':
1765 case 'n':
1766 case 'S':
73cb7263 1767 while (len-- > 0) {
08ca2aa3
TH
1768 U16 au16;
1769#if U16SIZE > SIZE16
1770 au16 = 0;
1771#endif
f337b084 1772 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1773 DO_BO_UNPACK(au16, 16);
a6ec74c1 1774#ifdef HAS_NTOHS
73cb7263
NC
1775 if (datumtype == 'n')
1776 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1777#endif
1778#ifdef HAS_VTOHS
73cb7263
NC
1779 if (datumtype == 'v')
1780 au16 = vtohs(au16);
a6ec74c1 1781#endif
08ca2aa3 1782 if (!checksum)
6e449a3a 1783 mPUSHu(au16);
73cb7263 1784 else if (checksum > bits_in_uv)
f337b084 1785 cdouble += (NV) au16;
73cb7263
NC
1786 else
1787 cuv += au16;
a6ec74c1
JH
1788 }
1789 break;
7212898e 1790#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1791 case 'v' | TYPE_IS_SHRIEKING:
1792 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1793 while (len-- > 0) {
08ca2aa3
TH
1794 I16 ai16;
1795# if U16SIZE > SIZE16
1796 ai16 = 0;
1797# endif
f337b084 1798 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1799# ifdef HAS_NTOHS
73cb7263 1800 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1801 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1802# endif /* HAS_NTOHS */
1803# ifdef HAS_VTOHS
73cb7263 1804 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1805 ai16 = (I16) vtohs((U16) ai16);
1806# endif /* HAS_VTOHS */
1807 if (!checksum)
6e449a3a 1808 mPUSHi(ai16);
73cb7263 1809 else if (checksum > bits_in_uv)
08ca2aa3 1810 cdouble += (NV) ai16;
73cb7263
NC
1811 else
1812 cuv += ai16;
068bd2e7
MHM
1813 }
1814 break;
08ca2aa3 1815#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1816 case 'i':
49704364 1817 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1818 while (len-- > 0) {
08ca2aa3 1819 int aint;
f337b084
TH
1820 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1821 DO_BO_UNPACK(aint, i);
08ca2aa3 1822 if (!checksum)
6e449a3a 1823 mPUSHi(aint);
73cb7263
NC
1824 else if (checksum > bits_in_uv)
1825 cdouble += (NV)aint;
1826 else
1827 cuv += aint;
a6ec74c1
JH
1828 }
1829 break;
1830 case 'I':
49704364 1831 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1832 while (len-- > 0) {
08ca2aa3 1833 unsigned int auint;
f337b084
TH
1834 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1835 DO_BO_UNPACK(auint, i);
08ca2aa3 1836 if (!checksum)
6e449a3a 1837 mPUSHu(auint);
73cb7263
NC
1838 else if (checksum > bits_in_uv)
1839 cdouble += (NV)auint;
1840 else
1841 cuv += auint;
a6ec74c1
JH
1842 }
1843 break;
92d41999 1844 case 'j':
73cb7263 1845 while (len-- > 0) {
08ca2aa3 1846 IV aiv;
f337b084 1847 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1848#if IVSIZE == INTSIZE
f337b084 1849 DO_BO_UNPACK(aiv, i);
1109a392 1850#elif IVSIZE == LONGSIZE
f337b084 1851 DO_BO_UNPACK(aiv, l);
1109a392 1852#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1853 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1854#else
1855 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1856#endif
08ca2aa3 1857 if (!checksum)
6e449a3a 1858 mPUSHi(aiv);
73cb7263
NC
1859 else if (checksum > bits_in_uv)
1860 cdouble += (NV)aiv;
1861 else
1862 cuv += aiv;
92d41999
JH
1863 }
1864 break;
1865 case 'J':
73cb7263 1866 while (len-- > 0) {
08ca2aa3 1867 UV auv;
f337b084 1868 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1869#if IVSIZE == INTSIZE
f337b084 1870 DO_BO_UNPACK(auv, i);
08ca2aa3 1871#elif IVSIZE == LONGSIZE
f337b084 1872 DO_BO_UNPACK(auv, l);
08ca2aa3 1873#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1874 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1875#else
1876 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1877#endif
08ca2aa3 1878 if (!checksum)
6e449a3a 1879 mPUSHu(auv);
73cb7263
NC
1880 else if (checksum > bits_in_uv)
1881 cdouble += (NV)auv;
1882 else
1883 cuv += auv;
92d41999
JH
1884 }
1885 break;
49704364
WL
1886 case 'l' | TYPE_IS_SHRIEKING:
1887#if LONGSIZE != SIZE32
73cb7263 1888 while (len-- > 0) {
08ca2aa3 1889 long along;
f337b084
TH
1890 SHIFT_VAR(utf8, s, strend, along, datumtype);
1891 DO_BO_UNPACK(along, l);
08ca2aa3 1892 if (!checksum)
6e449a3a 1893 mPUSHi(along);
73cb7263
NC
1894 else if (checksum > bits_in_uv)
1895 cdouble += (NV)along;
1896 else
1897 cuv += along;
49704364
WL
1898 }
1899 break;
1900#else
1901 /* Fallthrough! */
a6ec74c1 1902#endif
49704364 1903 case 'l':
73cb7263 1904 while (len-- > 0) {
08ca2aa3
TH
1905 I32 ai32;
1906#if U32SIZE > SIZE32
1907 ai32 = 0;
1908#endif
f337b084 1909 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1910 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1911#if U32SIZE > SIZE32
08ca2aa3 1912 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1913#endif
08ca2aa3 1914 if (!checksum)
6e449a3a 1915 mPUSHi(ai32);
73cb7263
NC
1916 else if (checksum > bits_in_uv)
1917 cdouble += (NV)ai32;
1918 else
1919 cuv += ai32;
a6ec74c1
JH
1920 }
1921 break;
49704364
WL
1922 case 'L' | TYPE_IS_SHRIEKING:
1923#if LONGSIZE != SIZE32
73cb7263 1924 while (len-- > 0) {
08ca2aa3 1925 unsigned long aulong;
f337b084
TH
1926 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1927 DO_BO_UNPACK(aulong, l);
08ca2aa3 1928 if (!checksum)
6e449a3a 1929 mPUSHu(aulong);
73cb7263
NC
1930 else if (checksum > bits_in_uv)
1931 cdouble += (NV)aulong;
1932 else
1933 cuv += aulong;
49704364
WL
1934 }
1935 break;
1936#else
1937 /* Fall through! */
1938#endif
a6ec74c1
JH
1939 case 'V':
1940 case 'N':
1941 case 'L':
73cb7263 1942 while (len-- > 0) {
08ca2aa3
TH
1943 U32 au32;
1944#if U32SIZE > SIZE32
1945 au32 = 0;
1946#endif
f337b084 1947 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1948 DO_BO_UNPACK(au32, 32);
a6ec74c1 1949#ifdef HAS_NTOHL
73cb7263
NC
1950 if (datumtype == 'N')
1951 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1952#endif
1953#ifdef HAS_VTOHL
73cb7263
NC
1954 if (datumtype == 'V')
1955 au32 = vtohl(au32);
a6ec74c1 1956#endif
08ca2aa3 1957 if (!checksum)
6e449a3a 1958 mPUSHu(au32);
fc241834
RGS
1959 else if (checksum > bits_in_uv)
1960 cdouble += (NV)au32;
1961 else
1962 cuv += au32;
a6ec74c1
JH
1963 }
1964 break;
7212898e 1965#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1966 case 'V' | TYPE_IS_SHRIEKING:
1967 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1968 while (len-- > 0) {
08ca2aa3
TH
1969 I32 ai32;
1970# if U32SIZE > SIZE32
1971 ai32 = 0;
1972# endif
f337b084 1973 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1974# ifdef HAS_NTOHL
73cb7263
NC
1975 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1976 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1977# endif
1978# ifdef HAS_VTOHL
73cb7263
NC
1979 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1980 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1981# endif
1982 if (!checksum)
6e449a3a 1983 mPUSHi(ai32);
73cb7263
NC
1984 else if (checksum > bits_in_uv)
1985 cdouble += (NV)ai32;
1986 else
1987 cuv += ai32;
068bd2e7
MHM
1988 }
1989 break;
08ca2aa3 1990#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1991 case 'p':
a6ec74c1 1992 while (len-- > 0) {
f7fe979e 1993 const char *aptr;
f337b084 1994 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1995 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1996 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1997 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1998 }
1999 break;
2000 case 'w':
a6ec74c1
JH
2001 {
2002 UV auv = 0;
2003 U32 bytes = 0;
fc241834 2004
08ca2aa3
TH
2005 while (len > 0 && s < strend) {
2006 U8 ch;
f337b084 2007 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 2008 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2009 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2010 if (ch < 0x80) {
a6ec74c1 2011 bytes = 0;
6e449a3a 2012 mPUSHu(auv);
a6ec74c1
JH
2013 len--;
2014 auv = 0;
08ca2aa3 2015 continue;
a6ec74c1 2016 }
08ca2aa3 2017 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2018 const char *t;
a6ec74c1 2019
f5992bc4 2020 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2021 while (s < strend) {
f337b084 2022 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2023 sv = mul128(sv, (U8)(ch & 0x7f));
2024 if (!(ch & 0x80)) {
a6ec74c1
JH
2025 bytes = 0;
2026 break;
2027 }
2028 }
10516c54 2029 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2030 while (*t == '0')
2031 t++;
2032 sv_chop(sv, t);
6e449a3a 2033 mPUSHs(sv);
a6ec74c1
JH
2034 len--;
2035 auv = 0;
2036 }
2037 }
2038 if ((s >= strend) && bytes)
49704364 2039 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2040 }
2041 break;
2042 case 'P':
49704364
WL
2043 if (symptr->howlen == e_star)
2044 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2045 EXTEND(SP, 1);
2d3e0934 2046 if (s + sizeof(char*) <= strend) {
08ca2aa3 2047 char *aptr;
f337b084 2048 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2049 DO_BO_UNPACK_PC(aptr);
fc241834 2050 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2051 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2052 }
a6ec74c1
JH
2053 break;
2054#ifdef HAS_QUAD
2055 case 'q':
73cb7263 2056 while (len-- > 0) {
08ca2aa3 2057 Quad_t aquad;
f337b084
TH
2058 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2059 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2060 if (!checksum)
6e449a3a
MHM
2061 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2062 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2063 else if (checksum > bits_in_uv)
2064 cdouble += (NV)aquad;
2065 else
2066 cuv += aquad;
2067 }
a6ec74c1
JH
2068 break;
2069 case 'Q':
73cb7263 2070 while (len-- > 0) {
08ca2aa3 2071 Uquad_t auquad;
f337b084
TH
2072 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2073 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2074 if (!checksum)
6e449a3a
MHM
2075 mPUSHs(auquad <= UV_MAX ?
2076 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2077 else if (checksum > bits_in_uv)
2078 cdouble += (NV)auquad;
2079 else
2080 cuv += auquad;
a6ec74c1
JH
2081 }
2082 break;
08ca2aa3 2083#endif /* HAS_QUAD */
a6ec74c1
JH
2084 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2085 case 'f':
73cb7263 2086 while (len-- > 0) {
08ca2aa3 2087 float afloat;
f337b084 2088 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2089 DO_BO_UNPACK_N(afloat, float);
2090 if (!checksum)
6e449a3a 2091 mPUSHn(afloat);
08ca2aa3 2092 else
73cb7263 2093 cdouble += afloat;
fc241834 2094 }
a6ec74c1
JH
2095 break;
2096 case 'd':
73cb7263 2097 while (len-- > 0) {
08ca2aa3 2098 double adouble;
f337b084 2099 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2100 DO_BO_UNPACK_N(adouble, double);
2101 if (!checksum)
6e449a3a 2102 mPUSHn(adouble);
08ca2aa3 2103 else
73cb7263 2104 cdouble += adouble;
fc241834 2105 }
a6ec74c1 2106 break;
92d41999 2107 case 'F':
73cb7263 2108 while (len-- > 0) {
275663fa
TC
2109 NV_bytes anv;
2110 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2111 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 2112 if (!checksum)
275663fa 2113 mPUSHn(anv.nv);
08ca2aa3 2114 else
275663fa 2115 cdouble += anv.nv;
fc241834 2116 }
92d41999
JH
2117 break;
2118#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2119 case 'D':
73cb7263 2120 while (len-- > 0) {
275663fa
TC
2121 ld_bytes aldouble;
2122 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2123 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 2124 if (!checksum)
275663fa 2125 mPUSHn(aldouble.ld);
08ca2aa3 2126 else
275663fa 2127 cdouble += aldouble.ld;
92d41999
JH
2128 }
2129 break;
2130#endif
a6ec74c1 2131 case 'u':
858fe5e1 2132 if (!checksum) {
f7fe979e 2133 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2134 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2135 if (l) SvPOK_on(sv);
2136 }
2137 if (utf8) {
2138 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2139 I32 a, b, c, d;
db187877 2140 char hunk[3];
08ca2aa3 2141
08ca2aa3
TH
2142 while (len > 0) {
2143 next_uni_uu(aTHX_ &s, strend, &a);
2144 next_uni_uu(aTHX_ &s, strend, &b);
2145 next_uni_uu(aTHX_ &s, strend, &c);
2146 next_uni_uu(aTHX_ &s, strend, &d);
2147 hunk[0] = (char)((a << 2) | (b >> 4));
2148 hunk[1] = (char)((b << 4) | (c >> 2));
2149 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2150 if (!checksum)
2151 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
2152 len -= 3;
2153 }
2154 if (s < strend) {
f7fe979e
AL
2155 if (*s == '\n') {
2156 s++;
2157 }
08ca2aa3
TH
2158 else {
2159 /* possible checksum byte */
f7fe979e
AL
2160 const char *skip = s+UTF8SKIP(s);
2161 if (skip < strend && *skip == '\n')
2162 s = skip+1;
08ca2aa3
TH
2163 }
2164 }
2165 }
2166 } else {
fc241834
RGS
2167 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2168 I32 a, b, c, d;
db187877 2169 char hunk[3];
a6ec74c1 2170
fc241834
RGS
2171 len = PL_uudmap[*(U8*)s++] & 077;
2172 while (len > 0) {
2173 if (s < strend && ISUUCHAR(*s))
2174 a = PL_uudmap[*(U8*)s++] & 077;
2175 else
2176 a = 0;
2177 if (s < strend && ISUUCHAR(*s))
2178 b = PL_uudmap[*(U8*)s++] & 077;
2179 else
2180 b = 0;
2181 if (s < strend && ISUUCHAR(*s))
2182 c = PL_uudmap[*(U8*)s++] & 077;
2183 else
2184 c = 0;
2185 if (s < strend && ISUUCHAR(*s))
2186 d = PL_uudmap[*(U8*)s++] & 077;
2187 else
2188 d = 0;
2189 hunk[0] = (char)((a << 2) | (b >> 4));
2190 hunk[1] = (char)((b << 4) | (c >> 2));
2191 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2192 if (!checksum)
2193 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
2194 len -= 3;
2195 }
2196 if (*s == '\n')
2197 s++;
2198 else /* possible checksum byte */
2199 if (s + 1 < strend && s[1] == '\n')
2200 s += 2;
a6ec74c1 2201 }
08ca2aa3 2202 }
858fe5e1
TC
2203 if (!checksum)
2204 XPUSHs(sv);
a6ec74c1
JH
2205 break;
2206 }
49704364 2207
a6ec74c1 2208 if (checksum) {
1109a392 2209 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2210 (checksum > bits_in_uv &&
08ca2aa3
TH
2211 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2212 NV trouble, anv;
a6ec74c1 2213
08ca2aa3 2214 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2215 while (checksum >= 16) {
2216 checksum -= 16;
08ca2aa3 2217 anv *= 65536.0;
a6ec74c1 2218 }
a6ec74c1 2219 while (cdouble < 0.0)
08ca2aa3
TH
2220 cdouble += anv;
2221 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2222 sv = newSVnv(cdouble);
a6ec74c1
JH
2223 }
2224 else {
fa8ec7c1
NC
2225 if (checksum < bits_in_uv) {
2226 UV mask = ((UV)1 << checksum) - 1;
92d41999 2227 cuv &= mask;
a6ec74c1 2228 }
c4c5f44a 2229 sv = newSVuv(cuv);
a6ec74c1 2230 }
6e449a3a 2231 mXPUSHs(sv);
a6ec74c1
JH
2232 checksum = 0;
2233 }
fc241834 2234
49704364
WL
2235 if (symptr->flags & FLAG_SLASH){
2236 if (SP - PL_stack_base - start_sp_offset <= 0)
2237 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2238 if( next_symbol(symptr) ){
2239 if( symptr->howlen == e_number )
2240 Perl_croak(aTHX_ "Count after length/code in unpack" );
2241 if( beyond ){
2242 /* ...end of char buffer then no decent length available */
2243 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2244 } else {
2245 /* take top of stack (hope it's numeric) */
2246 len = POPi;
2247 if( len < 0 )
2248 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2249 }
2250 } else {
2251 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2252 }
2253 datumtype = symptr->code;
21c16052 2254 explicit_length = FALSE;
49704364
WL
2255 goto redo_switch;
2256 }
a6ec74c1 2257 }
49704364 2258
18529408
IZ
2259 if (new_s)
2260 *new_s = s;
2261 PUTBACK;
2262 return SP - PL_stack_base - start_sp_offset;
2263}
2264
2265PP(pp_unpack)
2266{
97aff369 2267 dVAR;
18529408 2268 dSP;
bab9c0ac 2269 dPOPPOPssrl;
18529408
IZ
2270 I32 gimme = GIMME_V;
2271 STRLEN llen;
2272 STRLEN rlen;
5c144d81
NC
2273 const char *pat = SvPV_const(left, llen);
2274 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2275 const char *strend = s + rlen;
2276 const char *patend = pat + llen;
08ca2aa3 2277 I32 cnt;
18529408
IZ
2278
2279 PUTBACK;
7accc089 2280 cnt = unpackstring(pat, patend, s, strend,
49704364 2281 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2282 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2283
18529408
IZ
2284 SPAGAIN;
2285 if ( !cnt && gimme == G_SCALAR )
2286 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2287 RETURN;
2288}
2289
f337b084 2290STATIC U8 *
f7fe979e 2291doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2292{
f337b084 2293 *h++ = PL_uuemap[len];
a6ec74c1 2294 while (len > 2) {
f337b084
TH
2295 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2296 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2297 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2298 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2299 s += 3;
2300 len -= 3;
2301 }
2302 if (len > 0) {
f7fe979e 2303 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2304 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2305 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2306 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2307 *h++ = PL_uuemap[0];
a6ec74c1 2308 }
f337b084
TH
2309 *h++ = '\n';
2310 return h;
a6ec74c1
JH
2311}
2312
2313STATIC SV *
f7fe979e 2314S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2315{
8b6e33c7
AL
2316 SV *result = newSVpvn(s, l);
2317 char *const result_c = SvPV_nolen(result); /* convenience */
2318 char *out = result_c;
2319 bool skip = 1;
2320 bool ignore = 0;
a6ec74c1 2321
7918f24d
NC
2322 PERL_ARGS_ASSERT_IS_AN_INT;
2323
a6ec74c1
JH
2324 while (*s) {
2325 switch (*s) {
2326 case ' ':
2327 break;
2328 case '+':
2329 if (!skip) {
2330 SvREFCNT_dec(result);
2331 return (NULL);
2332 }
2333 break;
2334 case '0':
2335 case '1':
2336 case '2':
2337 case '3':
2338 case '4':
2339 case '5':
2340 case '6':
2341 case '7':
2342 case '8':
2343 case '9':
2344 skip = 0;
2345 if (!ignore) {
2346 *(out++) = *s;
2347 }
2348 break;
2349 case '.':
2350 ignore = 1;
2351 break;
2352 default:
2353 SvREFCNT_dec(result);
2354 return (NULL);
2355 }
2356 s++;
2357 }
2358 *(out++) = '\0';
2359 SvCUR_set(result, out - result_c);
2360 return (result);
2361}
2362
2363/* pnum must be '\0' terminated */
2364STATIC int
2365S_div128(pTHX_ SV *pnum, bool *done)
2366{
8b6e33c7
AL
2367 STRLEN len;
2368 char * const s = SvPV(pnum, len);
2369 char *t = s;
2370 int m = 0;
2371
7918f24d
NC
2372 PERL_ARGS_ASSERT_DIV128;
2373
8b6e33c7
AL
2374 *done = 1;
2375 while (*t) {
2376 const int i = m * 10 + (*t - '0');
2377 const int r = (i >> 7); /* r < 10 */
2378 m = i & 0x7F;
2379 if (r) {
2380 *done = 0;
2381 }
2382 *(t++) = '0' + r;
a6ec74c1 2383 }
8b6e33c7
AL
2384 *(t++) = '\0';
2385 SvCUR_set(pnum, (STRLEN) (t - s));
2386 return (m);
a6ec74c1
JH
2387}
2388
18529408 2389/*
7accc089
JH
2390=for apidoc packlist
2391
2392The engine implementing pack() Perl function.
2393
bfce84ec
AL
2394=cut
2395*/
7accc089
JH
2396
2397void
f7fe979e 2398Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2399{
97aff369 2400 dVAR;
aadb217d
JH
2401 tempsym_t sym;
2402
7918f24d
NC
2403 PERL_ARGS_ASSERT_PACKLIST;
2404
f7fe979e 2405 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2406
f337b084
TH
2407 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2408 Also make sure any UTF8 flag is loaded */
56eb0262 2409 SvPV_force_nolen(cat);
bfce84ec
AL
2410 if (DO_UTF8(cat))
2411 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2412
49704364
WL
2413 (void)pack_rec( cat, &sym, beglist, endlist );
2414}
2415
f337b084
TH
2416/* like sv_utf8_upgrade, but also repoint the group start markers */
2417STATIC void
2418marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2419 STRLEN len;
2420 tempsym_t *group;
f7fe979e
AL
2421 const char *from_ptr, *from_start, *from_end, **marks, **m;
2422 char *to_start, *to_ptr;
f337b084
TH
2423
2424 if (SvUTF8(sv)) return;
2425
aa07b2f6 2426 from_start = SvPVX_const(sv);
f337b084
TH
2427 from_end = from_start + SvCUR(sv);
2428 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2429 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2430 if (from_ptr == from_end) {
2431 /* Simple case: no character needs to be changed */
2432 SvUTF8_on(sv);
2433 return;
2434 }
2435
3473cf63 2436 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2437 Newx(to_start, len, char);
f337b084
TH
2438 Copy(from_start, to_start, from_ptr-from_start, char);
2439 to_ptr = to_start + (from_ptr-from_start);
2440
a02a5408 2441 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2442 for (group=sym_ptr; group; group = group->previous)
2443 marks[group->level] = from_start + group->strbeg;
2444 marks[sym_ptr->level+1] = from_end+1;
2445 for (m = marks; *m < from_ptr; m++)
2446 *m = to_start + (*m-from_start);
2447
2448 for (;from_ptr < from_end; from_ptr++) {
2449 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2450 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2451 }
2452 *to_ptr = 0;
2453
2454 while (*m == from_ptr) *m++ = to_ptr;
2455 if (m != marks + sym_ptr->level+1) {
2456 Safefree(marks);
2457 Safefree(to_start);
5637ef5b
NC
2458 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2459 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2460 }
2461 for (group=sym_ptr; group; group = group->previous)
2462 group->strbeg = marks[group->level] - to_start;
2463 Safefree(marks);
2464
2465 if (SvOOK(sv)) {
2466 if (SvIVX(sv)) {
b162af07 2467 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2468 from_start -= SvIVX(sv);
2469 SvIV_set(sv, 0);
2470 }
2471 SvFLAGS(sv) &= ~SVf_OOK;
2472 }
2473 if (SvLEN(sv) != 0)
2474 Safefree(from_start);
f880fe2f 2475 SvPV_set(sv, to_start);
b162af07
SP
2476 SvCUR_set(sv, to_ptr - to_start);
2477 SvLEN_set(sv, len);
f337b084
TH
2478 SvUTF8_on(sv);
2479}
2480
2481/* Exponential string grower. Makes string extension effectively O(n)
2482 needed says how many extra bytes we need (not counting the final '\0')
2483 Only grows the string if there is an actual lack of space
2484*/
2485STATIC char *
0bd48802 2486S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2487 const STRLEN cur = SvCUR(sv);
2488 const STRLEN len = SvLEN(sv);
f337b084 2489 STRLEN extend;
7918f24d
NC
2490
2491 PERL_ARGS_ASSERT_SV_EXP_GROW;
2492
f337b084
TH
2493 if (len - cur > needed) return SvPVX(sv);
2494 extend = needed > len ? needed : len;
2495 return SvGROW(sv, len+extend+1);
2496}
49704364
WL
2497
2498STATIC
2499SV **
f337b084 2500S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2501{
97aff369 2502 dVAR;
49704364 2503 tempsym_t lookahead;
f337b084
TH
2504 I32 items = endlist - beglist;
2505 bool found = next_symbol(symptr);
2506 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2507 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2508
7918f24d
NC
2509 PERL_ARGS_ASSERT_PACK_REC;
2510
f337b084
TH
2511 if (symptr->level == 0 && found && symptr->code == 'U') {
2512 marked_upgrade(aTHX_ cat, symptr);
2513 symptr->flags |= FLAG_DO_UTF8;
2514 utf8 = 0;
49704364 2515 }
f337b084 2516 symptr->strbeg = SvCUR(cat);
49704364
WL
2517
2518 while (found) {
f337b084
TH
2519 SV *fromstr;
2520 STRLEN fromlen;
2521 I32 len;
a0714e2c 2522 SV *lengthcode = NULL;
49704364 2523 I32 datumtype = symptr->code;
f337b084
TH
2524 howlen_t howlen = symptr->howlen;
2525 char *start = SvPVX(cat);
2526 char *cur = start + SvCUR(cat);
49704364 2527
f337b084
TH
2528#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2529
2530 switch (howlen) {
fc241834 2531 case e_star:
f337b084
TH
2532 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2533 0 : items;
2534 break;
2535 default:
2536 /* e_no_len and e_number */
2537 len = symptr->length;
49704364
WL
2538 break;
2539 }
2540
f337b084 2541 if (len) {
a7a3cfaa 2542 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2543
a7a3cfaa
TH
2544 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2545 /* We can process this letter. */
2546 STRLEN size = props & PACK_SIZE_MASK;
2547 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2548 }
f337b084
TH
2549 }
2550
49704364
WL
2551 /* Look ahead for next symbol. Do we have code/code? */
2552 lookahead = *symptr;
2553 found = next_symbol(&lookahead);
246f24af
TH
2554 if (symptr->flags & FLAG_SLASH) {
2555 IV count;
f337b084 2556 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2557 if (strchr("aAZ", lookahead.code)) {
2558 if (lookahead.howlen == e_number) count = lookahead.length;
2559 else {
ce399ba6
NC
2560 if (items > 0) {
2561 if (SvGAMAGIC(*beglist)) {
2562 /* Avoid reading the active data more than once
2563 by copying it to a temporary. */
2564 STRLEN len;
2565 const char *const pv = SvPV_const(*beglist, len);
740cce10 2566 SV *const temp
59cd0e26
NC
2567 = newSVpvn_flags(pv, len,
2568 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2569 *beglist = temp;
2570 }
246f24af
TH
2571 count = DO_UTF8(*beglist) ?
2572 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2573 }
246f24af
TH
2574 else count = 0;
2575 if (lookahead.code == 'Z') count++;
2576 }
2577 } else {
2578 if (lookahead.howlen == e_number && lookahead.length < items)
2579 count = lookahead.length;
2580 else count = items;
2581 }
2582 lookahead.howlen = e_number;
2583 lookahead.length = count;
2584 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2585 }
49704364 2586
fc241834
RGS
2587 /* Code inside the switch must take care to properly update
2588 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2589 doesn't simply leave using break */
1109a392 2590 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2591 default:
f337b084
TH
2592 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2593 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2594 case '%':
49704364 2595 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2596 {
2597 char *from;
2598#ifdef PERL_PACK_CAN_SHRIEKSIGN
2599 case '.' | TYPE_IS_SHRIEKING:
2600#endif
2601 case '.':
2602 if (howlen == e_star) from = start;
2603 else if (len == 0) from = cur;
2604 else {
2605 tempsym_t *group = symptr;
2606
2607 while (--len && group) group = group->previous;
2608 from = group ? start + group->strbeg : start;
2609 }
2610 fromstr = NEXTFROM;
2611 len = SvIV(fromstr);
2612 goto resize;
2613#ifdef PERL_PACK_CAN_SHRIEKSIGN
2614 case '@' | TYPE_IS_SHRIEKING:
2615#endif
a6ec74c1 2616 case '@':
28be1210
TH
2617 from = start + symptr->strbeg;
2618 resize:
2619#ifdef PERL_PACK_CAN_SHRIEKSIGN
2620 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2621#else /* PERL_PACK_CAN_SHRIEKSIGN */
2622 if (utf8)
2623#endif
2624 if (len >= 0) {
2625 while (len && from < cur) {
2626 from += UTF8SKIP(from);
2627 len--;
2628 }
2629 if (from > cur)
2630 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2631 if (len) {
2632 /* Here we know from == cur */
2633 grow:
2634 GROWING(0, cat, start, cur, len);
2635 Zero(cur, len, char);
2636 cur += len;
2637 } else if (from < cur) {
2638 len = cur - from;
2639 goto shrink;
2640 } else goto no_change;
2641 } else {
2642 cur = from;
2643 len = -len;
2644 goto utf8_shrink;
f337b084 2645 }
28be1210
TH
2646 else {
2647 len -= cur - from;
f337b084 2648 if (len > 0) goto grow;
28be1210 2649 if (len == 0) goto no_change;
fc241834 2650 len = -len;
28be1210 2651 goto shrink;
f337b084 2652 }
a6ec74c1 2653 break;
28be1210 2654 }
fc241834 2655 case '(': {
49704364 2656 tempsym_t savsym = *symptr;
66c611c5
MHM
2657 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2658 symptr->flags |= group_modifiers;
49704364
WL
2659 symptr->patend = savsym.grpend;
2660 symptr->level++;
f337b084 2661 symptr->previous = &lookahead;
18529408 2662 while (len--) {
f337b084
TH
2663 U32 was_utf8;
2664 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2665 else symptr->flags &= ~FLAG_PARSE_UTF8;
2666 was_utf8 = SvUTF8(cat);
49704364 2667 symptr->patptr = savsym.grpbeg;
f337b084
TH
2668 beglist = pack_rec(cat, symptr, beglist, endlist);
2669 if (SvUTF8(cat) != was_utf8)
2670 /* This had better be an upgrade while in utf8==0 mode */
2671 utf8 = 1;
2672
49704364 2673 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2674 break; /* No way to continue */
2675 }
ee790063 2676 items = endlist - beglist;
f337b084
TH
2677 lookahead.flags = symptr->flags & ~group_modifiers;
2678 goto no_change;
18529408 2679 }
62f95557
IZ
2680 case 'X' | TYPE_IS_SHRIEKING:
2681 if (!len) /* Avoid division by 0 */
2682 len = 1;
f337b084
TH
2683 if (utf8) {
2684 char *hop, *last;
2685 I32 l = len;
2686 hop = last = start;
2687 while (hop < cur) {
2688 hop += UTF8SKIP(hop);
2689 if (--l == 0) {
2690 last = hop;
2691 l = len;
2692 }
2693 }
2694 if (last > cur)
2695 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2696 cur = last;
2697 break;
2698 }
2699 len = (cur-start) % len;
62f95557 2700 /* FALL THROUGH */
a6ec74c1 2701 case 'X':
f337b084
TH
2702 if (utf8) {
2703 if (len < 1) goto no_change;
28be1210 2704 utf8_shrink:
f337b084
TH
2705 while (len > 0) {
2706 if (cur <= start)
28be1210
TH
2707 Perl_croak(aTHX_ "'%c' outside of string in pack",
2708 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2709 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2710 if (cur <= start)
28be1210
TH
2711 Perl_croak(aTHX_ "'%c' outside of string in pack",
2712 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2713 }
2714 len--;
2715 }
2716 } else {
fc241834 2717 shrink:
f337b084 2718 if (cur - start < len)
28be1210
TH
2719 Perl_croak(aTHX_ "'%c' outside of string in pack",
2720 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2721 cur -= len;
2722 }
2723 if (cur < start+symptr->strbeg) {
2724 /* Make sure group starts don't point into the void */
2725 tempsym_t *group;
9e27e96a 2726 const STRLEN length = cur-start;
f337b084
TH
2727 for (group = symptr;
2728 group && length < group->strbeg;
2729 group = group->previous) group->strbeg = length;
2730 lookahead.strbeg = length;
2731 }
a6ec74c1 2732 break;
fc241834
RGS
2733 case 'x' | TYPE_IS_SHRIEKING: {
2734 I32 ai32;
62f95557
IZ
2735 if (!len) /* Avoid division by 0 */
2736 len = 1;
230e1fce 2737 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2738 else ai32 = (cur - start) % len;
2739 if (ai32 == 0) goto no_change;
2740 len -= ai32;
2741 }
2742 /* FALL THROUGH */
a6ec74c1 2743 case 'x':
f337b084 2744 goto grow;
a6ec74c1
JH
2745 case 'A':
2746 case 'Z':
f337b084 2747 case 'a': {
f7fe979e 2748 const char *aptr;
f337b084 2749
a6ec74c1 2750 fromstr = NEXTFROM;
e62f0680 2751 aptr = SvPV_const(fromstr, fromlen);
f337b084 2752 if (DO_UTF8(fromstr)) {
f7fe979e 2753 const char *end, *s;
f337b084
TH
2754
2755 if (!utf8 && !SvUTF8(cat)) {
2756 marked_upgrade(aTHX_ cat, symptr);
2757 lookahead.flags |= FLAG_DO_UTF8;
2758 lookahead.strbeg = symptr->strbeg;
2759 utf8 = 1;
2760 start = SvPVX(cat);
2761 cur = start + SvCUR(cat);
2762 }
fc241834 2763 if (howlen == e_star) {
f337b084
TH
2764 if (utf8) goto string_copy;
2765 len = fromlen+1;
2766 }
2767 s = aptr;
2768 end = aptr + fromlen;
2769 fromlen = datumtype == 'Z' ? len-1 : len;
2770 while ((I32) fromlen > 0 && s < end) {
2771 s += UTF8SKIP(s);
2772 fromlen--;
2773 }
2774 if (s > end)
2775 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2776 if (utf8) {
fc241834 2777 len = fromlen;
f337b084
TH
2778 if (datumtype == 'Z') len++;
2779 fromlen = s-aptr;
2780 len += fromlen;
fc241834 2781
f337b084 2782 goto string_copy;
fc241834 2783 }
f337b084
TH
2784 fromlen = len - fromlen;
2785 if (datumtype == 'Z') fromlen--;
2786 if (howlen == e_star) {
2787 len = fromlen;
2788 if (datumtype == 'Z') len++;
fc241834 2789 }
f337b084 2790 GROWING(0, cat, start, cur, len);
fc241834 2791 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2792 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2793 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2794 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2795 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2796 cur += fromlen;
a6ec74c1 2797 len -= fromlen;
f337b084
TH
2798 } else if (utf8) {
2799 if (howlen == e_star) {
2800 len = fromlen;
2801 if (datumtype == 'Z') len++;
a6ec74c1 2802 }
f337b084
TH
2803 if (len <= (I32) fromlen) {
2804 fromlen = len;
2805 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2806 }
fc241834 2807 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2808 upgrade, so:
2809 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2810 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2811 len -= fromlen;
2812 while (fromlen > 0) {
230e1fce 2813 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2814 aptr++;
2815 fromlen--;
fc241834 2816 }
f337b084
TH
2817 } else {
2818 string_copy:
2819 if (howlen == e_star) {
2820 len = fromlen;
2821 if (datumtype == 'Z') len++;
2822 }
2823 if (len <= (I32) fromlen) {
2824 fromlen = len;
2825 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2826 }
f337b084
TH
2827 GROWING(0, cat, start, cur, len);
2828 Copy(aptr, cur, fromlen, char);
2829 cur += fromlen;
2830 len -= fromlen;
a6ec74c1 2831 }
f337b084
TH
2832 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2833 cur += len;
3c4fb04a 2834 SvTAINT(cat);
a6ec74c1 2835 break;
f337b084 2836 }
a6ec74c1 2837 case 'B':
f337b084 2838 case 'b': {
b83604b4 2839 const char *str, *end;
f337b084
TH
2840 I32 l, field_len;
2841 U8 bits;
2842 bool utf8_source;
2843 U32 utf8_flags;
a6ec74c1 2844
fc241834 2845 fromstr = NEXTFROM;
b83604b4 2846 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2847 end = str + fromlen;
2848 if (DO_UTF8(fromstr)) {
2849 utf8_source = TRUE;
041457d9 2850 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2851 } else {
2852 utf8_source = FALSE;
2853 utf8_flags = 0; /* Unused, but keep compilers happy */
2854 }
2855 if (howlen == e_star) len = fromlen;
2856 field_len = (len+7)/8;
2857 GROWING(utf8, cat, start, cur, field_len);
2858 if (len > (I32)fromlen) len = fromlen;
2859 bits = 0;
2860 l = 0;
2861 if (datumtype == 'B')
2862 while (l++ < len) {
2863 if (utf8_source) {
95b63a38 2864 UV val = 0;
f337b084
TH
2865 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2866 bits |= val & 1;
2867 } else bits |= *str++ & 1;
2868 if (l & 7) bits <<= 1;
fc241834 2869 else {
f337b084
TH
2870 PUSH_BYTE(utf8, cur, bits);
2871 bits = 0;
a6ec74c1
JH
2872 }
2873 }
f337b084
TH
2874 else
2875 /* datumtype == 'b' */
2876 while (l++ < len) {
2877 if (utf8_source) {
95b63a38 2878 UV val = 0;
f337b084
TH
2879 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2880 if (val & 1) bits |= 0x80;
2881 } else if (*str++ & 1)
2882 bits |= 0x80;
2883 if (l & 7) bits >>= 1;
fc241834 2884 else {
f337b084
TH
2885 PUSH_BYTE(utf8, cur, bits);
2886 bits = 0;
a6ec74c1
JH
2887 }
2888 }
f337b084
TH
2889 l--;
2890 if (l & 7) {
fc241834 2891 if (datumtype == 'B')
f337b084 2892 bits <<= 7 - (l & 7);
fc241834 2893 else
f337b084
TH
2894 bits >>= 7 - (l & 7);
2895 PUSH_BYTE(utf8, cur, bits);
2896 l += 7;
a6ec74c1 2897 }
f337b084
TH
2898 /* Determine how many chars are left in the requested field */
2899 l /= 8;
2900 if (howlen == e_star) field_len = 0;
2901 else field_len -= l;
2902 Zero(cur, field_len, char);
2903 cur += field_len;
a6ec74c1 2904 break;
f337b084 2905 }
a6ec74c1 2906 case 'H':
f337b084 2907 case 'h': {
10516c54 2908 const char *str, *end;
f337b084
TH
2909 I32 l, field_len;
2910 U8 bits;
2911 bool utf8_source;
2912 U32 utf8_flags;
a6ec74c1 2913
fc241834 2914 fromstr = NEXTFROM;
10516c54 2915 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2916 end = str + fromlen;
2917 if (DO_UTF8(fromstr)) {
2918 utf8_source = TRUE;
041457d9 2919 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2920 } else {
2921 utf8_source = FALSE;
2922 utf8_flags = 0; /* Unused, but keep compilers happy */
2923 }
2924 if (howlen == e_star) len = fromlen;
2925 field_len = (len+1)/2;
2926 GROWING(utf8, cat, start, cur, field_len);
2927 if (!utf8 && len > (I32)fromlen) len = fromlen;
2928 bits = 0;
2929 l = 0;
2930 if (datumtype == 'H')
2931 while (l++ < len) {
2932 if (utf8_source) {
95b63a38 2933 UV val = 0;
f337b084
TH
2934 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2935 if (val < 256 && isALPHA(val))
2936 bits |= (val + 9) & 0xf;
a6ec74c1 2937 else
f337b084
TH
2938 bits |= val & 0xf;
2939 } else if (isALPHA(*str))
2940 bits |= (*str++ + 9) & 0xf;
2941 else
2942 bits |= *str++ & 0xf;
2943 if (l & 1) bits <<= 4;
fc241834 2944 else {
f337b084
TH
2945 PUSH_BYTE(utf8, cur, bits);
2946 bits = 0;
a6ec74c1
JH
2947 }
2948 }
f337b084
TH
2949 else
2950 while (l++ < len) {
2951 if (utf8_source) {
95b63a38 2952 UV val = 0;
f337b084
TH
2953 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2954 if (val < 256 && isALPHA(val))
2955 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2956 else
f337b084
TH
2957 bits |= (val & 0xf) << 4;
2958 } else if (isALPHA(*str))
2959 bits |= ((*str++ + 9) & 0xf) << 4;
2960 else
2961 bits |= (*str++ & 0xf) << 4;
2962 if (l & 1) bits >>= 4;
fc241834 2963 else {
f337b084
TH
2964 PUSH_BYTE(utf8, cur, bits);
2965 bits = 0;
a6ec74c1 2966 }
fc241834 2967 }
f337b084
TH
2968 l--;
2969 if (l & 1) {
2970 PUSH_BYTE(utf8, cur, bits);
2971 l++;
2972 }
2973 /* Determine how many chars are left in the requested field */
2974 l /= 2;
2975 if (howlen == e_star) field_len = 0;
2976 else field_len -= l;
2977 Zero(cur, field_len, char);
2978 cur += field_len;
2979 break;
fc241834
RGS
2980 }
2981 case 'c':
f337b084
TH
2982 while (len-- > 0) {
2983 IV aiv;
2984 fromstr = NEXTFROM;
2985 aiv = SvIV(fromstr);
a2a5de95
NC
2986 if ((-128 > aiv || aiv > 127))
2987 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2988 "Character in 'c' format wrapped in pack");
585ec06d 2989 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2990 }
2991 break;
2992 case 'C':
f337b084
TH
2993 if (len == 0) {
2994 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2995 break;
2996 }
a6ec74c1 2997 while (len-- > 0) {
f337b084 2998 IV aiv;
a6ec74c1 2999 fromstr = NEXTFROM;
f337b084 3000 aiv = SvIV(fromstr);
a2a5de95
NC
3001 if ((0 > aiv || aiv > 0xff))
3002 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3003 "Character in 'C' format wrapped in pack");
1651fc44 3004 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 3005 }
fc241834
RGS
3006 break;
3007 case 'W': {
3008 char *end;
670f1322 3009 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
3010
3011 end = start+SvLEN(cat)-1;
3012 if (utf8) end -= UTF8_MAXLEN-1;
3013 while (len-- > 0) {
3014 UV auv;
3015 fromstr = NEXTFROM;
3016 auv = SvUV(fromstr);
3017 if (in_bytes) auv = auv % 0x100;
3018 if (utf8) {
3019 W_utf8:
3020 if (cur > end) {
3021 *cur = '\0';
b162af07 3022 SvCUR_set(cat, cur - start);
fc241834
RGS
3023
3024 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3025 end = start+SvLEN(cat)-UTF8_MAXLEN;
3026 }
230e1fce
NC
3027 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3028 NATIVE_TO_UNI(auv),
041457d9 3029 warn_utf8 ?
230e1fce 3030 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3031 } else {
3032 if (auv >= 0x100) {
3033 if (!SvUTF8(cat)) {
3034 *cur = '\0';
b162af07 3035 SvCUR_set(cat, cur - start);
fc241834
RGS
3036 marked_upgrade(aTHX_ cat, symptr);
3037 lookahead.flags |= FLAG_DO_UTF8;
3038 lookahead.strbeg = symptr->strbeg;
3039 utf8 = 1;
3040 start = SvPVX(cat);
3041 cur = start + SvCUR(cat);
3042 end = start+SvLEN(cat)-UTF8_MAXLEN;
3043 goto W_utf8;
3044 }
a2a5de95
NC
3045 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3046 "Character in 'W' format wrapped in pack");
fc241834
RGS
3047 auv &= 0xff;
3048 }
3049 if (cur >= end) {
3050 *cur = '\0';
b162af07 3051 SvCUR_set(cat, cur - start);
fc241834
RGS
3052 GROWING(0, cat, start, cur, len+1);
3053 end = start+SvLEN(cat)-1;
3054 }
fe2774ed 3055 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3056 }
3057 }
3058 break;
fc241834
RGS
3059 }
3060 case 'U': {
3061 char *end;
3062
3063 if (len == 0) {
3064 if (!(symptr->flags & FLAG_DO_UTF8)) {
3065 marked_upgrade(aTHX_ cat, symptr);
3066 lookahead.flags |= FLAG_DO_UTF8;
3067 lookahead.strbeg = symptr->strbeg;
3068 }
3069 utf8 = 0;
3070 goto no_change;
3071 }
3072
3073 end = start+SvLEN(cat);
3074 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3075 while (len-- > 0) {
fc241834 3076 UV auv;
a6ec74c1 3077 fromstr = NEXTFROM;
fc241834
RGS
3078 auv = SvUV(fromstr);
3079 if (utf8) {
230e1fce 3080 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3081 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3082 warn_utf8 ?
fc241834
RGS
3083 0 : UNICODE_ALLOW_ANY);
3084 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3085 *cur = '\0';
b162af07 3086 SvCUR_set(cat, cur - start);
fc241834
RGS
3087 GROWING(0, cat, start, cur,
3088 len+(endb-buffer)*UTF8_EXPAND);
3089 end = start+SvLEN(cat);
3090 }
64844641 3091 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3092 } else {
3093 if (cur >= end) {
3094 *cur = '\0';
b162af07 3095 SvCUR_set(cat, cur - start);
fc241834
RGS
3096 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3097 end = start+SvLEN(cat)-UTF8_MAXLEN;
3098 }
230e1fce 3099 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3100 warn_utf8 ?
230e1fce 3101 0 : UNICODE_ALLOW_ANY);
fc241834 3102 }
a6ec74c1 3103 }
a6ec74c1 3104 break;
fc241834 3105 }
a6ec74c1
JH
3106 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3107 case 'f':
a6ec74c1 3108 while (len-- > 0) {
f337b084
TH
3109 float afloat;
3110 NV anv;
a6ec74c1 3111 fromstr = NEXTFROM;
f337b084 3112 anv = SvNV(fromstr);
5cdb9e01 3113#ifdef __VOS__
f337b084 3114 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3115 during conversion from double to float into infinity, so we
3116 do it by hand. This code should either be generalized for
3117 any OS that needs it, or removed if and when VOS implements
3118 posix-976 (suggestion to support mapping to infinity).
3119 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3120{
3121extern const float _float_constants[];
f337b084 3122 if (anv > FLT_MAX)
fc241834 3123 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3124 else if (anv < -FLT_MAX)
fc241834 3125 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3126 else afloat = (float) anv;
3722f0dc 3127}
f337b084 3128#else /* __VOS__ */
baf3cf9c 3129# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3130 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3131 * on Alpha; fake it if we don't have them.
3132 */
f337b084 3133 if (anv > FLT_MAX)
fc241834 3134 afloat = FLT_MAX;
f337b084 3135 else if (anv < -FLT_MAX)
fc241834 3136 afloat = -FLT_MAX;
f337b084 3137 else afloat = (float)anv;
baf3cf9c 3138# else
f337b084 3139 afloat = (float)anv;
baf3cf9c 3140# endif
f337b084 3141#endif /* __VOS__ */
1109a392 3142 DO_BO_PACK_N(afloat, float);
f337b084 3143 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3144 }
3145 break;
3146 case 'd':
a6ec74c1 3147 while (len-- > 0) {
f337b084
TH
3148 double adouble;
3149 NV anv;
a6ec74c1 3150 fromstr = NEXTFROM;
f337b084 3151 anv = SvNV(fromstr);
5cdb9e01 3152#ifdef __VOS__
f337b084 3153 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3154 during conversion from long double to double into infinity,
3155 so we do it by hand. This code should either be generalized
3156 for any OS that needs it, or removed if and when VOS
3157 implements posix-976 (suggestion to support mapping to
3158 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3159{
3160extern const double _double_constants[];
f337b084 3161 if (anv > DBL_MAX)
fc241834 3162 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3163 else if (anv < -DBL_MAX)
fc241834 3164 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3165 else adouble = (double) anv;
3722f0dc 3166}
f337b084 3167#else /* __VOS__ */
baf3cf9c 3168# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3169 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3170 * on Alpha; fake it if we don't have them.
3171 */
f337b084 3172 if (anv > DBL_MAX)
fc241834 3173 adouble = DBL_MAX;
f337b084 3174 else if (anv < -DBL_MAX)
fc241834 3175 adouble = -DBL_MAX;
f337b084 3176 else adouble = (double)anv;
baf3cf9c 3177# else
f337b084 3178 adouble = (double)anv;
baf3cf9c 3179# endif
f337b084 3180#endif /* __VOS__ */
1109a392 3181 DO_BO_PACK_N(adouble, double);
f337b084 3182 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3183 }
3184 break;
fc241834 3185 case 'F': {
275663fa 3186 NV_bytes anv;
1109a392 3187 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3188 while (len-- > 0) {
3189 fromstr = NEXTFROM;
cd07c537
DM
3190#ifdef __GNUC__
3191 /* to work round a gcc/x86 bug; don't use SvNV */
3192 anv.nv = sv_2nv(fromstr);
3193#else
275663fa 3194 anv.nv = SvNV(fromstr);
cd07c537 3195#endif
1109a392 3196 DO_BO_PACK_N(anv, NV);
275663fa 3197 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
3198 }
3199 break;
fc241834 3200 }
92d41999 3201#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 3202 case 'D': {
275663fa 3203 ld_bytes aldouble;
1109a392
MHM
3204 /* long doubles can have unused bits, which may be nonzero */
3205 Zero(&aldouble, 1, long double);
92d41999
JH
3206 while (len-- > 0) {
3207 fromstr = NEXTFROM;
cd07c537
DM
3208# ifdef __GNUC__
3209 /* to work round a gcc/x86 bug; don't use SvNV */
3210 aldouble.ld = (long double)sv_2nv(fromstr);
3211# else
275663fa 3212 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 3213# endif
1109a392 3214 DO_BO_PACK_N(aldouble, long double);
275663fa 3215 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
3216 }
3217 break;
fc241834 3218 }
92d41999 3219#endif
7212898e 3220#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3221 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3222#endif
a6ec74c1
JH
3223 case 'n':
3224 while (len-- > 0) {
f337b084 3225 I16 ai16;
a6ec74c1 3226 fromstr = NEXTFROM;
ef108786 3227 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3228#ifdef HAS_HTONS
ef108786 3229 ai16 = PerlSock_htons(ai16);
a6ec74c1 3230#endif
f337b084 3231 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3232 }
3233 break;
7212898e 3234#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3235 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3236#endif
a6ec74c1
JH
3237 case 'v':
3238 while (len-- > 0) {
f337b084 3239 I16 ai16;
a6ec74c1 3240 fromstr = NEXTFROM;
ef108786 3241 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3242#ifdef HAS_HTOVS
ef108786 3243 ai16 = htovs(ai16);
a6ec74c1 3244#endif
f337b084 3245 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3246 }
3247 break;
49704364 3248 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3249#if SHORTSIZE != SIZE16
fc241834 3250 while (len-- > 0) {
f337b084 3251 unsigned short aushort;
fc241834
RGS
3252 fromstr = NEXTFROM;
3253 aushort = SvUV(fromstr);
3254 DO_BO_PACK(aushort, s);
f337b084 3255 PUSH_VAR(utf8, cur, aushort);
fc241834 3256 }
49704364
WL
3257 break;
3258#else
3259 /* Fall through! */
a6ec74c1 3260#endif
49704364 3261 case 'S':
fc241834 3262 while (len-- > 0) {
f337b084 3263 U16 au16;
fc241834
RGS
3264 fromstr = NEXTFROM;
3265 au16 = (U16)SvUV(fromstr);
3266 DO_BO_PACK(au16, 16);
f337b084 3267 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3268 }
3269 break;
49704364 3270 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3271#if SHORTSIZE != SIZE16
fc241834 3272 while (len-- > 0) {
f337b084 3273 short ashort;
fc241834
RGS
3274 fromstr = NEXTFROM;
3275 ashort = SvIV(fromstr);
3276 DO_BO_PACK(ashort, s);
f337b084 3277 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3278 }
49704364
WL
3279 break;
3280#else
3281 /* Fall through! */
a6ec74c1 3282#endif
49704364
WL
3283 case 's':
3284 while (len-- > 0) {
f337b084 3285 I16 ai16;
49704364 3286 fromstr = NEXTFROM;
ef108786
MHM
3287 ai16 = (I16)SvIV(fromstr);
3288 DO_BO_PACK(ai16, 16);
f337b084 3289 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3290 }
3291 break;
3292 case 'I':
49704364 3293 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3294 while (len-- > 0) {
f337b084 3295 unsigned int auint;
a6ec74c1
JH
3296 fromstr = NEXTFROM;
3297 auint = SvUV(fromstr);
1109a392 3298 DO_BO_PACK(auint, i);
f337b084 3299 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3300 }
3301 break;
92d41999
JH
3302 case 'j':
3303 while (len-- > 0) {
f337b084 3304 IV aiv;
92d41999
JH
3305 fromstr = NEXTFROM;
3306 aiv = SvIV(fromstr);
1109a392
MHM
3307#if IVSIZE == INTSIZE
3308 DO_BO_PACK(aiv, i);
3309#elif IVSIZE == LONGSIZE
3310 DO_BO_PACK(aiv, l);
3311#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3312 DO_BO_PACK(aiv, 64);
f337b084
TH
3313#else
3314 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3315#endif
f337b084 3316 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3317 }
3318 break;
3319 case 'J':
3320 while (len-- > 0) {
f337b084 3321 UV auv;
92d41999
JH
3322 fromstr = NEXTFROM;
3323 auv = SvUV(fromstr);
1109a392
MHM
3324#if UVSIZE == INTSIZE
3325 DO_BO_PACK(auv, i);
3326#elif UVSIZE == LONGSIZE
3327 DO_BO_PACK(auv, l);
3328#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3329 DO_BO_PACK(auv, 64);
f337b084
TH
3330#else
3331 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3332#endif
f337b084 3333 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3334 }
3335 break;
a6ec74c1
JH
3336 case 'w':
3337 while (len-- > 0) {
f337b084 3338 NV anv;
a6ec74c1 3339 fromstr = NEXTFROM;
15e9f109 3340 anv = SvNV(fromstr);
a6ec74c1 3341
f337b084
TH
3342 if (anv < 0) {
3343 *cur = '\0';
b162af07 3344 SvCUR_set(cat, cur - start);
49704364 3345 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3346 }
a6ec74c1 3347
196b62db
NC
3348 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3349 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3350 any negative IVs will have already been got by the croak()
3351 above. IOK is untrue for fractions, so we test them
3352 against UV_MAX_P1. */
f337b084
TH
3353 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3354 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3355 char *in = buf + sizeof(buf);
196b62db 3356 UV auv = SvUV(fromstr);
a6ec74c1
JH
3357
3358 do {
eb160463 3359 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3360 auv >>= 7;
3361 } while (auv);
3362 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3363 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3364 in, (buf + sizeof(buf)) - in);
3365 } else if (SvPOKp(fromstr))
3366 goto w_string;
a6ec74c1 3367 else if (SvNOKp(fromstr)) {
0258719b 3368 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 3369 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
3370 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3371 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3372 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3373 Some C compilers are strict about integral constant
3374 expressions so we conservatively divide by a slightly
3375 smaller integer instead of multiplying by the exact
3376 floating-point value.
0258719b
NC
3377 */
3378#ifdef NV_MAX_10_EXP
f337b084 3379 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3380 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3381#else
f337b084 3382 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3383 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3384#endif
a6ec74c1
JH
3385 char *in = buf + sizeof(buf);
3386
8b6e33c7 3387 anv = Perl_floor(anv);
a6ec74c1 3388 do {
8b6e33c7 3389 const NV next = Perl_floor(anv / 128);
a6ec74c1 3390 if (in <= buf) /* this cannot happen ;-) */
49704364 3391 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3392 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3393 anv = next;
3394 } while (anv > 0);
a6ec74c1 3395 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3396 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3397 in, (buf + sizeof(buf)) - in);
3398 } else {
8b6e33c7
AL
3399 const char *from;
3400 char *result, *in;
735b914b
JH
3401 SV *norm;
3402 STRLEN len;
3403 bool done;
3404
f337b084 3405 w_string:
735b914b 3406 /* Copy string and check for compliance */
349d4f2f 3407 from = SvPV_const(fromstr, len);
735b914b 3408 if ((norm = is_an_int(from, len)) == NULL)
49704364 3409 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3410
a02a5408 3411 Newx(result, len, char);
735b914b
JH
3412 in = result + len;
3413 done = FALSE;
f337b084 3414 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3415 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3416 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3417 in, (result + len) - in);
735b914b
JH
3418 Safefree(result);
3419 SvREFCNT_dec(norm); /* free norm */
fc241834 3420 }
a6ec74c1
JH
3421 }
3422 break;
3423 case 'i':
49704364 3424 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3425 while (len-- > 0) {
f337b084 3426 int aint;
a6ec74c1
JH
3427 fromstr = NEXTFROM;
3428 aint = SvIV(fromstr);
1109a392 3429 DO_BO_PACK(aint, i);
f337b084 3430 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3431 }
3432 break;
7212898e 3433#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3434 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3435#endif
a6ec74c1
JH
3436 case 'N':
3437 while (len-- > 0) {
f337b084 3438 U32 au32;
a6ec74c1 3439 fromstr = NEXTFROM;
ef108786 3440 au32 = SvUV(fromstr);
a6ec74c1 3441#ifdef HAS_HTONL
ef108786 3442 au32 = PerlSock_htonl(au32);
a6ec74c1 3443#endif
f337b084 3444 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3445 }
3446 break;
7212898e 3447#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3448 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3449#endif
a6ec74c1
JH
3450 case 'V':
3451 while (len-- > 0) {
f337b084 3452 U32 au32;
a6ec74c1 3453 fromstr = NEXTFROM;
ef108786 3454 au32 = SvUV(fromstr);
a6ec74c1 3455#ifdef HAS_HTOVL
ef108786 3456 au32 = htovl(au32);
a6ec74c1 3457#endif
f337b084 3458 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3459 }
3460 break;
49704364 3461 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3462#if LONGSIZE != SIZE32
fc241834 3463 while (len-- > 0) {
f337b084 3464 unsigned long aulong;
fc241834
RGS
3465 fromstr = NEXTFROM;
3466 aulong = SvUV(fromstr);
3467 DO_BO_PACK(aulong, l);
f337b084 3468 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3469 }
49704364
WL
3470 break;
3471#else
3472 /* Fall though! */
a6ec74c1 3473#endif
49704364 3474 case 'L':
fc241834 3475 while (len-- > 0) {
f337b084 3476 U32 au32;
fc241834
RGS
3477 fromstr = NEXTFROM;
3478 au32 = SvUV(fromstr);
3479 DO_BO_PACK(au32, 32);
f337b084 3480 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3481 }
3482 break;
49704364 3483 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3484#if LONGSIZE != SIZE32
fc241834 3485 while (len-- > 0) {
f337b084 3486 long along;
fc241834
RGS
3487 fromstr = NEXTFROM;
3488 along = SvIV(fromstr);
3489 DO_BO_PACK(along, l);
f337b084 3490 PUSH_VAR(utf8, cur, along);
a6ec74c1 3491 }
49704364
WL
3492 break;
3493#else
3494 /* Fall though! */
a6ec74c1 3495#endif
49704364
WL
3496 case 'l':
3497 while (len-- > 0) {
f337b084 3498 I32 ai32;
49704364 3499 fromstr = NEXTFROM;
ef108786
MHM
3500 ai32 = SvIV(fromstr);
3501 DO_BO_PACK(ai32, 32);
f337b084 3502 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3503 }
3504 break;
3505#ifdef HAS_QUAD
3506 case 'Q':
3507 while (len-- > 0) {
f337b084 3508 Uquad_t auquad;
a6ec74c1 3509 fromstr = NEXTFROM;
f337b084 3510 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3511 DO_BO_PACK(auquad, 64);
f337b084 3512 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3513 }
3514 break;
3515 case 'q':
3516 while (len-- > 0) {
f337b084 3517 Quad_t aquad;
a6ec74c1
JH
3518 fromstr = NEXTFROM;
3519 aquad = (Quad_t)SvIV(fromstr);
1109a392 3520 DO_BO_PACK(aquad, 64);
f337b084 3521 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3522 }
3523 break;
f337b084 3524#endif /* HAS_QUAD */
a6ec74c1
JH
3525 case 'P':
3526 len = 1; /* assume SV is correct length */
f337b084 3527 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3528 /* Fall through! */
a6ec74c1
JH
3529 case 'p':
3530 while (len-- > 0) {
83003860 3531 const char *aptr;
f337b084 3532
a6ec74c1 3533 fromstr = NEXTFROM;
28a4f200
TH
3534 SvGETMAGIC(fromstr);
3535 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3536 else {
a6ec74c1
JH
3537 /* XXX better yet, could spirit away the string to
3538 * a safe spot and hang on to it until the result
3539 * of pack() (and all copies of the result) are
3540 * gone.
3541 */
041457d9 3542 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3543 !SvREADONLY(fromstr)))) {
3544 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3545 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3546 }
3547 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3548 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3549 else
2596d9fe 3550 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3551 }
07409e01 3552 DO_BO_PACK_PC(aptr);
f337b084 3553 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3554 }
3555 break;
fc241834 3556 case 'u': {
f7fe979e 3557 const char *aptr, *aend;
fc241834 3558 bool from_utf8;
f337b084 3559
a6ec74c1 3560 fromstr = NEXTFROM;
fc241834
RGS
3561 if (len <= 2) len = 45;
3562 else len = len / 3 * 3;
3563 if (len >= 64) {
a2a5de95
NC
3564 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3565 "Field too wide in 'u' format in pack");
fc241834
RGS
3566 len = 63;
3567 }
83003860 3568 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3569 from_utf8 = DO_UTF8(fromstr);
3570 if (from_utf8) {
3571 aend = aptr + fromlen;
3572 fromlen = sv_len_utf8(fromstr);
3573 } else aend = NULL; /* Unused, but keep compilers happy */
3574 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3575 while (fromlen > 0) {
fc241834 3576 U8 *end;
a6ec74c1 3577 I32 todo;
fc241834 3578 U8 hunk[1+63/3*4+1];
a6ec74c1 3579
eb160463 3580 if ((I32)fromlen > len)
a6ec74c1
JH
3581 todo = len;
3582 else
3583 todo = fromlen;
fc241834
RGS
3584 if (from_utf8) {
3585 char buffer[64];
3586 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3587 'u' | TYPE_IS_PACK)) {
3588 *cur = '\0';
b162af07 3589 SvCUR_set(cat, cur - start);
5637ef5b
NC
3590 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3591 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3592 aptr, aend, buffer, (long) todo);
fc241834
RGS
3593 }
3594 end = doencodes(hunk, buffer, todo);
3595 } else {
3596 end = doencodes(hunk, aptr, todo);
3597 aptr += todo;
3598 }
3599 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3600 fromlen -= todo;
3601 }
a6ec74c1
JH
3602 break;
3603 }
f337b084
TH
3604 }
3605 *cur = '\0';
b162af07 3606 SvCUR_set(cat, cur - start);
f337b084 3607 no_change:
49704364 3608 *symptr = lookahead;
a6ec74c1 3609 }
49704364 3610 return beglist;
18529408
IZ
3611}
3612#undef NEXTFROM
3613
3614
3615PP(pp_pack)
3616{
97aff369 3617 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3618 register SV *cat = TARG;
3619 STRLEN fromlen;
349d4f2f
NC
3620 SV *pat_sv = *++MARK;
3621 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3622 register const char *patend = pat + fromlen;
18529408
IZ
3623
3624 MARK++;
76f68e9b 3625 sv_setpvs(cat, "");
f337b084 3626 SvUTF8_off(cat);
18529408 3627
7accc089 3628 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3629
a6ec74c1
JH
3630 SvSETMAGIC(cat);
3631 SP = ORIGMARK;
3632 PUSHs(cat);
3633 RETURN;
3634}
a6ec74c1 3635
73cb7263
NC
3636/*
3637 * Local variables:
3638 * c-indentation-style: bsd
3639 * c-basic-offset: 4
3640 * indent-tabs-mode: t
3641 * End:
3642 *
37442d52
RGS
3643 * ex: set ts=8 sts=4 sw=4 noet:
3644 */