This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Minor edition nit in perlfunc
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
7212898e 85#if PERL_VERSION >= 9
f337b084
TH
86# define PERL_PACK_CAN_BYTEORDER
87# define PERL_PACK_CAN_SHRIEKSIGN
88#endif
89
90#ifndef CHAR_BIT
91# define CHAR_BIT 8
7212898e 92#endif
3473cf63
RGS
93/* Maximum number of bytes to which a byte can grow due to upgrade */
94#define UTF8_EXPAND 2
7212898e 95
a6ec74c1 96/*
a6ec74c1
JH
97 * Offset for integer pack/unpack.
98 *
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
101 */
102
103/*
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
109 */
110/*
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
113 */
114#define SIZE16 2
115#define SIZE32 4
116
117/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
118 --jhi Feb 1999 */
119
1109a392
MHM
120#if U16SIZE > SIZE16 || U32SIZE > SIZE32
121# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
122# define OFF16(p) ((char*)(p))
123# define OFF32(p) ((char*)(p))
a6ec74c1 124# else
1109a392 125# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
126# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
128# else
08ca2aa3 129 ++++ bad cray byte order
a6ec74c1
JH
130# endif
131# endif
a6ec74c1 132#else
08ca2aa3
TH
133# define OFF16(p) ((char *) (p))
134# define OFF32(p) ((char *) (p))
a6ec74c1
JH
135#endif
136
f337b084
TH
137/* Only to be used inside a loop (see the break) */
138#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
139 if (utf8) { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
141 } else { \
142 Copy(s, OFF16(p), SIZE16, char); \
143 (s) += SIZE16; \
144 } \
145} STMT_END
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
149 if (utf8) { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
151 } else { \
152 Copy(s, OFF32(p), SIZE32, char); \
153 (s) += SIZE32; \
154 } \
155} STMT_END
156
157#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
159
160/* Only to be used inside a loop (see the break) */
275663fa 161#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
162STMT_START { \
163 if (utf8) { \
f337b084 164 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 165 (char *) (buf), len, datumtype)) break; \
08ca2aa3 166 } else { \
275663fa
TC
167 Copy(s, (char *) (buf), len, char); \
168 s += len; \
08ca2aa3 169 } \
08ca2aa3
TH
170} STMT_END
171
275663fa
TC
172#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174
f337b084 175#define PUSH_VAR(utf8, aptr, var) \
230e1fce 176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 177
49704364
WL
178/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179#define MAX_SUB_TEMPLATE_LEVEL 100
180
66c611c5 181/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
182#define FLAG_WAS_UTF8 0x40
183#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 184#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 185#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
186#define FLAG_SLASH 0x04
187#define FLAG_COMMA 0x02
188#define FLAG_PACK 0x01
189
a6ec74c1
JH
190STATIC SV *
191S_mul128(pTHX_ SV *sv, U8 m)
192{
193 STRLEN len;
194 char *s = SvPV(sv, len);
195 char *t;
a6ec74c1 196
7918f24d
NC
197 PERL_ARGS_ASSERT_MUL128;
198
a6ec74c1 199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 200 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
201
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
204 sv = tmpNew;
205 s = SvPV(sv, len);
206 }
207 t = s + len - 1;
208 while (!*t) /* trailing '\0'? */
209 t--;
210 while (t > s) {
f7fe979e 211 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
212 *(t--) = '0' + (char)(i % 10);
213 m = (char)(i / 10);
a6ec74c1
JH
214 }
215 return (sv);
216}
217
218/* Explosives and implosives. */
219
220#if 'I' == 73 && 'J' == 74
221/* On an ASCII/ISO kind of system */
222#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
223#else
224/*
225 Some other sort of character set - use memchr() so we don't match
226 the null byte.
227 */
228#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229#endif
230
66c611c5 231/* type modifiers */
62f95557 232#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
233#define TYPE_IS_BIG_ENDIAN 0x200
234#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 235#define TYPE_IS_PACK 0x800
1109a392 236#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 237#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
238#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239
7212898e 240#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 241# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 242#else
28be1210 243# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
244#endif
245
246#ifndef PERL_PACK_CAN_BYTEORDER
247/* Put "can't" first because it is shorter */
248# define TYPE_ENDIANNESS(t) 0
249# define TYPE_NO_ENDIANNESS(t) (t)
250
251# define ENDIANNESS_ALLOWED_TYPES ""
252
253# define DO_BO_UNPACK(var, type)
254# define DO_BO_PACK(var, type)
07409e01
NC
255# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
257# define DO_BO_UNPACK_N(var, type)
258# define DO_BO_PACK_N(var, type)
259# define DO_BO_UNPACK_P(var)
260# define DO_BO_PACK_P(var)
45f723e1
NC
261# define DO_BO_UNPACK_PC(var)
262# define DO_BO_PACK_PC(var)
66c611c5 263
f337b084 264#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
265
266# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
268
269# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
270
271# define DO_BO_UNPACK(var, type) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
276 default: break; \
277 } \
278 } STMT_END
279
7212898e 280# define DO_BO_PACK(var, type) \
1109a392 281 STMT_START { \
66c611c5 282 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
285 default: break; \
286 } \
287 } STMT_END
288
07409e01 289# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 292 case TYPE_IS_BIG_ENDIAN: \
07409e01 293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
294 break; \
295 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
297 break; \
298 default: \
299 break; \
300 } \
301 } STMT_END
302
07409e01 303# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 304 STMT_START { \
66c611c5 305 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 306 case TYPE_IS_BIG_ENDIAN: \
07409e01 307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
311 break; \
312 default: \
313 break; \
314 } \
315 } STMT_END
316
7212898e 317# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
318 STMT_START { \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
323 break; \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
327 break; \
328 default: \
329 break; \
330 } \
331 } STMT_END
332
7212898e 333# if PTRSIZE == INTSIZE
07409e01
NC
334# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 338# elif PTRSIZE == LONGSIZE
fa58a56f
S
339# if LONGSIZE < IVSIZE && IVSIZE == 8
340# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
344# else
345# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
349# endif
2b00a750
JD
350# elif PTRSIZE == IVSIZE
351# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
355# else
356# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
358# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 360# endif
1109a392 361
7212898e 362# if defined(my_htolen) && defined(my_letohn) && \
1109a392 363 defined(my_htoben) && defined(my_betohn)
7212898e 364# define DO_BO_UNPACK_N(var, type) \
1109a392 365 STMT_START { \
66c611c5 366 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
369 default: break; \
370 } \
371 } STMT_END
372
7212898e 373# define DO_BO_PACK_N(var, type) \
1109a392 374 STMT_START { \
66c611c5 375 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
378 default: break; \
379 } \
380 } STMT_END
7212898e
NC
381# else
382# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
384# endif
385
f337b084 386#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 387
78d46eaa 388#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 389#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
390#define PACK_SIZE_MASK 0x3F
391
78d46eaa
NC
392/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
394
395#if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
397#endif
398
399typedef U8 packprops_t;
78d46eaa
NC
400#if 'J'-'I' == 1
401/* ASCII */
f43a9a31 402STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
403 /* normal */
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 0, 0, 0,
1651fc44 409 /* C */ sizeof(unsigned char),
80a13697 410#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 411 /* D */ LONG_DOUBLESIZE,
80a13697 412#else
a7a3cfaa 413 0,
80a13697 414#endif
a7a3cfaa
TH
415 0,
416 /* F */ NVSIZE,
417 0, 0,
418 /* I */ sizeof(unsigned int),
419 /* J */ UVSIZE,
420 0,
421 /* L */ SIZE32,
422 0,
423 /* N */ SIZE32,
424 0, 0,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* Q */ sizeof(Uquad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* S */ SIZE16,
432 0,
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
434 /* V */ SIZE32,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
439 0,
440 /* f */ sizeof(float),
441 0, 0,
442 /* i */ sizeof(int),
443 /* j */ IVSIZE,
444 0,
445 /* l */ SIZE32,
446 0,
447 /* n */ SIZE16,
448 0,
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 450#if defined(HAS_QUAD)
a7a3cfaa 451 /* q */ sizeof(Quad_t),
80a13697 452#else
a7a3cfaa 453 0,
80a13697 454#endif
a7a3cfaa
TH
455 0,
456 /* s */ SIZE16,
457 0, 0,
458 /* v */ SIZE16,
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
469 /* shrieking */
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
476 0, 0,
477 /* L */ sizeof(unsigned long),
478 0,
7212898e 479#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 480 /* N */ SIZE32,
7212898e 481#else
a7a3cfaa 482 0,
7212898e 483#endif
a7a3cfaa
TH
484 0, 0, 0, 0,
485 /* S */ sizeof(unsigned short),
486 0, 0,
7212898e 487#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 488 /* V */ SIZE32,
7212898e 489#else
a7a3cfaa 490 0,
7212898e 491#endif
a7a3cfaa
TH
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0,
494 /* i */ sizeof(int),
495 0, 0,
496 /* l */ sizeof(long),
497 0,
7212898e 498#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 499 /* n */ SIZE16,
7212898e 500#else
a7a3cfaa 501 0,
7212898e 502#endif
a7a3cfaa
TH
503 0, 0, 0, 0,
504 /* s */ sizeof(short),
505 0, 0,
7212898e 506#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 507 /* v */ SIZE16,
7212898e 508#else
a7a3cfaa 509 0,
7212898e 510#endif
a7a3cfaa
TH
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
520};
521#else
522/* EBCDIC (or bust) */
f43a9a31 523STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
524 /* normal */
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
536 0,
537 /* f */ sizeof(float),
538 0, 0,
539 /* i */ sizeof(int),
540 0, 0, 0, 0, 0, 0, 0,
541 /* j */ IVSIZE,
542 0,
543 /* l */ SIZE32,
544 0,
545 /* n */ SIZE16,
546 0,
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 548#if defined(HAS_QUAD)
a7a3cfaa 549 /* q */ sizeof(Quad_t),
80a13697 550#else
a7a3cfaa 551 0,
80a13697 552#endif
a7a3cfaa
TH
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 /* s */ SIZE16,
555 0, 0,
556 /* v */ SIZE16,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 560 /* C */ sizeof(unsigned char),
80a13697 561#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 562 /* D */ LONG_DOUBLESIZE,
80a13697 563#else
a7a3cfaa 564 0,
80a13697 565#endif
a7a3cfaa
TH
566 0,
567 /* F */ NVSIZE,
568 0, 0,
569 /* I */ sizeof(unsigned int),
570 0, 0, 0, 0, 0, 0, 0,
571 /* J */ UVSIZE,
572 0,
573 /* L */ SIZE32,
574 0,
575 /* N */ SIZE32,
576 0, 0,
80a13697 577#if defined(HAS_QUAD)
a7a3cfaa 578 /* Q */ sizeof(Uquad_t),
80a13697 579#else
a7a3cfaa 580 0,
80a13697 581#endif
a7a3cfaa
TH
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 /* S */ SIZE16,
584 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
586 /* V */ SIZE32,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* shrieking */
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* i */ sizeof(int),
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
603 0,
7212898e 604#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 605 /* n */ SIZE16,
7212898e 606#else
a7a3cfaa 607 0,
7212898e 608#endif
a7a3cfaa
TH
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
611 0, 0,
7212898e 612#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 613 /* v */ SIZE16,
7212898e 614#else
a7a3cfaa 615 0,
7212898e 616#endif
a7a3cfaa
TH
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
619 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
623 0,
7212898e 624#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 625 /* N */ SIZE32,
7212898e 626#else
a7a3cfaa 627 0,
7212898e 628#endif
a7a3cfaa
TH
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
631 0, 0,
7212898e 632#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 633 /* V */ SIZE32,
7212898e 634#else
a7a3cfaa 635 0,
7212898e 636#endif
a7a3cfaa
TH
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
639};
640#endif
641
08ca2aa3 642STATIC U8
f7fe979e 643uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 644{
08ca2aa3 645 STRLEN retlen;
0bcc34c2 646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
648 /* We try to process malformed UTF-8 as much as possible (preferrably with
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 654 if (val >= 0x100) {
a2a5de95
NC
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
658 val &= 0xff;
659 }
660 *s += retlen;
fe2774ed 661 return (U8)val;
08ca2aa3
TH
662}
663
f337b084
TH
664#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
666 *(U8 *)(s)++)
667
668STATIC bool
f7fe979e 669uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
670{
671 UV val;
672 STRLEN retlen;
f7fe979e 673 const char *from = *s;
08ca2aa3 674 int bad = 0;
f7fe979e 675 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
f337b084 679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
682 bad |= 1;
683 } else from += retlen;
684 if (val >= 0x100) {
685 bad |= 2;
686 val &= 0xff;
687 }
fe2774ed 688 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
689 }
690 /* We have enough characters for the buffer. Did we have problems ? */
691 if (bad) {
692 if (bad & 1) {
693 /* Rewalk the string fragment while warning */
f7fe979e 694 const char *ptr;
9e27e96a 695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
f337b084 698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
699 }
700 if (from > end) from = end;
701 }
a2a5de95
NC
702 if ((bad & 2))
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 704 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
708 }
709 *s = from;
710 return TRUE;
711}
712
713STATIC bool
f7fe979e 714next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 715{
97aff369 716 dVAR;
08ca2aa3 717 STRLEN retlen;
0bcc34c2 718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
721 *out = 0;
722 return FALSE;
723 }
724 *out = PL_uudmap[val] & 077;
f337b084 725 *s += retlen;
08ca2aa3
TH
726 return TRUE;
727}
78d46eaa 728
64844641 729STATIC char *
14333449 730S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 731 const U8 * const end = start + len;
64844641 732
7918f24d
NC
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
734
f337b084 735 while (start < end) {
48fa4626
TS
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
739 else {
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
742 }
743 start++;
744 }
64844641 745 return dest;
f337b084
TH
746}
747
230e1fce
NC
748#define PUSH_BYTES(utf8, cur, buf, len) \
749STMT_START { \
64844641
AL
750 if (utf8) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
752 else { \
753 Copy(buf, cur, len, char); \
754 (cur) += (len); \
755 } \
f337b084
TH
756} STMT_END
757
758#define GROWING(utf8, cat, start, cur, in_len) \
759STMT_START { \
760 STRLEN glen = (in_len); \
3473cf63 761 if (utf8) glen *= UTF8_EXPAND; \
f337b084 762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 763 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
764 (cur) = (start) + SvCUR(cat); \
765 } \
766} STMT_END
767
768#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
769STMT_START { \
f7fe979e 770 const STRLEN glen = (in_len); \
f337b084 771 STRLEN gl = glen; \
3473cf63 772 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
774 *cur = '\0'; \
b162af07 775 SvCUR_set((cat), (cur) - (start)); \
0bd48802 776 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
777 (cur) = (start) + SvCUR(cat); \
778 } \
779 PUSH_BYTES(utf8, cur, buf, glen); \
780} STMT_END
781
782#define PUSH_BYTE(utf8, s, byte) \
783STMT_START { \
784 if (utf8) { \
f7fe979e 785 const U8 au8 = (byte); \
64844641 786 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
787 } else *(U8 *)(s)++ = (byte); \
788} STMT_END
789
790/* Only to be used inside a loop (see the break) */
791#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
792STMT_START { \
793 STRLEN retlen; \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
797 *cur = '\0'; \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
799 } \
800 str += retlen; \
801} STMT_END
802
f7fe979e
AL
803static const char *_action( const tempsym_t* symptr )
804{
10edeb5d 805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
806}
807
206947d2 808/* Returns the sizeof() struct described by pat */
028d1f6d 809STATIC I32
f337b084 810S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 811{
f337b084 812 I32 total = 0;
206947d2 813
7918f24d
NC
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
815
49704364 816 while (next_symbol(symptr)) {
f337b084 817 I32 len;
f7fe979e 818 int size;
f337b084
TH
819
820 switch (symptr->howlen) {
fc241834 821 case e_star:
49704364 822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 823 _action( symptr ) );
49704364 824 break;
f337b084
TH
825 default:
826 /* e_no_len and e_number */
827 len = symptr->length;
828 break;
49704364
WL
829 }
830
a7a3cfaa 831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 832 if (!size) {
f7fe979e 833 int star;
80a13697
NC
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
836 default:
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 839 _action( symptr ) );
28be1210
TH
840#ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
843#endif
80a13697 844 case '@':
28be1210 845 case '.':
80a13697
NC
846 case '/':
847 case 'U': /* XXXX Is it correct? */
848 case 'w':
849 case 'u':
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 851 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 852 _action( symptr ) );
80a13697
NC
853 case '%':
854 size = 0;
855 break;
856 case '(':
fc241834
RGS
857 {
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
866 *symptr = savsym;
867 break;
868 }
80a13697
NC
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
871 */
872 if (!len) /* Avoid division by 0 */
873 len = 1;
874 len = total % len; /* Assumed: the start is aligned. */
875 /* FALL THROUGH */
876 case 'X':
877 size = -1;
878 if (total < len)
f7fe979e 879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
880 break;
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
883 len = 1;
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
886 len = len - star;
887 else
888 len = 0;
889 /* FALL THROUGH */
890 case 'x':
891 case 'A':
892 case 'Z':
893 case 'a':
80a13697
NC
894 size = 1;
895 break;
896 case 'B':
897 case 'b':
898 len = (len + 7)/8;
899 size = 1;
900 break;
901 case 'H':
902 case 'h':
903 len = (len + 1)/2;
904 size = 1;
905 break;
78d46eaa 906
80a13697
NC
907 case 'P':
908 len = 1;
909 size = sizeof(char*);
78d46eaa
NC
910 break;
911 }
206947d2
IZ
912 }
913 total += len * size;
914 }
915 return total;
916}
917
49704364
WL
918
919/* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
921 */
f7fe979e
AL
922STATIC const char *
923S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 924{
7918f24d
NC
925 PERL_ARGS_ASSERT_GROUP_END;
926
49704364 927 while (patptr < patend) {
f7fe979e 928 const char c = *patptr++;
49704364
WL
929
930 if (isSPACE(c))
931 continue;
932 else if (c == ender)
933 return patptr-1;
934 else if (c == '#') {
935 while (patptr < patend && *patptr != '\n')
936 patptr++;
937 continue;
938 } else if (c == '(')
939 patptr = group_end(patptr, patend, ')') + 1;
940 else if (c == '[')
941 patptr = group_end(patptr, patend, ']') + 1;
18529408 942 }
49704364
WL
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
944 ender);
945 return 0;
18529408
IZ
946}
947
49704364
WL
948
949/* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
fc241834 952 */
f7fe979e
AL
953STATIC const char *
954S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
955{
956 I32 len = *patptr++ - '0';
7918f24d
NC
957
958 PERL_ARGS_ASSERT_GET_NUM;
959
49704364
WL
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
964 }
965 *lenptr = len;
966 return patptr;
967}
968
969/* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
971 */
972STATIC bool
f337b084 973S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 974{
f7fe979e 975 const char* patptr = symptr->patptr;
0bcc34c2 976 const char* const patend = symptr->patend;
49704364 977
7918f24d
NC
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
979
49704364
WL
980 symptr->flags &= ~FLAG_SLASH;
981
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
984 patptr++;
985 else if (*patptr == '#') {
986 patptr++;
987 while (patptr < patend && *patptr != '\n')
988 patptr++;
989 if (patptr < patend)
990 patptr++;
991 } else {
fc241834 992 /* We should have found a template code */
49704364 993 I32 code = *patptr++ & 0xFF;
66c611c5 994 U32 inherited_modifiers = 0;
49704364
WL
995
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 1000 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
1001 }
1002 continue;
1003 }
fc241834 1004
49704364 1005 /* for '(', skip to ')' */
fc241834 1006 if (code == '(') {
49704364
WL
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 1009 _action( symptr ) );
49704364
WL
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 1014 _action( symptr ) );
49704364
WL
1015 }
1016
66c611c5
MHM
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1021 }
1022
1109a392
MHM
1023 /* look for modifiers */
1024 while (patptr < patend) {
b7787f18
AL
1025 const char *allowed;
1026 I32 modifier;
1109a392
MHM
1027 switch (*patptr) {
1028 case '!':
1029 modifier = TYPE_IS_SHRIEKING;
7212898e 1030 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1031 break;
7212898e 1032#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1033 case '>':
1034 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1036 break;
1037 case '<':
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1040 break;
f337b084 1041#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1042 default:
b7787f18
AL
1043 allowed = "";
1044 modifier = 0;
1109a392
MHM
1045 break;
1046 }
66c611c5 1047
1109a392
MHM
1048 if (modifier == 0)
1049 break;
66c611c5 1050
1109a392
MHM
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1053 allowed, _action( symptr ) );
66c611c5
MHM
1054
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1061 *patptr, _action( symptr ) );
66c611c5 1062
a2a5de95
NC
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1109a392 1068 }
66c611c5 1069
1109a392
MHM
1070 code |= modifier;
1071 patptr++;
49704364
WL
1072 }
1073
66c611c5
MHM
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1076
fc241834 1077 /* look for count and/or / */
49704364
WL
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1082
1083 } else if (*patptr == '*') {
1084 patptr++;
1085 symptr->howlen = e_star;
1086
1087 } else if (*patptr == '[') {
f7fe979e 1088 const char* lenptr = ++patptr;
49704364
WL
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1096 _action( symptr ) );
49704364
WL
1097 } else {
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1102 *symptr = savsym;
1103 }
1104 } else {
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 /* try to find / */
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1112 patptr++;
1113 else if (*patptr == '#') {
1114 patptr++;
1115 while (patptr < patend && *patptr != '\n')
1116 patptr++;
1117 if (patptr < patend)
1118 patptr++;
1119 } else {
66c611c5 1120 if (*patptr == '/') {
49704364
WL
1121 symptr->flags |= FLAG_SLASH;
1122 patptr++;
66c611c5
MHM
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1126 _action( symptr ) );
49704364
WL
1127 }
1128 break;
1129 }
18529408 1130 }
49704364
WL
1131 } else {
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1134 symptr->length = 1;
1135 }
1136
1137 symptr->code = code;
fc241834 1138 symptr->patptr = patptr;
49704364 1139 return TRUE;
18529408 1140 }
49704364 1141 }
fc241834 1142 symptr->patptr = patptr;
49704364 1143 return FALSE;
18529408
IZ
1144}
1145
18529408 1146/*
fc241834 1147 There is no way to cleanly handle the case where we should process the
08ca2aa3 1148 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1153 themselves if they need to do a lot of unpacks like this on it
1154*/
fc241834 1155STATIC bool
08ca2aa3
TH
1156need_utf8(const char *pat, const char *patend)
1157{
1158 bool first = TRUE;
7918f24d
NC
1159
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1161
08ca2aa3
TH
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1164 pat++;
f7fe979e 1165 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1170 pat++;
1171 }
1172 return FALSE;
1173}
1174
1175STATIC char
1176first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1178
08ca2aa3
TH
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1181 pat++;
f7fe979e 1182 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1183 if (!pat) return 0;
1184 pat++;
1185 }
1186 return 0;
1187}
1188
1189/*
7accc089
JH
1190=for apidoc unpackstring
1191
608d3aed
WL
1192The engine implementing unpack() Perl function. C<unpackstring> puts the
1193extracted list items on the stack and returns the number of elements.
1194Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1195
1196=cut */
1197
1198I32
f7fe979e 1199Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1200{
f7fe979e 1201 tempsym_t sym;
08ca2aa3 1202
7918f24d
NC
1203 PERL_ARGS_ASSERT_UNPACKSTRING;
1204
f337b084 1205 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1206 else if (need_utf8(pat, patend)) {
1207 /* We probably should try to avoid this in case a scalar context call
1208 wouldn't get to the "U0" */
1209 STRLEN len = strend - s;
230e1fce 1210 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1211 SAVEFREEPV(s);
1212 strend = s + len;
f337b084 1213 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1214 }
1215
f337b084
TH
1216 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1217 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1218
f7fe979e 1219 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1220
1221 return unpack_rec(&sym, s, s, strend, NULL );
1222}
1223
4136a0f7 1224STATIC I32
f7fe979e 1225S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1226{
27da23d5 1227 dVAR; dSP;
18529408 1228 SV *sv;
f7fe979e 1229 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1230 howlen_t howlen;
a6ec74c1 1231 I32 checksum = 0;
92d41999 1232 UV cuv = 0;
a6ec74c1 1233 NV cdouble = 0.0;
f337b084 1234 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1235 bool beyond = FALSE;
21c16052 1236 bool explicit_length;
9e27e96a 1237 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1238 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1239
1240 PERL_ARGS_ASSERT_UNPACK_REC;
1241
28be1210 1242 symptr->strbeg = s - strbeg;
49704364 1243
49704364 1244 while (next_symbol(symptr)) {
a7a3cfaa 1245 packprops_t props;
9e27e96a 1246 I32 len;
f337b084 1247 I32 datumtype = symptr->code;
206947d2 1248 /* do first one only unless in list context
08ca2aa3 1249 / is implemented by unpacking the count, then popping it from the
206947d2 1250 stack, so must check that we're not in the middle of a / */
49704364 1251 if ( unpack_only_one
206947d2 1252 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1253 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1254 break;
49704364 1255
f337b084 1256 switch (howlen = symptr->howlen) {
fc241834
RGS
1257 case e_star:
1258 len = strend - strbeg; /* long enough */
49704364 1259 break;
f337b084
TH
1260 default:
1261 /* e_no_len and e_number */
1262 len = symptr->length;
1263 break;
49704364 1264 }
18529408 1265
21c16052 1266 explicit_length = TRUE;
a6ec74c1 1267 redo_switch:
49704364 1268 beyond = s >= strend;
a7a3cfaa
TH
1269
1270 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1271 if (props) {
1272 /* props nonzero means we can process this letter. */
9e27e96a
AL
1273 const long size = props & PACK_SIZE_MASK;
1274 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1275 if (len > howmany)
1276 len = howmany;
1277
1278 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1279 if (len && unpack_only_one) len = 1;
1280 EXTEND(SP, len);
1281 EXTEND_MORTAL(len);
78d46eaa
NC
1282 }
1283 }
a7a3cfaa 1284
1109a392 1285 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1286 default:
1109a392 1287 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1288
a6ec74c1 1289 case '%':
49704364 1290 if (howlen == e_no_len)
18529408 1291 len = 16; /* len is not specified */
a6ec74c1 1292 checksum = len;
92d41999 1293 cuv = 0;
a6ec74c1 1294 cdouble = 0;
18529408 1295 continue;
a6ec74c1 1296 break;
18529408
IZ
1297 case '(':
1298 {
49704364 1299 tempsym_t savsym = *symptr;
9e27e96a 1300 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1301 symptr->flags |= group_modifiers;
49704364 1302 symptr->patend = savsym.grpend;
28be1210 1303 symptr->previous = &savsym;
49704364 1304 symptr->level++;
18529408 1305 PUTBACK;
c6f750d1 1306 if (len && unpack_only_one) len = 1;
18529408 1307 while (len--) {
49704364 1308 symptr->patptr = savsym.grpbeg;
f337b084
TH
1309 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1310 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1311 unpack_rec(symptr, s, strbeg, strend, &s);
1312 if (s == strend && savsym.howlen == e_star)
49704364 1313 break; /* No way to continue */
18529408
IZ
1314 }
1315 SPAGAIN;
28be1210 1316 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1317 *symptr = savsym;
18529408
IZ
1318 break;
1319 }
28be1210
TH
1320#ifdef PERL_PACK_CAN_SHRIEKSIGN
1321 case '.' | TYPE_IS_SHRIEKING:
1322#endif
1323 case '.': {
9e27e96a 1324 const char *from;
28be1210
TH
1325 SV *sv;
1326#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1327 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1328#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1329 const bool u8 = utf8;
28be1210
TH
1330#endif
1331 if (howlen == e_star) from = strbeg;
1332 else if (len <= 0) from = s;
1333 else {
1334 tempsym_t *group = symptr;
1335
1336 while (--len && group) group = group->previous;
1337 from = group ? strbeg + group->strbeg : strbeg;
1338 }
1339 sv = from <= s ?
00646304
CB
1340 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1341 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1342 mXPUSHs(sv);
28be1210
TH
1343 break;
1344 }
1345#ifdef PERL_PACK_CAN_SHRIEKSIGN
1346 case '@' | TYPE_IS_SHRIEKING:
1347#endif
a6ec74c1 1348 case '@':
28be1210
TH
1349 s = strbeg + symptr->strbeg;
1350#ifdef PERL_PACK_CAN_SHRIEKSIGN
1351 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1352#else /* PERL_PACK_CAN_SHRIEKSIGN */
1353 if (utf8)
1354#endif
1355 {
08ca2aa3
TH
1356 while (len > 0) {
1357 if (s >= strend)
1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
1359 s += UTF8SKIP(s);
1360 len--;
1361 }
1362 if (s > strend)
1363 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1364 } else {
28be1210 1365 if (strend-s < len)
fc241834 1366 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1367 s += len;
08ca2aa3 1368 }
a6ec74c1 1369 break;
62f95557
IZ
1370 case 'X' | TYPE_IS_SHRIEKING:
1371 if (!len) /* Avoid division by 0 */
1372 len = 1;
08ca2aa3 1373 if (utf8) {
f7fe979e 1374 const char *hop, *last;
f337b084
TH
1375 I32 l = len;
1376 hop = last = strbeg;
1377 while (hop < s) {
1378 hop += UTF8SKIP(hop);
1379 if (--l == 0) {
08ca2aa3 1380 last = hop;
f337b084
TH
1381 l = len;
1382 }
fc241834 1383 }
f337b084
TH
1384 if (last > s)
1385 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1386 s = last;
1387 break;
f337b084
TH
1388 }
1389 len = (s - strbeg) % len;
62f95557 1390 /* FALL THROUGH */
a6ec74c1 1391 case 'X':
08ca2aa3
TH
1392 if (utf8) {
1393 while (len > 0) {
1394 if (s <= strbeg)
1395 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1396 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1397 if (s <= strbeg)
1398 Perl_croak(aTHX_ "'X' outside of string in unpack");
1399 }
1400 len--;
1401 }
1402 } else {
fc241834
RGS
1403 if (len > s - strbeg)
1404 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1405 s -= len;
08ca2aa3 1406 }
a6ec74c1 1407 break;
9e27e96a
AL
1408 case 'x' | TYPE_IS_SHRIEKING: {
1409 I32 ai32;
62f95557
IZ
1410 if (!len) /* Avoid division by 0 */
1411 len = 1;
230e1fce
NC
1412 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1413 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1414 if (ai32 == 0) break;
1415 len -= ai32;
9e27e96a 1416 }
62f95557 1417 /* FALL THROUGH */
a6ec74c1 1418 case 'x':
08ca2aa3
TH
1419 if (utf8) {
1420 while (len>0) {
1421 if (s >= strend)
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1423 s += UTF8SKIP(s);
1424 len--;
1425 }
1426 } else {
fc241834
RGS
1427 if (len > strend - s)
1428 Perl_croak(aTHX_ "'x' outside of string in unpack");
1429 s += len;
f337b084 1430 }
a6ec74c1
JH
1431 break;
1432 case '/':
49704364
WL
1433 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1434 break;
a6ec74c1
JH
1435 case 'A':
1436 case 'Z':
1437 case 'a':
08ca2aa3
TH
1438 if (checksum) {
1439 /* Preliminary length estimate is assumed done in 'W' */
1440 if (len > strend - s) len = strend - s;
1441 goto W_checksum;
1442 }
1443 if (utf8) {
1444 I32 l;
f7fe979e 1445 const char *hop;
08ca2aa3
TH
1446 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1447 if (hop >= strend) {
1448 if (hop > strend)
1449 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1450 break;
fc241834 1451 }
a6ec74c1 1452 }
08ca2aa3
TH
1453 if (hop > strend)
1454 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1455 len = hop - s;
1456 } else if (len > strend - s)
1457 len = strend - s;
1458
1459 if (datumtype == 'Z') {
1460 /* 'Z' strips stuff after first null */
f7fe979e 1461 const char *ptr, *end;
f337b084
TH
1462 end = s + len;
1463 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1464 sv = newSVpvn(s, ptr-s);
1465 if (howlen == e_star) /* exact for 'Z*' */
1466 len = ptr-s + (ptr != strend ? 1 : 0);
1467 } else if (datumtype == 'A') {
1468 /* 'A' strips both nulls and spaces */
f7fe979e 1469 const char *ptr;
18bdf90a
TH
1470 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1473 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1474 if (ptr >= s) ptr += UTF8SKIP(ptr);
1475 else ptr++;
28be1210 1476 if (ptr > s+len)
18bdf90a
TH
1477 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1478 } else {
1479 for (ptr = s+len-1; ptr >= s; ptr--)
1480 if (*ptr != 0 && !isSPACE(*ptr)) break;
1481 ptr++;
1482 }
08ca2aa3
TH
1483 sv = newSVpvn(s, ptr-s);
1484 } else sv = newSVpvn(s, len);
1485
1486 if (utf8) {
1487 SvUTF8_on(sv);
1488 /* Undo any upgrade done due to need_utf8() */
f337b084 1489 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1490 sv_utf8_downgrade(sv, 0);
a6ec74c1 1491 }
6e449a3a 1492 mXPUSHs(sv);
08ca2aa3 1493 s += len;
a6ec74c1
JH
1494 break;
1495 case 'B':
08ca2aa3
TH
1496 case 'b': {
1497 char *str;
49704364 1498 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1499 len = (strend - s) * 8;
1500 if (checksum) {
f337b084 1501 if (utf8)
08ca2aa3 1502 while (len >= 8 && s < strend) {
f337b084 1503 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1504 len -= 8;
1505 }
f337b084 1506 else
fc241834 1507 while (len >= 8) {
08ca2aa3 1508 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1509 len -= 8;
1510 }
08ca2aa3
TH
1511 if (len && s < strend) {
1512 U8 bits;
f337b084
TH
1513 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1514 if (datumtype == 'b')
a6ec74c1 1515 while (len-- > 0) {
92d41999 1516 if (bits & 1) cuv++;
a6ec74c1
JH
1517 bits >>= 1;
1518 }
f337b084 1519 else
a6ec74c1 1520 while (len-- > 0) {
08ca2aa3 1521 if (bits & 0x80) cuv++;
a6ec74c1
JH
1522 bits <<= 1;
1523 }
fc241834 1524 }
a6ec74c1
JH
1525 break;
1526 }
08ca2aa3 1527
561b68a9 1528 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1529 SvPOK_on(sv);
1530 str = SvPVX(sv);
1531 if (datumtype == 'b') {
f337b084 1532 U8 bits = 0;
f7fe979e 1533 const I32 ai32 = len;
08ca2aa3
TH
1534 for (len = 0; len < ai32; len++) {
1535 if (len & 7) bits >>= 1;
1536 else if (utf8) {
1537 if (s >= strend) break;
f337b084 1538 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1539 } else bits = *(U8 *) s++;
1540 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1541 }
08ca2aa3 1542 } else {
f337b084 1543 U8 bits = 0;
f7fe979e 1544 const I32 ai32 = len;
08ca2aa3
TH
1545 for (len = 0; len < ai32; len++) {
1546 if (len & 7) bits <<= 1;
1547 else if (utf8) {
1548 if (s >= strend) break;
f337b084 1549 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1550 } else bits = *(U8 *) s++;
1551 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1552 }
1553 }
1554 *str = '\0';
aa07b2f6 1555 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1556 XPUSHs(sv);
a6ec74c1 1557 break;
08ca2aa3 1558 }
a6ec74c1 1559 case 'H':
08ca2aa3
TH
1560 case 'h': {
1561 char *str;
fc241834 1562 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1563 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1564 len = (strend - s) * 2;
561b68a9 1565 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1566 SvPOK_on(sv);
1567 str = SvPVX(sv);
1568 if (datumtype == 'h') {
f337b084 1569 U8 bits = 0;
9e27e96a 1570 I32 ai32 = len;
fc241834
RGS
1571 for (len = 0; len < ai32; len++) {
1572 if (len & 1) bits >>= 4;
1573 else if (utf8) {
1574 if (s >= strend) break;
f337b084 1575 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1576 } else bits = * (U8 *) s++;
a6ec74c1
JH
1577 *str++ = PL_hexdigit[bits & 15];
1578 }
08ca2aa3 1579 } else {
f337b084 1580 U8 bits = 0;
f7fe979e 1581 const I32 ai32 = len;
08ca2aa3
TH
1582 for (len = 0; len < ai32; len++) {
1583 if (len & 1) bits <<= 4;
1584 else if (utf8) {
1585 if (s >= strend) break;
f337b084 1586 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1587 } else bits = *(U8 *) s++;
a6ec74c1
JH
1588 *str++ = PL_hexdigit[(bits >> 4) & 15];
1589 }
1590 }
1591 *str = '\0';
aa07b2f6 1592 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1593 XPUSHs(sv);
a6ec74c1 1594 break;
08ca2aa3 1595 }
1651fc44
ML
1596 case 'C':
1597 if (len == 0) {
1598 if (explicit_length)
1599 /* Switch to "character" mode */
1600 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1601 break;
1602 }
1603 /* FALL THROUGH */
a6ec74c1 1604 case 'c':
1651fc44
ML
1605 while (len-- > 0 && s < strend) {
1606 int aint;
1607 if (utf8)
1608 {
1609 STRLEN retlen;
1610 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1611 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1612 if (retlen == (STRLEN) -1 || retlen == 0)
1613 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1614 s += retlen;
1615 }
1616 else
1617 aint = *(U8 *)(s)++;
1618 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1619 aint -= 256;
08ca2aa3 1620 if (!checksum)
6e449a3a 1621 mPUSHi(aint);
73cb7263
NC
1622 else if (checksum > bits_in_uv)
1623 cdouble += (NV)aint;
1624 else
1625 cuv += aint;
a6ec74c1
JH
1626 }
1627 break;
08ca2aa3
TH
1628 case 'W':
1629 W_checksum:
1651fc44 1630 if (utf8) {
08ca2aa3 1631 while (len-- > 0 && s < strend) {
08ca2aa3 1632 STRLEN retlen;
f7fe979e 1633 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1634 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1635 if (retlen == (STRLEN) -1 || retlen == 0)
1636 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1637 s += retlen;
1638 if (!checksum)
6e449a3a 1639 mPUSHu(val);
08ca2aa3
TH
1640 else if (checksum > bits_in_uv)
1641 cdouble += (NV) val;
d6d3e8bd 1642 else
08ca2aa3 1643 cuv += val;
fc241834 1644 }
08ca2aa3 1645 } else if (!checksum)
a6ec74c1 1646 while (len-- > 0) {
f7fe979e 1647 const U8 ch = *(U8 *) s++;
6e449a3a 1648 mPUSHu(ch);
a6ec74c1 1649 }
08ca2aa3
TH
1650 else if (checksum > bits_in_uv)
1651 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1652 else
1653 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1654 break;
1655 case 'U':
35bcd338 1656 if (len == 0) {
08ca2aa3
TH
1657 if (explicit_length) {
1658 /* Switch to "bytes in UTF-8" mode */
f337b084 1659 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1660 else
1661 /* Should be impossible due to the need_utf8() test */
1662 Perl_croak(aTHX_ "U0 mode on a byte string");
1663 }
35bcd338
JH
1664 break;
1665 }
08ca2aa3 1666 if (len > strend - s) len = strend - s;
fc241834 1667 if (!checksum) {
08ca2aa3
TH
1668 if (len && unpack_only_one) len = 1;
1669 EXTEND(SP, len);
1670 EXTEND_MORTAL(len);
fc241834 1671 }
08ca2aa3
TH
1672 while (len-- > 0 && s < strend) {
1673 STRLEN retlen;
1674 UV auv;
1675 if (utf8) {
1676 U8 result[UTF8_MAXLEN];
f7fe979e 1677 const char *ptr = s;
08ca2aa3 1678 STRLEN len;
08ca2aa3
TH
1679 /* Bug: warns about bad utf8 even if we are short on bytes
1680 and will break out of the loop */
230e1fce
NC
1681 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1682 'U'))
08ca2aa3
TH
1683 break;
1684 len = UTF8SKIP(result);
fc241834 1685 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1686 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1687 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1688 s = ptr;
1689 } else {
1690 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1691 if (retlen == (STRLEN) -1 || retlen == 0)
1692 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1693 s += retlen;
1694 }
1695 if (!checksum)
6e449a3a 1696 mPUSHu(auv);
73cb7263 1697 else if (checksum > bits_in_uv)
08ca2aa3 1698 cdouble += (NV) auv;
73cb7263 1699 else
08ca2aa3 1700 cuv += auv;
a6ec74c1
JH
1701 }
1702 break;
49704364
WL
1703 case 's' | TYPE_IS_SHRIEKING:
1704#if SHORTSIZE != SIZE16
73cb7263 1705 while (len-- > 0) {
08ca2aa3 1706 short ashort;
f337b084
TH
1707 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1708 DO_BO_UNPACK(ashort, s);
08ca2aa3 1709 if (!checksum)
6e449a3a 1710 mPUSHi(ashort);
73cb7263
NC
1711 else if (checksum > bits_in_uv)
1712 cdouble += (NV)ashort;
1713 else
1714 cuv += ashort;
49704364
WL
1715 }
1716 break;
1717#else
1718 /* Fallthrough! */
a6ec74c1 1719#endif
49704364 1720 case 's':
73cb7263 1721 while (len-- > 0) {
08ca2aa3
TH
1722 I16 ai16;
1723
1724#if U16SIZE > SIZE16
1725 ai16 = 0;
1726#endif
f337b084 1727 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1728 DO_BO_UNPACK(ai16, 16);
1109a392 1729#if U16SIZE > SIZE16
73cb7263
NC
1730 if (ai16 > 32767)
1731 ai16 -= 65536;
a6ec74c1 1732#endif
08ca2aa3 1733 if (!checksum)
6e449a3a 1734 mPUSHi(ai16);
73cb7263
NC
1735 else if (checksum > bits_in_uv)
1736 cdouble += (NV)ai16;
1737 else
1738 cuv += ai16;
a6ec74c1
JH
1739 }
1740 break;
49704364
WL
1741 case 'S' | TYPE_IS_SHRIEKING:
1742#if SHORTSIZE != SIZE16
73cb7263 1743 while (len-- > 0) {
08ca2aa3 1744 unsigned short aushort;
f337b084
TH
1745 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1746 DO_BO_UNPACK(aushort, s);
08ca2aa3 1747 if (!checksum)
6e449a3a 1748 mPUSHu(aushort);
73cb7263
NC
1749 else if (checksum > bits_in_uv)
1750 cdouble += (NV)aushort;
1751 else
1752 cuv += aushort;
49704364
WL
1753 }
1754 break;
1755#else
1756 /* Fallhrough! */
1757#endif
a6ec74c1
JH
1758 case 'v':
1759 case 'n':
1760 case 'S':
73cb7263 1761 while (len-- > 0) {
08ca2aa3
TH
1762 U16 au16;
1763#if U16SIZE > SIZE16
1764 au16 = 0;
1765#endif
f337b084 1766 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1767 DO_BO_UNPACK(au16, 16);
a6ec74c1 1768#ifdef HAS_NTOHS
73cb7263
NC
1769 if (datumtype == 'n')
1770 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1771#endif
1772#ifdef HAS_VTOHS
73cb7263
NC
1773 if (datumtype == 'v')
1774 au16 = vtohs(au16);
a6ec74c1 1775#endif
08ca2aa3 1776 if (!checksum)
6e449a3a 1777 mPUSHu(au16);
73cb7263 1778 else if (checksum > bits_in_uv)
f337b084 1779 cdouble += (NV) au16;
73cb7263
NC
1780 else
1781 cuv += au16;
a6ec74c1
JH
1782 }
1783 break;
7212898e 1784#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1785 case 'v' | TYPE_IS_SHRIEKING:
1786 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1787 while (len-- > 0) {
08ca2aa3
TH
1788 I16 ai16;
1789# if U16SIZE > SIZE16
1790 ai16 = 0;
1791# endif
f337b084 1792 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1793# ifdef HAS_NTOHS
73cb7263 1794 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1795 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1796# endif /* HAS_NTOHS */
1797# ifdef HAS_VTOHS
73cb7263 1798 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1799 ai16 = (I16) vtohs((U16) ai16);
1800# endif /* HAS_VTOHS */
1801 if (!checksum)
6e449a3a 1802 mPUSHi(ai16);
73cb7263 1803 else if (checksum > bits_in_uv)
08ca2aa3 1804 cdouble += (NV) ai16;
73cb7263
NC
1805 else
1806 cuv += ai16;
068bd2e7
MHM
1807 }
1808 break;
08ca2aa3 1809#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1810 case 'i':
49704364 1811 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1812 while (len-- > 0) {
08ca2aa3 1813 int aint;
f337b084
TH
1814 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1815 DO_BO_UNPACK(aint, i);
08ca2aa3 1816 if (!checksum)
6e449a3a 1817 mPUSHi(aint);
73cb7263
NC
1818 else if (checksum > bits_in_uv)
1819 cdouble += (NV)aint;
1820 else
1821 cuv += aint;
a6ec74c1
JH
1822 }
1823 break;
1824 case 'I':
49704364 1825 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1826 while (len-- > 0) {
08ca2aa3 1827 unsigned int auint;
f337b084
TH
1828 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1829 DO_BO_UNPACK(auint, i);
08ca2aa3 1830 if (!checksum)
6e449a3a 1831 mPUSHu(auint);
73cb7263
NC
1832 else if (checksum > bits_in_uv)
1833 cdouble += (NV)auint;
1834 else
1835 cuv += auint;
a6ec74c1
JH
1836 }
1837 break;
92d41999 1838 case 'j':
73cb7263 1839 while (len-- > 0) {
08ca2aa3 1840 IV aiv;
f337b084 1841 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1842#if IVSIZE == INTSIZE
f337b084 1843 DO_BO_UNPACK(aiv, i);
1109a392 1844#elif IVSIZE == LONGSIZE
f337b084 1845 DO_BO_UNPACK(aiv, l);
1109a392 1846#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1847 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1848#else
1849 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1850#endif
08ca2aa3 1851 if (!checksum)
6e449a3a 1852 mPUSHi(aiv);
73cb7263
NC
1853 else if (checksum > bits_in_uv)
1854 cdouble += (NV)aiv;
1855 else
1856 cuv += aiv;
92d41999
JH
1857 }
1858 break;
1859 case 'J':
73cb7263 1860 while (len-- > 0) {
08ca2aa3 1861 UV auv;
f337b084 1862 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1863#if IVSIZE == INTSIZE
f337b084 1864 DO_BO_UNPACK(auv, i);
08ca2aa3 1865#elif IVSIZE == LONGSIZE
f337b084 1866 DO_BO_UNPACK(auv, l);
08ca2aa3 1867#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1868 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1869#else
1870 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1871#endif
08ca2aa3 1872 if (!checksum)
6e449a3a 1873 mPUSHu(auv);
73cb7263
NC
1874 else if (checksum > bits_in_uv)
1875 cdouble += (NV)auv;
1876 else
1877 cuv += auv;
92d41999
JH
1878 }
1879 break;
49704364
WL
1880 case 'l' | TYPE_IS_SHRIEKING:
1881#if LONGSIZE != SIZE32
73cb7263 1882 while (len-- > 0) {
08ca2aa3 1883 long along;
f337b084
TH
1884 SHIFT_VAR(utf8, s, strend, along, datumtype);
1885 DO_BO_UNPACK(along, l);
08ca2aa3 1886 if (!checksum)
6e449a3a 1887 mPUSHi(along);
73cb7263
NC
1888 else if (checksum > bits_in_uv)
1889 cdouble += (NV)along;
1890 else
1891 cuv += along;
49704364
WL
1892 }
1893 break;
1894#else
1895 /* Fallthrough! */
a6ec74c1 1896#endif
49704364 1897 case 'l':
73cb7263 1898 while (len-- > 0) {
08ca2aa3
TH
1899 I32 ai32;
1900#if U32SIZE > SIZE32
1901 ai32 = 0;
1902#endif
f337b084 1903 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1904 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1905#if U32SIZE > SIZE32
08ca2aa3 1906 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1907#endif
08ca2aa3 1908 if (!checksum)
6e449a3a 1909 mPUSHi(ai32);
73cb7263
NC
1910 else if (checksum > bits_in_uv)
1911 cdouble += (NV)ai32;
1912 else
1913 cuv += ai32;
a6ec74c1
JH
1914 }
1915 break;
49704364
WL
1916 case 'L' | TYPE_IS_SHRIEKING:
1917#if LONGSIZE != SIZE32
73cb7263 1918 while (len-- > 0) {
08ca2aa3 1919 unsigned long aulong;
f337b084
TH
1920 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1921 DO_BO_UNPACK(aulong, l);
08ca2aa3 1922 if (!checksum)
6e449a3a 1923 mPUSHu(aulong);
73cb7263
NC
1924 else if (checksum > bits_in_uv)
1925 cdouble += (NV)aulong;
1926 else
1927 cuv += aulong;
49704364
WL
1928 }
1929 break;
1930#else
1931 /* Fall through! */
1932#endif
a6ec74c1
JH
1933 case 'V':
1934 case 'N':
1935 case 'L':
73cb7263 1936 while (len-- > 0) {
08ca2aa3
TH
1937 U32 au32;
1938#if U32SIZE > SIZE32
1939 au32 = 0;
1940#endif
f337b084 1941 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1942 DO_BO_UNPACK(au32, 32);
a6ec74c1 1943#ifdef HAS_NTOHL
73cb7263
NC
1944 if (datumtype == 'N')
1945 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1946#endif
1947#ifdef HAS_VTOHL
73cb7263
NC
1948 if (datumtype == 'V')
1949 au32 = vtohl(au32);
a6ec74c1 1950#endif
08ca2aa3 1951 if (!checksum)
6e449a3a 1952 mPUSHu(au32);
fc241834
RGS
1953 else if (checksum > bits_in_uv)
1954 cdouble += (NV)au32;
1955 else
1956 cuv += au32;
a6ec74c1
JH
1957 }
1958 break;
7212898e 1959#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1960 case 'V' | TYPE_IS_SHRIEKING:
1961 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1962 while (len-- > 0) {
08ca2aa3
TH
1963 I32 ai32;
1964# if U32SIZE > SIZE32
1965 ai32 = 0;
1966# endif
f337b084 1967 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1968# ifdef HAS_NTOHL
73cb7263
NC
1969 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1970 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1971# endif
1972# ifdef HAS_VTOHL
73cb7263
NC
1973 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1974 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1975# endif
1976 if (!checksum)
6e449a3a 1977 mPUSHi(ai32);
73cb7263
NC
1978 else if (checksum > bits_in_uv)
1979 cdouble += (NV)ai32;
1980 else
1981 cuv += ai32;
068bd2e7
MHM
1982 }
1983 break;
08ca2aa3 1984#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1985 case 'p':
a6ec74c1 1986 while (len-- > 0) {
f7fe979e 1987 const char *aptr;
f337b084 1988 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1989 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1990 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1991 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1992 }
1993 break;
1994 case 'w':
a6ec74c1
JH
1995 {
1996 UV auv = 0;
1997 U32 bytes = 0;
fc241834 1998
08ca2aa3
TH
1999 while (len > 0 && s < strend) {
2000 U8 ch;
f337b084 2001 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 2002 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2003 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2004 if (ch < 0x80) {
a6ec74c1 2005 bytes = 0;
6e449a3a 2006 mPUSHu(auv);
a6ec74c1
JH
2007 len--;
2008 auv = 0;
08ca2aa3 2009 continue;
a6ec74c1 2010 }
08ca2aa3 2011 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2012 const char *t;
a6ec74c1 2013
f5992bc4 2014 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2015 while (s < strend) {
f337b084 2016 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2017 sv = mul128(sv, (U8)(ch & 0x7f));
2018 if (!(ch & 0x80)) {
a6ec74c1
JH
2019 bytes = 0;
2020 break;
2021 }
2022 }
10516c54 2023 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2024 while (*t == '0')
2025 t++;
2026 sv_chop(sv, t);
6e449a3a 2027 mPUSHs(sv);
a6ec74c1
JH
2028 len--;
2029 auv = 0;
2030 }
2031 }
2032 if ((s >= strend) && bytes)
49704364 2033 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2034 }
2035 break;
2036 case 'P':
49704364
WL
2037 if (symptr->howlen == e_star)
2038 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2039 EXTEND(SP, 1);
2d3e0934 2040 if (s + sizeof(char*) <= strend) {
08ca2aa3 2041 char *aptr;
f337b084 2042 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2043 DO_BO_UNPACK_PC(aptr);
fc241834 2044 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2045 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2046 }
a6ec74c1
JH
2047 break;
2048#ifdef HAS_QUAD
2049 case 'q':
73cb7263 2050 while (len-- > 0) {
08ca2aa3 2051 Quad_t aquad;
f337b084
TH
2052 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2053 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2054 if (!checksum)
6e449a3a
MHM
2055 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2056 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2057 else if (checksum > bits_in_uv)
2058 cdouble += (NV)aquad;
2059 else
2060 cuv += aquad;
2061 }
a6ec74c1
JH
2062 break;
2063 case 'Q':
73cb7263 2064 while (len-- > 0) {
08ca2aa3 2065 Uquad_t auquad;
f337b084
TH
2066 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2067 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2068 if (!checksum)
6e449a3a
MHM
2069 mPUSHs(auquad <= UV_MAX ?
2070 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2071 else if (checksum > bits_in_uv)
2072 cdouble += (NV)auquad;
2073 else
2074 cuv += auquad;
a6ec74c1
JH
2075 }
2076 break;
08ca2aa3 2077#endif /* HAS_QUAD */
a6ec74c1
JH
2078 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2079 case 'f':
73cb7263 2080 while (len-- > 0) {
08ca2aa3 2081 float afloat;
f337b084 2082 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2083 DO_BO_UNPACK_N(afloat, float);
2084 if (!checksum)
6e449a3a 2085 mPUSHn(afloat);
08ca2aa3 2086 else
73cb7263 2087 cdouble += afloat;
fc241834 2088 }
a6ec74c1
JH
2089 break;
2090 case 'd':
73cb7263 2091 while (len-- > 0) {
08ca2aa3 2092 double adouble;
f337b084 2093 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2094 DO_BO_UNPACK_N(adouble, double);
2095 if (!checksum)
6e449a3a 2096 mPUSHn(adouble);
08ca2aa3 2097 else
73cb7263 2098 cdouble += adouble;
fc241834 2099 }
a6ec74c1 2100 break;
92d41999 2101 case 'F':
73cb7263 2102 while (len-- > 0) {
275663fa
TC
2103 NV_bytes anv;
2104 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2105 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 2106 if (!checksum)
275663fa 2107 mPUSHn(anv.nv);
08ca2aa3 2108 else
275663fa 2109 cdouble += anv.nv;
fc241834 2110 }
92d41999
JH
2111 break;
2112#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2113 case 'D':
73cb7263 2114 while (len-- > 0) {
275663fa
TC
2115 ld_bytes aldouble;
2116 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2117 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 2118 if (!checksum)
275663fa 2119 mPUSHn(aldouble.ld);
08ca2aa3 2120 else
275663fa 2121 cdouble += aldouble.ld;
92d41999
JH
2122 }
2123 break;
2124#endif
a6ec74c1 2125 case 'u':
08ca2aa3 2126 {
f7fe979e 2127 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2128 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2129 if (l) SvPOK_on(sv);
2130 }
2131 if (utf8) {
2132 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2133 I32 a, b, c, d;
db187877 2134 char hunk[3];
08ca2aa3 2135
08ca2aa3
TH
2136 while (len > 0) {
2137 next_uni_uu(aTHX_ &s, strend, &a);
2138 next_uni_uu(aTHX_ &s, strend, &b);
2139 next_uni_uu(aTHX_ &s, strend, &c);
2140 next_uni_uu(aTHX_ &s, strend, &d);
2141 hunk[0] = (char)((a << 2) | (b >> 4));
2142 hunk[1] = (char)((b << 4) | (c >> 2));
2143 hunk[2] = (char)((c << 6) | d);
2144 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2145 len -= 3;
2146 }
2147 if (s < strend) {
f7fe979e
AL
2148 if (*s == '\n') {
2149 s++;
2150 }
08ca2aa3
TH
2151 else {
2152 /* possible checksum byte */
f7fe979e
AL
2153 const char *skip = s+UTF8SKIP(s);
2154 if (skip < strend && *skip == '\n')
2155 s = skip+1;
08ca2aa3
TH
2156 }
2157 }
2158 }
2159 } else {
fc241834
RGS
2160 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2161 I32 a, b, c, d;
db187877 2162 char hunk[3];
a6ec74c1 2163
fc241834
RGS
2164 len = PL_uudmap[*(U8*)s++] & 077;
2165 while (len > 0) {
2166 if (s < strend && ISUUCHAR(*s))
2167 a = PL_uudmap[*(U8*)s++] & 077;
2168 else
2169 a = 0;
2170 if (s < strend && ISUUCHAR(*s))
2171 b = PL_uudmap[*(U8*)s++] & 077;
2172 else
2173 b = 0;
2174 if (s < strend && ISUUCHAR(*s))
2175 c = PL_uudmap[*(U8*)s++] & 077;
2176 else
2177 c = 0;
2178 if (s < strend && ISUUCHAR(*s))
2179 d = PL_uudmap[*(U8*)s++] & 077;
2180 else
2181 d = 0;
2182 hunk[0] = (char)((a << 2) | (b >> 4));
2183 hunk[1] = (char)((b << 4) | (c >> 2));
2184 hunk[2] = (char)((c << 6) | d);
2185 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2186 len -= 3;
2187 }
2188 if (*s == '\n')
2189 s++;
2190 else /* possible checksum byte */
2191 if (s + 1 < strend && s[1] == '\n')
2192 s += 2;
a6ec74c1 2193 }
08ca2aa3
TH
2194 }
2195 XPUSHs(sv);
a6ec74c1
JH
2196 break;
2197 }
49704364 2198
a6ec74c1 2199 if (checksum) {
1109a392 2200 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2201 (checksum > bits_in_uv &&
08ca2aa3
TH
2202 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2203 NV trouble, anv;
a6ec74c1 2204
08ca2aa3 2205 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2206 while (checksum >= 16) {
2207 checksum -= 16;
08ca2aa3 2208 anv *= 65536.0;
a6ec74c1 2209 }
a6ec74c1 2210 while (cdouble < 0.0)
08ca2aa3
TH
2211 cdouble += anv;
2212 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2213 sv = newSVnv(cdouble);
a6ec74c1
JH
2214 }
2215 else {
fa8ec7c1
NC
2216 if (checksum < bits_in_uv) {
2217 UV mask = ((UV)1 << checksum) - 1;
92d41999 2218 cuv &= mask;
a6ec74c1 2219 }
c4c5f44a 2220 sv = newSVuv(cuv);
a6ec74c1 2221 }
6e449a3a 2222 mXPUSHs(sv);
a6ec74c1
JH
2223 checksum = 0;
2224 }
fc241834 2225
49704364
WL
2226 if (symptr->flags & FLAG_SLASH){
2227 if (SP - PL_stack_base - start_sp_offset <= 0)
2228 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2229 if( next_symbol(symptr) ){
2230 if( symptr->howlen == e_number )
2231 Perl_croak(aTHX_ "Count after length/code in unpack" );
2232 if( beyond ){
2233 /* ...end of char buffer then no decent length available */
2234 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2235 } else {
2236 /* take top of stack (hope it's numeric) */
2237 len = POPi;
2238 if( len < 0 )
2239 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2240 }
2241 } else {
2242 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2243 }
2244 datumtype = symptr->code;
21c16052 2245 explicit_length = FALSE;
49704364
WL
2246 goto redo_switch;
2247 }
a6ec74c1 2248 }
49704364 2249
18529408
IZ
2250 if (new_s)
2251 *new_s = s;
2252 PUTBACK;
2253 return SP - PL_stack_base - start_sp_offset;
2254}
2255
2256PP(pp_unpack)
2257{
97aff369 2258 dVAR;
18529408 2259 dSP;
bab9c0ac 2260 dPOPPOPssrl;
18529408
IZ
2261 I32 gimme = GIMME_V;
2262 STRLEN llen;
2263 STRLEN rlen;
5c144d81
NC
2264 const char *pat = SvPV_const(left, llen);
2265 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2266 const char *strend = s + rlen;
2267 const char *patend = pat + llen;
08ca2aa3 2268 I32 cnt;
18529408
IZ
2269
2270 PUTBACK;
7accc089 2271 cnt = unpackstring(pat, patend, s, strend,
49704364 2272 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2273 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2274
18529408
IZ
2275 SPAGAIN;
2276 if ( !cnt && gimme == G_SCALAR )
2277 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2278 RETURN;
2279}
2280
f337b084 2281STATIC U8 *
f7fe979e 2282doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2283{
f337b084 2284 *h++ = PL_uuemap[len];
a6ec74c1 2285 while (len > 2) {
f337b084
TH
2286 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2287 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2288 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2289 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2290 s += 3;
2291 len -= 3;
2292 }
2293 if (len > 0) {
f7fe979e 2294 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2295 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2296 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2297 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2298 *h++ = PL_uuemap[0];
a6ec74c1 2299 }
f337b084
TH
2300 *h++ = '\n';
2301 return h;
a6ec74c1
JH
2302}
2303
2304STATIC SV *
f7fe979e 2305S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2306{
8b6e33c7
AL
2307 SV *result = newSVpvn(s, l);
2308 char *const result_c = SvPV_nolen(result); /* convenience */
2309 char *out = result_c;
2310 bool skip = 1;
2311 bool ignore = 0;
a6ec74c1 2312
7918f24d
NC
2313 PERL_ARGS_ASSERT_IS_AN_INT;
2314
a6ec74c1
JH
2315 while (*s) {
2316 switch (*s) {
2317 case ' ':
2318 break;
2319 case '+':
2320 if (!skip) {
2321 SvREFCNT_dec(result);
2322 return (NULL);
2323 }
2324 break;
2325 case '0':
2326 case '1':
2327 case '2':
2328 case '3':
2329 case '4':
2330 case '5':
2331 case '6':
2332 case '7':
2333 case '8':
2334 case '9':
2335 skip = 0;
2336 if (!ignore) {
2337 *(out++) = *s;
2338 }
2339 break;
2340 case '.':
2341 ignore = 1;
2342 break;
2343 default:
2344 SvREFCNT_dec(result);
2345 return (NULL);
2346 }
2347 s++;
2348 }
2349 *(out++) = '\0';
2350 SvCUR_set(result, out - result_c);
2351 return (result);
2352}
2353
2354/* pnum must be '\0' terminated */
2355STATIC int
2356S_div128(pTHX_ SV *pnum, bool *done)
2357{
8b6e33c7
AL
2358 STRLEN len;
2359 char * const s = SvPV(pnum, len);
2360 char *t = s;
2361 int m = 0;
2362
7918f24d
NC
2363 PERL_ARGS_ASSERT_DIV128;
2364
8b6e33c7
AL
2365 *done = 1;
2366 while (*t) {
2367 const int i = m * 10 + (*t - '0');
2368 const int r = (i >> 7); /* r < 10 */
2369 m = i & 0x7F;
2370 if (r) {
2371 *done = 0;
2372 }
2373 *(t++) = '0' + r;
a6ec74c1 2374 }
8b6e33c7
AL
2375 *(t++) = '\0';
2376 SvCUR_set(pnum, (STRLEN) (t - s));
2377 return (m);
a6ec74c1
JH
2378}
2379
18529408 2380/*
7accc089
JH
2381=for apidoc packlist
2382
2383The engine implementing pack() Perl function.
2384
bfce84ec
AL
2385=cut
2386*/
7accc089
JH
2387
2388void
f7fe979e 2389Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2390{
97aff369 2391 dVAR;
aadb217d
JH
2392 tempsym_t sym;
2393
7918f24d
NC
2394 PERL_ARGS_ASSERT_PACKLIST;
2395
f7fe979e 2396 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2397
f337b084
TH
2398 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2399 Also make sure any UTF8 flag is loaded */
56eb0262 2400 SvPV_force_nolen(cat);
bfce84ec
AL
2401 if (DO_UTF8(cat))
2402 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2403
49704364
WL
2404 (void)pack_rec( cat, &sym, beglist, endlist );
2405}
2406
f337b084
TH
2407/* like sv_utf8_upgrade, but also repoint the group start markers */
2408STATIC void
2409marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2410 STRLEN len;
2411 tempsym_t *group;
f7fe979e
AL
2412 const char *from_ptr, *from_start, *from_end, **marks, **m;
2413 char *to_start, *to_ptr;
f337b084
TH
2414
2415 if (SvUTF8(sv)) return;
2416
aa07b2f6 2417 from_start = SvPVX_const(sv);
f337b084
TH
2418 from_end = from_start + SvCUR(sv);
2419 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2420 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2421 if (from_ptr == from_end) {
2422 /* Simple case: no character needs to be changed */
2423 SvUTF8_on(sv);
2424 return;
2425 }
2426
3473cf63 2427 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2428 Newx(to_start, len, char);
f337b084
TH
2429 Copy(from_start, to_start, from_ptr-from_start, char);
2430 to_ptr = to_start + (from_ptr-from_start);
2431
a02a5408 2432 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2433 for (group=sym_ptr; group; group = group->previous)
2434 marks[group->level] = from_start + group->strbeg;
2435 marks[sym_ptr->level+1] = from_end+1;
2436 for (m = marks; *m < from_ptr; m++)
2437 *m = to_start + (*m-from_start);
2438
2439 for (;from_ptr < from_end; from_ptr++) {
2440 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2441 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2442 }
2443 *to_ptr = 0;
2444
2445 while (*m == from_ptr) *m++ = to_ptr;
2446 if (m != marks + sym_ptr->level+1) {
2447 Safefree(marks);
2448 Safefree(to_start);
5f5991a0 2449 Perl_croak(aTHX_ "panic: marks beyond string end");
f337b084
TH
2450 }
2451 for (group=sym_ptr; group; group = group->previous)
2452 group->strbeg = marks[group->level] - to_start;
2453 Safefree(marks);
2454
2455 if (SvOOK(sv)) {
2456 if (SvIVX(sv)) {
b162af07 2457 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2458 from_start -= SvIVX(sv);
2459 SvIV_set(sv, 0);
2460 }
2461 SvFLAGS(sv) &= ~SVf_OOK;
2462 }
2463 if (SvLEN(sv) != 0)
2464 Safefree(from_start);
f880fe2f 2465 SvPV_set(sv, to_start);
b162af07
SP
2466 SvCUR_set(sv, to_ptr - to_start);
2467 SvLEN_set(sv, len);
f337b084
TH
2468 SvUTF8_on(sv);
2469}
2470
2471/* Exponential string grower. Makes string extension effectively O(n)
2472 needed says how many extra bytes we need (not counting the final '\0')
2473 Only grows the string if there is an actual lack of space
2474*/
2475STATIC char *
0bd48802 2476S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2477 const STRLEN cur = SvCUR(sv);
2478 const STRLEN len = SvLEN(sv);
f337b084 2479 STRLEN extend;
7918f24d
NC
2480
2481 PERL_ARGS_ASSERT_SV_EXP_GROW;
2482
f337b084
TH
2483 if (len - cur > needed) return SvPVX(sv);
2484 extend = needed > len ? needed : len;
2485 return SvGROW(sv, len+extend+1);
2486}
49704364
WL
2487
2488STATIC
2489SV **
f337b084 2490S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2491{
97aff369 2492 dVAR;
49704364 2493 tempsym_t lookahead;
f337b084
TH
2494 I32 items = endlist - beglist;
2495 bool found = next_symbol(symptr);
2496 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2497 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2498
7918f24d
NC
2499 PERL_ARGS_ASSERT_PACK_REC;
2500
f337b084
TH
2501 if (symptr->level == 0 && found && symptr->code == 'U') {
2502 marked_upgrade(aTHX_ cat, symptr);
2503 symptr->flags |= FLAG_DO_UTF8;
2504 utf8 = 0;
49704364 2505 }
f337b084 2506 symptr->strbeg = SvCUR(cat);
49704364
WL
2507
2508 while (found) {
f337b084
TH
2509 SV *fromstr;
2510 STRLEN fromlen;
2511 I32 len;
a0714e2c 2512 SV *lengthcode = NULL;
49704364 2513 I32 datumtype = symptr->code;
f337b084
TH
2514 howlen_t howlen = symptr->howlen;
2515 char *start = SvPVX(cat);
2516 char *cur = start + SvCUR(cat);
49704364 2517
f337b084
TH
2518#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2519
2520 switch (howlen) {
fc241834 2521 case e_star:
f337b084
TH
2522 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2523 0 : items;
2524 break;
2525 default:
2526 /* e_no_len and e_number */
2527 len = symptr->length;
49704364
WL
2528 break;
2529 }
2530
f337b084 2531 if (len) {
a7a3cfaa 2532 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2533
a7a3cfaa
TH
2534 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2535 /* We can process this letter. */
2536 STRLEN size = props & PACK_SIZE_MASK;
2537 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2538 }
f337b084
TH
2539 }
2540
49704364
WL
2541 /* Look ahead for next symbol. Do we have code/code? */
2542 lookahead = *symptr;
2543 found = next_symbol(&lookahead);
246f24af
TH
2544 if (symptr->flags & FLAG_SLASH) {
2545 IV count;
f337b084 2546 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2547 if (strchr("aAZ", lookahead.code)) {
2548 if (lookahead.howlen == e_number) count = lookahead.length;
2549 else {
ce399ba6
NC
2550 if (items > 0) {
2551 if (SvGAMAGIC(*beglist)) {
2552 /* Avoid reading the active data more than once
2553 by copying it to a temporary. */
2554 STRLEN len;
2555 const char *const pv = SvPV_const(*beglist, len);
740cce10 2556 SV *const temp
59cd0e26
NC
2557 = newSVpvn_flags(pv, len,
2558 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2559 *beglist = temp;
2560 }
246f24af
TH
2561 count = DO_UTF8(*beglist) ?
2562 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2563 }
246f24af
TH
2564 else count = 0;
2565 if (lookahead.code == 'Z') count++;
2566 }
2567 } else {
2568 if (lookahead.howlen == e_number && lookahead.length < items)
2569 count = lookahead.length;
2570 else count = items;
2571 }
2572 lookahead.howlen = e_number;
2573 lookahead.length = count;
2574 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2575 }
49704364 2576
fc241834
RGS
2577 /* Code inside the switch must take care to properly update
2578 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2579 doesn't simply leave using break */
1109a392 2580 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2581 default:
f337b084
TH
2582 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2583 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2584 case '%':
49704364 2585 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2586 {
2587 char *from;
2588#ifdef PERL_PACK_CAN_SHRIEKSIGN
2589 case '.' | TYPE_IS_SHRIEKING:
2590#endif
2591 case '.':
2592 if (howlen == e_star) from = start;
2593 else if (len == 0) from = cur;
2594 else {
2595 tempsym_t *group = symptr;
2596
2597 while (--len && group) group = group->previous;
2598 from = group ? start + group->strbeg : start;
2599 }
2600 fromstr = NEXTFROM;
2601 len = SvIV(fromstr);
2602 goto resize;
2603#ifdef PERL_PACK_CAN_SHRIEKSIGN
2604 case '@' | TYPE_IS_SHRIEKING:
2605#endif
a6ec74c1 2606 case '@':
28be1210
TH
2607 from = start + symptr->strbeg;
2608 resize:
2609#ifdef PERL_PACK_CAN_SHRIEKSIGN
2610 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2611#else /* PERL_PACK_CAN_SHRIEKSIGN */
2612 if (utf8)
2613#endif
2614 if (len >= 0) {
2615 while (len && from < cur) {
2616 from += UTF8SKIP(from);
2617 len--;
2618 }
2619 if (from > cur)
2620 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2621 if (len) {
2622 /* Here we know from == cur */
2623 grow:
2624 GROWING(0, cat, start, cur, len);
2625 Zero(cur, len, char);
2626 cur += len;
2627 } else if (from < cur) {
2628 len = cur - from;
2629 goto shrink;
2630 } else goto no_change;
2631 } else {
2632 cur = from;
2633 len = -len;
2634 goto utf8_shrink;
f337b084 2635 }
28be1210
TH
2636 else {
2637 len -= cur - from;
f337b084 2638 if (len > 0) goto grow;
28be1210 2639 if (len == 0) goto no_change;
fc241834 2640 len = -len;
28be1210 2641 goto shrink;
f337b084 2642 }
a6ec74c1 2643 break;
28be1210 2644 }
fc241834 2645 case '(': {
49704364 2646 tempsym_t savsym = *symptr;
66c611c5
MHM
2647 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2648 symptr->flags |= group_modifiers;
49704364
WL
2649 symptr->patend = savsym.grpend;
2650 symptr->level++;
f337b084 2651 symptr->previous = &lookahead;
18529408 2652 while (len--) {
f337b084
TH
2653 U32 was_utf8;
2654 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2655 else symptr->flags &= ~FLAG_PARSE_UTF8;
2656 was_utf8 = SvUTF8(cat);
49704364 2657 symptr->patptr = savsym.grpbeg;
f337b084
TH
2658 beglist = pack_rec(cat, symptr, beglist, endlist);
2659 if (SvUTF8(cat) != was_utf8)
2660 /* This had better be an upgrade while in utf8==0 mode */
2661 utf8 = 1;
2662
49704364 2663 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2664 break; /* No way to continue */
2665 }
ee790063 2666 items = endlist - beglist;
f337b084
TH
2667 lookahead.flags = symptr->flags & ~group_modifiers;
2668 goto no_change;
18529408 2669 }
62f95557
IZ
2670 case 'X' | TYPE_IS_SHRIEKING:
2671 if (!len) /* Avoid division by 0 */
2672 len = 1;
f337b084
TH
2673 if (utf8) {
2674 char *hop, *last;
2675 I32 l = len;
2676 hop = last = start;
2677 while (hop < cur) {
2678 hop += UTF8SKIP(hop);
2679 if (--l == 0) {
2680 last = hop;
2681 l = len;
2682 }
2683 }
2684 if (last > cur)
2685 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2686 cur = last;
2687 break;
2688 }
2689 len = (cur-start) % len;
62f95557 2690 /* FALL THROUGH */
a6ec74c1 2691 case 'X':
f337b084
TH
2692 if (utf8) {
2693 if (len < 1) goto no_change;
28be1210 2694 utf8_shrink:
f337b084
TH
2695 while (len > 0) {
2696 if (cur <= start)
28be1210
TH
2697 Perl_croak(aTHX_ "'%c' outside of string in pack",
2698 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2699 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2700 if (cur <= start)
28be1210
TH
2701 Perl_croak(aTHX_ "'%c' outside of string in pack",
2702 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2703 }
2704 len--;
2705 }
2706 } else {
fc241834 2707 shrink:
f337b084 2708 if (cur - start < len)
28be1210
TH
2709 Perl_croak(aTHX_ "'%c' outside of string in pack",
2710 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2711 cur -= len;
2712 }
2713 if (cur < start+symptr->strbeg) {
2714 /* Make sure group starts don't point into the void */
2715 tempsym_t *group;
9e27e96a 2716 const STRLEN length = cur-start;
f337b084
TH
2717 for (group = symptr;
2718 group && length < group->strbeg;
2719 group = group->previous) group->strbeg = length;
2720 lookahead.strbeg = length;
2721 }
a6ec74c1 2722 break;
fc241834
RGS
2723 case 'x' | TYPE_IS_SHRIEKING: {
2724 I32 ai32;
62f95557
IZ
2725 if (!len) /* Avoid division by 0 */
2726 len = 1;
230e1fce 2727 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2728 else ai32 = (cur - start) % len;
2729 if (ai32 == 0) goto no_change;
2730 len -= ai32;
2731 }
2732 /* FALL THROUGH */
a6ec74c1 2733 case 'x':
f337b084 2734 goto grow;
a6ec74c1
JH
2735 case 'A':
2736 case 'Z':
f337b084 2737 case 'a': {
f7fe979e 2738 const char *aptr;
f337b084 2739
a6ec74c1 2740 fromstr = NEXTFROM;
e62f0680 2741 aptr = SvPV_const(fromstr, fromlen);
f337b084 2742 if (DO_UTF8(fromstr)) {
f7fe979e 2743 const char *end, *s;
f337b084
TH
2744
2745 if (!utf8 && !SvUTF8(cat)) {
2746 marked_upgrade(aTHX_ cat, symptr);
2747 lookahead.flags |= FLAG_DO_UTF8;
2748 lookahead.strbeg = symptr->strbeg;
2749 utf8 = 1;
2750 start = SvPVX(cat);
2751 cur = start + SvCUR(cat);
2752 }
fc241834 2753 if (howlen == e_star) {
f337b084
TH
2754 if (utf8) goto string_copy;
2755 len = fromlen+1;
2756 }
2757 s = aptr;
2758 end = aptr + fromlen;
2759 fromlen = datumtype == 'Z' ? len-1 : len;
2760 while ((I32) fromlen > 0 && s < end) {
2761 s += UTF8SKIP(s);
2762 fromlen--;
2763 }
2764 if (s > end)
2765 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2766 if (utf8) {
fc241834 2767 len = fromlen;
f337b084
TH
2768 if (datumtype == 'Z') len++;
2769 fromlen = s-aptr;
2770 len += fromlen;
fc241834 2771
f337b084 2772 goto string_copy;
fc241834 2773 }
f337b084
TH
2774 fromlen = len - fromlen;
2775 if (datumtype == 'Z') fromlen--;
2776 if (howlen == e_star) {
2777 len = fromlen;
2778 if (datumtype == 'Z') len++;
fc241834 2779 }
f337b084 2780 GROWING(0, cat, start, cur, len);
fc241834 2781 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2782 datumtype | TYPE_IS_PACK))
5f5991a0 2783 Perl_croak(aTHX_ "panic: predicted utf8 length not available");
f337b084 2784 cur += fromlen;
a6ec74c1 2785 len -= fromlen;
f337b084
TH
2786 } else if (utf8) {
2787 if (howlen == e_star) {
2788 len = fromlen;
2789 if (datumtype == 'Z') len++;
a6ec74c1 2790 }
f337b084
TH
2791 if (len <= (I32) fromlen) {
2792 fromlen = len;
2793 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2794 }
fc241834 2795 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2796 upgrade, so:
2797 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2798 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2799 len -= fromlen;
2800 while (fromlen > 0) {
230e1fce 2801 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2802 aptr++;
2803 fromlen--;
fc241834 2804 }
f337b084
TH
2805 } else {
2806 string_copy:
2807 if (howlen == e_star) {
2808 len = fromlen;
2809 if (datumtype == 'Z') len++;
2810 }
2811 if (len <= (I32) fromlen) {
2812 fromlen = len;
2813 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2814 }
f337b084
TH
2815 GROWING(0, cat, start, cur, len);
2816 Copy(aptr, cur, fromlen, char);
2817 cur += fromlen;
2818 len -= fromlen;
a6ec74c1 2819 }
f337b084
TH
2820 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2821 cur += len;
3c4fb04a 2822 SvTAINT(cat);
a6ec74c1 2823 break;
f337b084 2824 }
a6ec74c1 2825 case 'B':
f337b084 2826 case 'b': {
b83604b4 2827 const char *str, *end;
f337b084
TH
2828 I32 l, field_len;
2829 U8 bits;
2830 bool utf8_source;
2831 U32 utf8_flags;
a6ec74c1 2832
fc241834 2833 fromstr = NEXTFROM;
b83604b4 2834 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2835 end = str + fromlen;
2836 if (DO_UTF8(fromstr)) {
2837 utf8_source = TRUE;
041457d9 2838 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2839 } else {
2840 utf8_source = FALSE;
2841 utf8_flags = 0; /* Unused, but keep compilers happy */
2842 }
2843 if (howlen == e_star) len = fromlen;
2844 field_len = (len+7)/8;
2845 GROWING(utf8, cat, start, cur, field_len);
2846 if (len > (I32)fromlen) len = fromlen;
2847 bits = 0;
2848 l = 0;
2849 if (datumtype == 'B')
2850 while (l++ < len) {
2851 if (utf8_source) {
95b63a38 2852 UV val = 0;
f337b084
TH
2853 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2854 bits |= val & 1;
2855 } else bits |= *str++ & 1;
2856 if (l & 7) bits <<= 1;
fc241834 2857 else {
f337b084
TH
2858 PUSH_BYTE(utf8, cur, bits);
2859 bits = 0;
a6ec74c1
JH
2860 }
2861 }
f337b084
TH
2862 else
2863 /* datumtype == 'b' */
2864 while (l++ < len) {
2865 if (utf8_source) {
95b63a38 2866 UV val = 0;
f337b084
TH
2867 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2868 if (val & 1) bits |= 0x80;
2869 } else if (*str++ & 1)
2870 bits |= 0x80;
2871 if (l & 7) bits >>= 1;
fc241834 2872 else {
f337b084
TH
2873 PUSH_BYTE(utf8, cur, bits);
2874 bits = 0;
a6ec74c1
JH
2875 }
2876 }
f337b084
TH
2877 l--;
2878 if (l & 7) {
fc241834 2879 if (datumtype == 'B')
f337b084 2880 bits <<= 7 - (l & 7);
fc241834 2881 else
f337b084
TH
2882 bits >>= 7 - (l & 7);
2883 PUSH_BYTE(utf8, cur, bits);
2884 l += 7;
a6ec74c1 2885 }
f337b084
TH
2886 /* Determine how many chars are left in the requested field */
2887 l /= 8;
2888 if (howlen == e_star) field_len = 0;
2889 else field_len -= l;
2890 Zero(cur, field_len, char);
2891 cur += field_len;
a6ec74c1 2892 break;
f337b084 2893 }
a6ec74c1 2894 case 'H':
f337b084 2895 case 'h': {
10516c54 2896 const char *str, *end;
f337b084
TH
2897 I32 l, field_len;
2898 U8 bits;
2899 bool utf8_source;
2900 U32 utf8_flags;
a6ec74c1 2901
fc241834 2902 fromstr = NEXTFROM;
10516c54 2903 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2904 end = str + fromlen;
2905 if (DO_UTF8(fromstr)) {
2906 utf8_source = TRUE;
041457d9 2907 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2908 } else {
2909 utf8_source = FALSE;
2910 utf8_flags = 0; /* Unused, but keep compilers happy */
2911 }
2912 if (howlen == e_star) len = fromlen;
2913 field_len = (len+1)/2;
2914 GROWING(utf8, cat, start, cur, field_len);
2915 if (!utf8 && len > (I32)fromlen) len = fromlen;
2916 bits = 0;
2917 l = 0;
2918 if (datumtype == 'H')
2919 while (l++ < len) {
2920 if (utf8_source) {
95b63a38 2921 UV val = 0;
f337b084
TH
2922 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2923 if (val < 256 && isALPHA(val))
2924 bits |= (val + 9) & 0xf;
a6ec74c1 2925 else
f337b084
TH
2926 bits |= val & 0xf;
2927 } else if (isALPHA(*str))
2928 bits |= (*str++ + 9) & 0xf;
2929 else
2930 bits |= *str++ & 0xf;
2931 if (l & 1) bits <<= 4;
fc241834 2932 else {
f337b084
TH
2933 PUSH_BYTE(utf8, cur, bits);
2934 bits = 0;
a6ec74c1
JH
2935 }
2936 }
f337b084
TH
2937 else
2938 while (l++ < len) {
2939 if (utf8_source) {
95b63a38 2940 UV val = 0;
f337b084
TH
2941 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2942 if (val < 256 && isALPHA(val))
2943 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2944 else
f337b084
TH
2945 bits |= (val & 0xf) << 4;
2946 } else if (isALPHA(*str))
2947 bits |= ((*str++ + 9) & 0xf) << 4;
2948 else
2949 bits |= (*str++ & 0xf) << 4;
2950 if (l & 1) bits >>= 4;
fc241834 2951 else {
f337b084
TH
2952 PUSH_BYTE(utf8, cur, bits);
2953 bits = 0;
a6ec74c1 2954 }
fc241834 2955 }
f337b084
TH
2956 l--;
2957 if (l & 1) {
2958 PUSH_BYTE(utf8, cur, bits);
2959 l++;
2960 }
2961 /* Determine how many chars are left in the requested field */
2962 l /= 2;
2963 if (howlen == e_star) field_len = 0;
2964 else field_len -= l;
2965 Zero(cur, field_len, char);
2966 cur += field_len;
2967 break;
fc241834
RGS
2968 }
2969 case 'c':
f337b084
TH
2970 while (len-- > 0) {
2971 IV aiv;
2972 fromstr = NEXTFROM;
2973 aiv = SvIV(fromstr);
a2a5de95
NC
2974 if ((-128 > aiv || aiv > 127))
2975 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2976 "Character in 'c' format wrapped in pack");
585ec06d 2977 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2978 }
2979 break;
2980 case 'C':
f337b084
TH
2981 if (len == 0) {
2982 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2983 break;
2984 }
a6ec74c1 2985 while (len-- > 0) {
f337b084 2986 IV aiv;
a6ec74c1 2987 fromstr = NEXTFROM;
f337b084 2988 aiv = SvIV(fromstr);
a2a5de95
NC
2989 if ((0 > aiv || aiv > 0xff))
2990 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2991 "Character in 'C' format wrapped in pack");
1651fc44 2992 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2993 }
fc241834
RGS
2994 break;
2995 case 'W': {
2996 char *end;
670f1322 2997 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2998
2999 end = start+SvLEN(cat)-1;
3000 if (utf8) end -= UTF8_MAXLEN-1;
3001 while (len-- > 0) {
3002 UV auv;
3003 fromstr = NEXTFROM;
3004 auv = SvUV(fromstr);
3005 if (in_bytes) auv = auv % 0x100;
3006 if (utf8) {
3007 W_utf8:
3008 if (cur > end) {
3009 *cur = '\0';
b162af07 3010 SvCUR_set(cat, cur - start);
fc241834
RGS
3011
3012 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3013 end = start+SvLEN(cat)-UTF8_MAXLEN;
3014 }
230e1fce
NC
3015 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3016 NATIVE_TO_UNI(auv),
041457d9 3017 warn_utf8 ?
230e1fce 3018 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3019 } else {
3020 if (auv >= 0x100) {
3021 if (!SvUTF8(cat)) {
3022 *cur = '\0';
b162af07 3023 SvCUR_set(cat, cur - start);
fc241834
RGS
3024 marked_upgrade(aTHX_ cat, symptr);
3025 lookahead.flags |= FLAG_DO_UTF8;
3026 lookahead.strbeg = symptr->strbeg;
3027 utf8 = 1;
3028 start = SvPVX(cat);
3029 cur = start + SvCUR(cat);
3030 end = start+SvLEN(cat)-UTF8_MAXLEN;
3031 goto W_utf8;
3032 }
a2a5de95
NC
3033 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3034 "Character in 'W' format wrapped in pack");
fc241834
RGS
3035 auv &= 0xff;
3036 }
3037 if (cur >= end) {
3038 *cur = '\0';
b162af07 3039 SvCUR_set(cat, cur - start);
fc241834
RGS
3040 GROWING(0, cat, start, cur, len+1);
3041 end = start+SvLEN(cat)-1;
3042 }
fe2774ed 3043 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3044 }
3045 }
3046 break;
fc241834
RGS
3047 }
3048 case 'U': {
3049 char *end;
3050
3051 if (len == 0) {
3052 if (!(symptr->flags & FLAG_DO_UTF8)) {
3053 marked_upgrade(aTHX_ cat, symptr);
3054 lookahead.flags |= FLAG_DO_UTF8;
3055 lookahead.strbeg = symptr->strbeg;
3056 }
3057 utf8 = 0;
3058 goto no_change;
3059 }
3060
3061 end = start+SvLEN(cat);
3062 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3063 while (len-- > 0) {
fc241834 3064 UV auv;
a6ec74c1 3065 fromstr = NEXTFROM;
fc241834
RGS
3066 auv = SvUV(fromstr);
3067 if (utf8) {
230e1fce 3068 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3069 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3070 warn_utf8 ?
fc241834
RGS
3071 0 : UNICODE_ALLOW_ANY);
3072 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3073 *cur = '\0';
b162af07 3074 SvCUR_set(cat, cur - start);
fc241834
RGS
3075 GROWING(0, cat, start, cur,
3076 len+(endb-buffer)*UTF8_EXPAND);
3077 end = start+SvLEN(cat);
3078 }
64844641 3079 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3080 } else {
3081 if (cur >= end) {
3082 *cur = '\0';
b162af07 3083 SvCUR_set(cat, cur - start);
fc241834
RGS
3084 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3085 end = start+SvLEN(cat)-UTF8_MAXLEN;
3086 }
230e1fce 3087 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3088 warn_utf8 ?
230e1fce 3089 0 : UNICODE_ALLOW_ANY);
fc241834 3090 }
a6ec74c1 3091 }
a6ec74c1 3092 break;
fc241834 3093 }
a6ec74c1
JH
3094 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3095 case 'f':
a6ec74c1 3096 while (len-- > 0) {
f337b084
TH
3097 float afloat;
3098 NV anv;
a6ec74c1 3099 fromstr = NEXTFROM;
f337b084 3100 anv = SvNV(fromstr);
5cdb9e01 3101#ifdef __VOS__
f337b084 3102 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3103 during conversion from double to float into infinity, so we
3104 do it by hand. This code should either be generalized for
3105 any OS that needs it, or removed if and when VOS implements
3106 posix-976 (suggestion to support mapping to infinity).
3107 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3108{
3109extern const float _float_constants[];
f337b084 3110 if (anv > FLT_MAX)
fc241834 3111 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3112 else if (anv < -FLT_MAX)
fc241834 3113 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3114 else afloat = (float) anv;
3722f0dc 3115}
f337b084 3116#else /* __VOS__ */
baf3cf9c 3117# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3118 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3119 * on Alpha; fake it if we don't have them.
3120 */
f337b084 3121 if (anv > FLT_MAX)
fc241834 3122 afloat = FLT_MAX;
f337b084 3123 else if (anv < -FLT_MAX)
fc241834 3124 afloat = -FLT_MAX;
f337b084 3125 else afloat = (float)anv;
baf3cf9c 3126# else
f337b084 3127 afloat = (float)anv;
baf3cf9c 3128# endif
f337b084 3129#endif /* __VOS__ */
1109a392 3130 DO_BO_PACK_N(afloat, float);
f337b084 3131 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3132 }
3133 break;
3134 case 'd':
a6ec74c1 3135 while (len-- > 0) {
f337b084
TH
3136 double adouble;
3137 NV anv;
a6ec74c1 3138 fromstr = NEXTFROM;
f337b084 3139 anv = SvNV(fromstr);
5cdb9e01 3140#ifdef __VOS__
f337b084 3141 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3142 during conversion from long double to double into infinity,
3143 so we do it by hand. This code should either be generalized
3144 for any OS that needs it, or removed if and when VOS
3145 implements posix-976 (suggestion to support mapping to
3146 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3147{
3148extern const double _double_constants[];
f337b084 3149 if (anv > DBL_MAX)
fc241834 3150 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3151 else if (anv < -DBL_MAX)
fc241834 3152 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3153 else adouble = (double) anv;
3722f0dc 3154}
f337b084 3155#else /* __VOS__ */
baf3cf9c 3156# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3157 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3158 * on Alpha; fake it if we don't have them.
3159 */
f337b084 3160 if (anv > DBL_MAX)
fc241834 3161 adouble = DBL_MAX;
f337b084 3162 else if (anv < -DBL_MAX)
fc241834 3163 adouble = -DBL_MAX;
f337b084 3164 else adouble = (double)anv;
baf3cf9c 3165# else
f337b084 3166 adouble = (double)anv;
baf3cf9c 3167# endif
f337b084 3168#endif /* __VOS__ */
1109a392 3169 DO_BO_PACK_N(adouble, double);
f337b084 3170 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3171 }
3172 break;
fc241834 3173 case 'F': {
275663fa 3174 NV_bytes anv;
1109a392 3175 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3176 while (len-- > 0) {
3177 fromstr = NEXTFROM;
275663fa 3178 anv.nv = SvNV(fromstr);
1109a392 3179 DO_BO_PACK_N(anv, NV);
275663fa 3180 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
3181 }
3182 break;
fc241834 3183 }
92d41999 3184#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 3185 case 'D': {
275663fa 3186 ld_bytes aldouble;
1109a392
MHM
3187 /* long doubles can have unused bits, which may be nonzero */
3188 Zero(&aldouble, 1, long double);
92d41999
JH
3189 while (len-- > 0) {
3190 fromstr = NEXTFROM;
275663fa 3191 aldouble.ld = (long double)SvNV(fromstr);
1109a392 3192 DO_BO_PACK_N(aldouble, long double);
275663fa 3193 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
3194 }
3195 break;
fc241834 3196 }
92d41999 3197#endif
7212898e 3198#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3199 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3200#endif
a6ec74c1
JH
3201 case 'n':
3202 while (len-- > 0) {
f337b084 3203 I16 ai16;
a6ec74c1 3204 fromstr = NEXTFROM;
ef108786 3205 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3206#ifdef HAS_HTONS
ef108786 3207 ai16 = PerlSock_htons(ai16);
a6ec74c1 3208#endif
f337b084 3209 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3210 }
3211 break;
7212898e 3212#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3213 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3214#endif
a6ec74c1
JH
3215 case 'v':
3216 while (len-- > 0) {
f337b084 3217 I16 ai16;
a6ec74c1 3218 fromstr = NEXTFROM;
ef108786 3219 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3220#ifdef HAS_HTOVS
ef108786 3221 ai16 = htovs(ai16);
a6ec74c1 3222#endif
f337b084 3223 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3224 }
3225 break;
49704364 3226 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3227#if SHORTSIZE != SIZE16
fc241834 3228 while (len-- > 0) {
f337b084 3229 unsigned short aushort;
fc241834
RGS
3230 fromstr = NEXTFROM;
3231 aushort = SvUV(fromstr);
3232 DO_BO_PACK(aushort, s);
f337b084 3233 PUSH_VAR(utf8, cur, aushort);
fc241834 3234 }
49704364
WL
3235 break;
3236#else
3237 /* Fall through! */
a6ec74c1 3238#endif
49704364 3239 case 'S':
fc241834 3240 while (len-- > 0) {
f337b084 3241 U16 au16;
fc241834
RGS
3242 fromstr = NEXTFROM;
3243 au16 = (U16)SvUV(fromstr);
3244 DO_BO_PACK(au16, 16);
f337b084 3245 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3246 }
3247 break;
49704364 3248 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3249#if SHORTSIZE != SIZE16
fc241834 3250 while (len-- > 0) {
f337b084 3251 short ashort;
fc241834
RGS
3252 fromstr = NEXTFROM;
3253 ashort = SvIV(fromstr);
3254 DO_BO_PACK(ashort, s);
f337b084 3255 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3256 }
49704364
WL
3257 break;
3258#else
3259 /* Fall through! */
a6ec74c1 3260#endif
49704364
WL
3261 case 's':
3262 while (len-- > 0) {
f337b084 3263 I16 ai16;
49704364 3264 fromstr = NEXTFROM;
ef108786
MHM
3265 ai16 = (I16)SvIV(fromstr);
3266 DO_BO_PACK(ai16, 16);
f337b084 3267 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3268 }
3269 break;
3270 case 'I':
49704364 3271 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3272 while (len-- > 0) {
f337b084 3273 unsigned int auint;
a6ec74c1
JH
3274 fromstr = NEXTFROM;
3275 auint = SvUV(fromstr);
1109a392 3276 DO_BO_PACK(auint, i);
f337b084 3277 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3278 }
3279 break;
92d41999
JH
3280 case 'j':
3281 while (len-- > 0) {
f337b084 3282 IV aiv;
92d41999
JH
3283 fromstr = NEXTFROM;
3284 aiv = SvIV(fromstr);
1109a392
MHM
3285#if IVSIZE == INTSIZE
3286 DO_BO_PACK(aiv, i);
3287#elif IVSIZE == LONGSIZE
3288 DO_BO_PACK(aiv, l);
3289#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3290 DO_BO_PACK(aiv, 64);
f337b084
TH
3291#else
3292 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3293#endif
f337b084 3294 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3295 }
3296 break;
3297 case 'J':
3298 while (len-- > 0) {
f337b084 3299 UV auv;
92d41999
JH
3300 fromstr = NEXTFROM;
3301 auv = SvUV(fromstr);
1109a392
MHM
3302#if UVSIZE == INTSIZE
3303 DO_BO_PACK(auv, i);
3304#elif UVSIZE == LONGSIZE
3305 DO_BO_PACK(auv, l);
3306#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3307 DO_BO_PACK(auv, 64);
f337b084
TH
3308#else
3309 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3310#endif
f337b084 3311 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3312 }
3313 break;
a6ec74c1
JH
3314 case 'w':
3315 while (len-- > 0) {
f337b084 3316 NV anv;
a6ec74c1 3317 fromstr = NEXTFROM;
15e9f109 3318 anv = SvNV(fromstr);
a6ec74c1 3319
f337b084
TH
3320 if (anv < 0) {
3321 *cur = '\0';
b162af07 3322 SvCUR_set(cat, cur - start);
49704364 3323 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3324 }
a6ec74c1 3325
196b62db
NC
3326 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3327 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3328 any negative IVs will have already been got by the croak()
3329 above. IOK is untrue for fractions, so we test them
3330 against UV_MAX_P1. */
f337b084
TH
3331 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3332 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3333 char *in = buf + sizeof(buf);
196b62db 3334 UV auv = SvUV(fromstr);
a6ec74c1
JH
3335
3336 do {
eb160463 3337 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3338 auv >>= 7;
3339 } while (auv);
3340 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3341 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3342 in, (buf + sizeof(buf)) - in);
3343 } else if (SvPOKp(fromstr))
3344 goto w_string;
a6ec74c1 3345 else if (SvNOKp(fromstr)) {
0258719b
NC
3346 /* 10**NV_MAX_10_EXP is the largest power of 10
3347 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3348 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3349 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3350 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3351 Some C compilers are strict about integral constant
3352 expressions so we conservatively divide by a slightly
3353 smaller integer instead of multiplying by the exact
3354 floating-point value.
0258719b
NC
3355 */
3356#ifdef NV_MAX_10_EXP
f337b084 3357 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3358 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3359#else
f337b084 3360 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3361 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3362#endif
a6ec74c1
JH
3363 char *in = buf + sizeof(buf);
3364
8b6e33c7 3365 anv = Perl_floor(anv);
a6ec74c1 3366 do {
8b6e33c7 3367 const NV next = Perl_floor(anv / 128);
a6ec74c1 3368 if (in <= buf) /* this cannot happen ;-) */
49704364 3369 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3370 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3371 anv = next;
3372 } while (anv > 0);
a6ec74c1 3373 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3374 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3375 in, (buf + sizeof(buf)) - in);
3376 } else {
8b6e33c7
AL
3377 const char *from;
3378 char *result, *in;
735b914b
JH
3379 SV *norm;
3380 STRLEN len;
3381 bool done;
3382
f337b084 3383 w_string:
735b914b 3384 /* Copy string and check for compliance */
349d4f2f 3385 from = SvPV_const(fromstr, len);
735b914b 3386 if ((norm = is_an_int(from, len)) == NULL)
49704364 3387 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3388
a02a5408 3389 Newx(result, len, char);
735b914b
JH
3390 in = result + len;
3391 done = FALSE;
f337b084 3392 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3393 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3394 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3395 in, (result + len) - in);
735b914b
JH
3396 Safefree(result);
3397 SvREFCNT_dec(norm); /* free norm */
fc241834 3398 }
a6ec74c1
JH
3399 }
3400 break;
3401 case 'i':
49704364 3402 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3403 while (len-- > 0) {
f337b084 3404 int aint;
a6ec74c1
JH
3405 fromstr = NEXTFROM;
3406 aint = SvIV(fromstr);
1109a392 3407 DO_BO_PACK(aint, i);
f337b084 3408 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3409 }
3410 break;
7212898e 3411#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3412 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3413#endif
a6ec74c1
JH
3414 case 'N':
3415 while (len-- > 0) {
f337b084 3416 U32 au32;
a6ec74c1 3417 fromstr = NEXTFROM;
ef108786 3418 au32 = SvUV(fromstr);
a6ec74c1 3419#ifdef HAS_HTONL
ef108786 3420 au32 = PerlSock_htonl(au32);
a6ec74c1 3421#endif
f337b084 3422 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3423 }
3424 break;
7212898e 3425#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3426 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3427#endif
a6ec74c1
JH
3428 case 'V':
3429 while (len-- > 0) {
f337b084 3430 U32 au32;
a6ec74c1 3431 fromstr = NEXTFROM;
ef108786 3432 au32 = SvUV(fromstr);
a6ec74c1 3433#ifdef HAS_HTOVL
ef108786 3434 au32 = htovl(au32);
a6ec74c1 3435#endif
f337b084 3436 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3437 }
3438 break;
49704364 3439 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3440#if LONGSIZE != SIZE32
fc241834 3441 while (len-- > 0) {
f337b084 3442 unsigned long aulong;
fc241834
RGS
3443 fromstr = NEXTFROM;
3444 aulong = SvUV(fromstr);
3445 DO_BO_PACK(aulong, l);
f337b084 3446 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3447 }
49704364
WL
3448 break;
3449#else
3450 /* Fall though! */
a6ec74c1 3451#endif
49704364 3452 case 'L':
fc241834 3453 while (len-- > 0) {
f337b084 3454 U32 au32;
fc241834
RGS
3455 fromstr = NEXTFROM;
3456 au32 = SvUV(fromstr);
3457 DO_BO_PACK(au32, 32);
f337b084 3458 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3459 }
3460 break;
49704364 3461 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3462#if LONGSIZE != SIZE32
fc241834 3463 while (len-- > 0) {
f337b084 3464 long along;
fc241834
RGS
3465 fromstr = NEXTFROM;
3466 along = SvIV(fromstr);
3467 DO_BO_PACK(along, l);
f337b084 3468 PUSH_VAR(utf8, cur, along);
a6ec74c1 3469 }
49704364
WL
3470 break;
3471#else
3472 /* Fall though! */
a6ec74c1 3473#endif
49704364
WL
3474 case 'l':
3475 while (len-- > 0) {
f337b084 3476 I32 ai32;
49704364 3477 fromstr = NEXTFROM;
ef108786
MHM
3478 ai32 = SvIV(fromstr);
3479 DO_BO_PACK(ai32, 32);
f337b084 3480 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3481 }
3482 break;
3483#ifdef HAS_QUAD
3484 case 'Q':
3485 while (len-- > 0) {
f337b084 3486 Uquad_t auquad;
a6ec74c1 3487 fromstr = NEXTFROM;
f337b084 3488 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3489 DO_BO_PACK(auquad, 64);
f337b084 3490 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3491 }
3492 break;
3493 case 'q':
3494 while (len-- > 0) {
f337b084 3495 Quad_t aquad;
a6ec74c1
JH
3496 fromstr = NEXTFROM;
3497 aquad = (Quad_t)SvIV(fromstr);
1109a392 3498 DO_BO_PACK(aquad, 64);
f337b084 3499 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3500 }
3501 break;
f337b084 3502#endif /* HAS_QUAD */
a6ec74c1
JH
3503 case 'P':
3504 len = 1; /* assume SV is correct length */
f337b084 3505 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3506 /* Fall through! */
a6ec74c1
JH
3507 case 'p':
3508 while (len-- > 0) {
83003860 3509 const char *aptr;
f337b084 3510
a6ec74c1 3511 fromstr = NEXTFROM;
28a4f200
TH
3512 SvGETMAGIC(fromstr);
3513 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3514 else {
a6ec74c1
JH
3515 /* XXX better yet, could spirit away the string to
3516 * a safe spot and hang on to it until the result
3517 * of pack() (and all copies of the result) are
3518 * gone.
3519 */
041457d9 3520 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3521 !SvREADONLY(fromstr)))) {
3522 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3523 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3524 }
3525 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3526 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3527 else
2596d9fe 3528 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3529 }
07409e01 3530 DO_BO_PACK_PC(aptr);
f337b084 3531 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3532 }
3533 break;
fc241834 3534 case 'u': {
f7fe979e 3535 const char *aptr, *aend;
fc241834 3536 bool from_utf8;
f337b084 3537
a6ec74c1 3538 fromstr = NEXTFROM;
fc241834
RGS
3539 if (len <= 2) len = 45;
3540 else len = len / 3 * 3;
3541 if (len >= 64) {
a2a5de95
NC
3542 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3543 "Field too wide in 'u' format in pack");
fc241834
RGS
3544 len = 63;
3545 }
83003860 3546 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3547 from_utf8 = DO_UTF8(fromstr);
3548 if (from_utf8) {
3549 aend = aptr + fromlen;
3550 fromlen = sv_len_utf8(fromstr);
3551 } else aend = NULL; /* Unused, but keep compilers happy */
3552 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3553 while (fromlen > 0) {
fc241834 3554 U8 *end;
a6ec74c1 3555 I32 todo;
fc241834 3556 U8 hunk[1+63/3*4+1];
a6ec74c1 3557
eb160463 3558 if ((I32)fromlen > len)
a6ec74c1
JH
3559 todo = len;
3560 else
3561 todo = fromlen;
fc241834
RGS
3562 if (from_utf8) {
3563 char buffer[64];
3564 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3565 'u' | TYPE_IS_PACK)) {
3566 *cur = '\0';
b162af07 3567 SvCUR_set(cat, cur - start);
5f5991a0 3568 Perl_croak(aTHX_ "panic: string is shorter than advertised");
fc241834
RGS
3569 }
3570 end = doencodes(hunk, buffer, todo);
3571 } else {
3572 end = doencodes(hunk, aptr, todo);
3573 aptr += todo;
3574 }
3575 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3576 fromlen -= todo;
3577 }
a6ec74c1
JH
3578 break;
3579 }
f337b084
TH
3580 }
3581 *cur = '\0';
b162af07 3582 SvCUR_set(cat, cur - start);
f337b084 3583 no_change:
49704364 3584 *symptr = lookahead;
a6ec74c1 3585 }
49704364 3586 return beglist;
18529408
IZ
3587}
3588#undef NEXTFROM
3589
3590
3591PP(pp_pack)
3592{
97aff369 3593 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3594 register SV *cat = TARG;
3595 STRLEN fromlen;
349d4f2f
NC
3596 SV *pat_sv = *++MARK;
3597 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3598 register const char *patend = pat + fromlen;
18529408
IZ
3599
3600 MARK++;
76f68e9b 3601 sv_setpvs(cat, "");
f337b084 3602 SvUTF8_off(cat);
18529408 3603
7accc089 3604 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3605
a6ec74c1
JH
3606 SvSETMAGIC(cat);
3607 SP = ORIGMARK;
3608 PUSHs(cat);
3609 RETURN;
3610}
a6ec74c1 3611
73cb7263
NC
3612/*
3613 * Local variables:
3614 * c-indentation-style: bsd
3615 * c-basic-offset: 4
3616 * indent-tabs-mode: t
3617 * End:
3618 *
37442d52
RGS
3619 * ex: set ts=8 sts=4 sw=4 noet:
3620 */