This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove perlsh, which is neither installed nor referenced anywhere.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
7212898e 85#if PERL_VERSION >= 9
f337b084
TH
86# define PERL_PACK_CAN_BYTEORDER
87# define PERL_PACK_CAN_SHRIEKSIGN
88#endif
89
90#ifndef CHAR_BIT
91# define CHAR_BIT 8
7212898e 92#endif
3473cf63
RGS
93/* Maximum number of bytes to which a byte can grow due to upgrade */
94#define UTF8_EXPAND 2
7212898e 95
a6ec74c1 96/*
a6ec74c1
JH
97 * Offset for integer pack/unpack.
98 *
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
101 */
102
103/*
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
109 */
110/*
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
113 */
114#define SIZE16 2
115#define SIZE32 4
116
117/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
118 --jhi Feb 1999 */
119
1109a392
MHM
120#if U16SIZE > SIZE16 || U32SIZE > SIZE32
121# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
122# define OFF16(p) ((char*)(p))
123# define OFF32(p) ((char*)(p))
a6ec74c1 124# else
1109a392 125# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
126# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
128# else
08ca2aa3 129 ++++ bad cray byte order
a6ec74c1
JH
130# endif
131# endif
a6ec74c1 132#else
08ca2aa3
TH
133# define OFF16(p) ((char *) (p))
134# define OFF32(p) ((char *) (p))
a6ec74c1
JH
135#endif
136
f337b084
TH
137/* Only to be used inside a loop (see the break) */
138#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
139 if (utf8) { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
141 } else { \
142 Copy(s, OFF16(p), SIZE16, char); \
143 (s) += SIZE16; \
144 } \
145} STMT_END
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
149 if (utf8) { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
151 } else { \
152 Copy(s, OFF32(p), SIZE32, char); \
153 (s) += SIZE32; \
154 } \
155} STMT_END
156
157#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
159
160/* Only to be used inside a loop (see the break) */
275663fa 161#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
162STMT_START { \
163 if (utf8) { \
f337b084 164 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 165 (char *) (buf), len, datumtype)) break; \
08ca2aa3 166 } else { \
275663fa
TC
167 Copy(s, (char *) (buf), len, char); \
168 s += len; \
08ca2aa3 169 } \
08ca2aa3
TH
170} STMT_END
171
275663fa
TC
172#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174
f337b084 175#define PUSH_VAR(utf8, aptr, var) \
230e1fce 176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 177
49704364
WL
178/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179#define MAX_SUB_TEMPLATE_LEVEL 100
180
66c611c5 181/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
182#define FLAG_WAS_UTF8 0x40
183#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 184#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 185#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
186#define FLAG_SLASH 0x04
187#define FLAG_COMMA 0x02
188#define FLAG_PACK 0x01
189
a6ec74c1
JH
190STATIC SV *
191S_mul128(pTHX_ SV *sv, U8 m)
192{
193 STRLEN len;
194 char *s = SvPV(sv, len);
195 char *t;
a6ec74c1 196
7918f24d
NC
197 PERL_ARGS_ASSERT_MUL128;
198
a6ec74c1 199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 200 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
201
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
204 sv = tmpNew;
205 s = SvPV(sv, len);
206 }
207 t = s + len - 1;
208 while (!*t) /* trailing '\0'? */
209 t--;
210 while (t > s) {
f7fe979e 211 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
212 *(t--) = '0' + (char)(i % 10);
213 m = (char)(i / 10);
a6ec74c1
JH
214 }
215 return (sv);
216}
217
218/* Explosives and implosives. */
219
220#if 'I' == 73 && 'J' == 74
221/* On an ASCII/ISO kind of system */
222#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
223#else
224/*
225 Some other sort of character set - use memchr() so we don't match
226 the null byte.
227 */
228#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229#endif
230
66c611c5 231/* type modifiers */
62f95557 232#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
233#define TYPE_IS_BIG_ENDIAN 0x200
234#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 235#define TYPE_IS_PACK 0x800
1109a392 236#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 237#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
238#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239
7212898e 240#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 241# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 242#else
28be1210 243# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
244#endif
245
246#ifndef PERL_PACK_CAN_BYTEORDER
247/* Put "can't" first because it is shorter */
248# define TYPE_ENDIANNESS(t) 0
249# define TYPE_NO_ENDIANNESS(t) (t)
250
251# define ENDIANNESS_ALLOWED_TYPES ""
252
253# define DO_BO_UNPACK(var, type)
254# define DO_BO_PACK(var, type)
07409e01
NC
255# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
257# define DO_BO_UNPACK_N(var, type)
258# define DO_BO_PACK_N(var, type)
259# define DO_BO_UNPACK_P(var)
260# define DO_BO_PACK_P(var)
45f723e1
NC
261# define DO_BO_UNPACK_PC(var)
262# define DO_BO_PACK_PC(var)
66c611c5 263
f337b084 264#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
265
266# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
268
269# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
270
271# define DO_BO_UNPACK(var, type) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
276 default: break; \
277 } \
278 } STMT_END
279
7212898e 280# define DO_BO_PACK(var, type) \
1109a392 281 STMT_START { \
66c611c5 282 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
285 default: break; \
286 } \
287 } STMT_END
288
07409e01 289# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 292 case TYPE_IS_BIG_ENDIAN: \
07409e01 293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
294 break; \
295 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
297 break; \
298 default: \
299 break; \
300 } \
301 } STMT_END
302
07409e01 303# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 304 STMT_START { \
66c611c5 305 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 306 case TYPE_IS_BIG_ENDIAN: \
07409e01 307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
311 break; \
312 default: \
313 break; \
314 } \
315 } STMT_END
316
7212898e 317# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
318 STMT_START { \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
323 break; \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
327 break; \
328 default: \
329 break; \
330 } \
331 } STMT_END
332
7212898e 333# if PTRSIZE == INTSIZE
07409e01
NC
334# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 338# elif PTRSIZE == LONGSIZE
fa58a56f
S
339# if LONGSIZE < IVSIZE && IVSIZE == 8
340# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
344# else
345# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
349# endif
2b00a750
JD
350# elif PTRSIZE == IVSIZE
351# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
355# else
356# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
358# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 360# endif
1109a392 361
7212898e 362# if defined(my_htolen) && defined(my_letohn) && \
1109a392 363 defined(my_htoben) && defined(my_betohn)
7212898e 364# define DO_BO_UNPACK_N(var, type) \
1109a392 365 STMT_START { \
66c611c5 366 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
369 default: break; \
370 } \
371 } STMT_END
372
7212898e 373# define DO_BO_PACK_N(var, type) \
1109a392 374 STMT_START { \
66c611c5 375 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
378 default: break; \
379 } \
380 } STMT_END
7212898e
NC
381# else
382# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
384# endif
385
f337b084 386#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 387
78d46eaa 388#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 389#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
390#define PACK_SIZE_MASK 0x3F
391
78d46eaa
NC
392/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
394
395#if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
397#endif
398
399typedef U8 packprops_t;
78d46eaa
NC
400#if 'J'-'I' == 1
401/* ASCII */
f43a9a31 402STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
403 /* normal */
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 0, 0, 0,
1651fc44 409 /* C */ sizeof(unsigned char),
80a13697 410#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 411 /* D */ LONG_DOUBLESIZE,
80a13697 412#else
a7a3cfaa 413 0,
80a13697 414#endif
a7a3cfaa
TH
415 0,
416 /* F */ NVSIZE,
417 0, 0,
418 /* I */ sizeof(unsigned int),
419 /* J */ UVSIZE,
420 0,
421 /* L */ SIZE32,
422 0,
423 /* N */ SIZE32,
424 0, 0,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* Q */ sizeof(Uquad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* S */ SIZE16,
432 0,
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
434 /* V */ SIZE32,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
439 0,
440 /* f */ sizeof(float),
441 0, 0,
442 /* i */ sizeof(int),
443 /* j */ IVSIZE,
444 0,
445 /* l */ SIZE32,
446 0,
447 /* n */ SIZE16,
448 0,
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 450#if defined(HAS_QUAD)
a7a3cfaa 451 /* q */ sizeof(Quad_t),
80a13697 452#else
a7a3cfaa 453 0,
80a13697 454#endif
a7a3cfaa
TH
455 0,
456 /* s */ SIZE16,
457 0, 0,
458 /* v */ SIZE16,
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
469 /* shrieking */
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
476 0, 0,
477 /* L */ sizeof(unsigned long),
478 0,
7212898e 479#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 480 /* N */ SIZE32,
7212898e 481#else
a7a3cfaa 482 0,
7212898e 483#endif
a7a3cfaa
TH
484 0, 0, 0, 0,
485 /* S */ sizeof(unsigned short),
486 0, 0,
7212898e 487#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 488 /* V */ SIZE32,
7212898e 489#else
a7a3cfaa 490 0,
7212898e 491#endif
a7a3cfaa
TH
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0,
494 /* i */ sizeof(int),
495 0, 0,
496 /* l */ sizeof(long),
497 0,
7212898e 498#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 499 /* n */ SIZE16,
7212898e 500#else
a7a3cfaa 501 0,
7212898e 502#endif
a7a3cfaa
TH
503 0, 0, 0, 0,
504 /* s */ sizeof(short),
505 0, 0,
7212898e 506#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 507 /* v */ SIZE16,
7212898e 508#else
a7a3cfaa 509 0,
7212898e 510#endif
a7a3cfaa
TH
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
520};
521#else
522/* EBCDIC (or bust) */
f43a9a31 523STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
524 /* normal */
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
536 0,
537 /* f */ sizeof(float),
538 0, 0,
539 /* i */ sizeof(int),
540 0, 0, 0, 0, 0, 0, 0,
541 /* j */ IVSIZE,
542 0,
543 /* l */ SIZE32,
544 0,
545 /* n */ SIZE16,
546 0,
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 548#if defined(HAS_QUAD)
a7a3cfaa 549 /* q */ sizeof(Quad_t),
80a13697 550#else
a7a3cfaa 551 0,
80a13697 552#endif
a7a3cfaa
TH
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 /* s */ SIZE16,
555 0, 0,
556 /* v */ SIZE16,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 560 /* C */ sizeof(unsigned char),
80a13697 561#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 562 /* D */ LONG_DOUBLESIZE,
80a13697 563#else
a7a3cfaa 564 0,
80a13697 565#endif
a7a3cfaa
TH
566 0,
567 /* F */ NVSIZE,
568 0, 0,
569 /* I */ sizeof(unsigned int),
570 0, 0, 0, 0, 0, 0, 0,
571 /* J */ UVSIZE,
572 0,
573 /* L */ SIZE32,
574 0,
575 /* N */ SIZE32,
576 0, 0,
80a13697 577#if defined(HAS_QUAD)
a7a3cfaa 578 /* Q */ sizeof(Uquad_t),
80a13697 579#else
a7a3cfaa 580 0,
80a13697 581#endif
a7a3cfaa
TH
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 /* S */ SIZE16,
584 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
586 /* V */ SIZE32,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* shrieking */
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* i */ sizeof(int),
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
603 0,
7212898e 604#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 605 /* n */ SIZE16,
7212898e 606#else
a7a3cfaa 607 0,
7212898e 608#endif
a7a3cfaa
TH
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
611 0, 0,
7212898e 612#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 613 /* v */ SIZE16,
7212898e 614#else
a7a3cfaa 615 0,
7212898e 616#endif
a7a3cfaa
TH
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
619 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
623 0,
7212898e 624#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 625 /* N */ SIZE32,
7212898e 626#else
a7a3cfaa 627 0,
7212898e 628#endif
a7a3cfaa
TH
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
631 0, 0,
7212898e 632#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 633 /* V */ SIZE32,
7212898e 634#else
a7a3cfaa 635 0,
7212898e 636#endif
a7a3cfaa
TH
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
639};
640#endif
641
08ca2aa3 642STATIC U8
f7fe979e 643uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 644{
08ca2aa3 645 STRLEN retlen;
0bcc34c2 646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 648 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 654 if (val >= 0x100) {
a2a5de95
NC
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
658 val &= 0xff;
659 }
660 *s += retlen;
fe2774ed 661 return (U8)val;
08ca2aa3
TH
662}
663
f337b084
TH
664#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
666 *(U8 *)(s)++)
667
668STATIC bool
f7fe979e 669uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
670{
671 UV val;
672 STRLEN retlen;
f7fe979e 673 const char *from = *s;
08ca2aa3 674 int bad = 0;
f7fe979e 675 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
f337b084 679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
682 bad |= 1;
683 } else from += retlen;
684 if (val >= 0x100) {
685 bad |= 2;
686 val &= 0xff;
687 }
fe2774ed 688 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
689 }
690 /* We have enough characters for the buffer. Did we have problems ? */
691 if (bad) {
692 if (bad & 1) {
693 /* Rewalk the string fragment while warning */
f7fe979e 694 const char *ptr;
9e27e96a 695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
f337b084 698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
699 }
700 if (from > end) from = end;
701 }
a2a5de95
NC
702 if ((bad & 2))
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 704 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
708 }
709 *s = from;
710 return TRUE;
711}
712
713STATIC bool
f7fe979e 714next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 715{
97aff369 716 dVAR;
08ca2aa3 717 STRLEN retlen;
0bcc34c2 718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
721 *out = 0;
722 return FALSE;
723 }
724 *out = PL_uudmap[val] & 077;
f337b084 725 *s += retlen;
08ca2aa3
TH
726 return TRUE;
727}
78d46eaa 728
64844641 729STATIC char *
14333449 730S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 731 const U8 * const end = start + len;
64844641 732
7918f24d
NC
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
734
f337b084 735 while (start < end) {
48fa4626
TS
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
739 else {
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
742 }
743 start++;
744 }
64844641 745 return dest;
f337b084
TH
746}
747
230e1fce
NC
748#define PUSH_BYTES(utf8, cur, buf, len) \
749STMT_START { \
64844641
AL
750 if (utf8) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
752 else { \
753 Copy(buf, cur, len, char); \
754 (cur) += (len); \
755 } \
f337b084
TH
756} STMT_END
757
758#define GROWING(utf8, cat, start, cur, in_len) \
759STMT_START { \
760 STRLEN glen = (in_len); \
3473cf63 761 if (utf8) glen *= UTF8_EXPAND; \
f337b084 762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 763 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
764 (cur) = (start) + SvCUR(cat); \
765 } \
766} STMT_END
767
768#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
769STMT_START { \
f7fe979e 770 const STRLEN glen = (in_len); \
f337b084 771 STRLEN gl = glen; \
3473cf63 772 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
774 *cur = '\0'; \
b162af07 775 SvCUR_set((cat), (cur) - (start)); \
0bd48802 776 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
777 (cur) = (start) + SvCUR(cat); \
778 } \
779 PUSH_BYTES(utf8, cur, buf, glen); \
780} STMT_END
781
782#define PUSH_BYTE(utf8, s, byte) \
783STMT_START { \
784 if (utf8) { \
f7fe979e 785 const U8 au8 = (byte); \
64844641 786 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
787 } else *(U8 *)(s)++ = (byte); \
788} STMT_END
789
790/* Only to be used inside a loop (see the break) */
791#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
792STMT_START { \
793 STRLEN retlen; \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
797 *cur = '\0'; \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
799 } \
800 str += retlen; \
801} STMT_END
802
f7fe979e
AL
803static const char *_action( const tempsym_t* symptr )
804{
10edeb5d 805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
806}
807
206947d2 808/* Returns the sizeof() struct described by pat */
028d1f6d 809STATIC I32
f337b084 810S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 811{
f337b084 812 I32 total = 0;
206947d2 813
7918f24d
NC
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
815
49704364 816 while (next_symbol(symptr)) {
f337b084 817 I32 len;
f7fe979e 818 int size;
f337b084
TH
819
820 switch (symptr->howlen) {
fc241834 821 case e_star:
49704364 822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 823 _action( symptr ) );
49704364 824 break;
f337b084
TH
825 default:
826 /* e_no_len and e_number */
827 len = symptr->length;
828 break;
49704364
WL
829 }
830
a7a3cfaa 831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 832 if (!size) {
f7fe979e 833 int star;
80a13697
NC
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
836 default:
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 839 _action( symptr ) );
28be1210
TH
840#ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
843#endif
80a13697 844 case '@':
28be1210 845 case '.':
80a13697
NC
846 case '/':
847 case 'U': /* XXXX Is it correct? */
848 case 'w':
849 case 'u':
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 851 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 852 _action( symptr ) );
80a13697
NC
853 case '%':
854 size = 0;
855 break;
856 case '(':
fc241834
RGS
857 {
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
866 *symptr = savsym;
867 break;
868 }
80a13697
NC
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
871 */
872 if (!len) /* Avoid division by 0 */
873 len = 1;
874 len = total % len; /* Assumed: the start is aligned. */
875 /* FALL THROUGH */
876 case 'X':
877 size = -1;
878 if (total < len)
f7fe979e 879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
880 break;
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
883 len = 1;
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
886 len = len - star;
887 else
888 len = 0;
889 /* FALL THROUGH */
890 case 'x':
891 case 'A':
892 case 'Z':
893 case 'a':
80a13697
NC
894 size = 1;
895 break;
896 case 'B':
897 case 'b':
898 len = (len + 7)/8;
899 size = 1;
900 break;
901 case 'H':
902 case 'h':
903 len = (len + 1)/2;
904 size = 1;
905 break;
78d46eaa 906
80a13697
NC
907 case 'P':
908 len = 1;
909 size = sizeof(char*);
78d46eaa
NC
910 break;
911 }
206947d2
IZ
912 }
913 total += len * size;
914 }
915 return total;
916}
917
49704364
WL
918
919/* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
921 */
f7fe979e 922STATIC const char *
5aaab254 923S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 924{
7918f24d
NC
925 PERL_ARGS_ASSERT_GROUP_END;
926
49704364 927 while (patptr < patend) {
f7fe979e 928 const char c = *patptr++;
49704364
WL
929
930 if (isSPACE(c))
931 continue;
932 else if (c == ender)
933 return patptr-1;
934 else if (c == '#') {
935 while (patptr < patend && *patptr != '\n')
936 patptr++;
937 continue;
938 } else if (c == '(')
939 patptr = group_end(patptr, patend, ')') + 1;
940 else if (c == '[')
941 patptr = group_end(patptr, patend, ']') + 1;
18529408 942 }
49704364
WL
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
944 ender);
945 return 0;
18529408
IZ
946}
947
49704364
WL
948
949/* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
fc241834 952 */
f7fe979e 953STATIC const char *
5aaab254 954S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
955{
956 I32 len = *patptr++ - '0';
7918f24d
NC
957
958 PERL_ARGS_ASSERT_GET_NUM;
959
49704364
WL
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
964 }
965 *lenptr = len;
966 return patptr;
967}
968
969/* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
971 */
972STATIC bool
f337b084 973S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 974{
f7fe979e 975 const char* patptr = symptr->patptr;
0bcc34c2 976 const char* const patend = symptr->patend;
49704364 977
7918f24d
NC
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
979
49704364
WL
980 symptr->flags &= ~FLAG_SLASH;
981
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
984 patptr++;
985 else if (*patptr == '#') {
986 patptr++;
987 while (patptr < patend && *patptr != '\n')
988 patptr++;
989 if (patptr < patend)
990 patptr++;
991 } else {
fc241834 992 /* We should have found a template code */
49704364 993 I32 code = *patptr++ & 0xFF;
66c611c5 994 U32 inherited_modifiers = 0;
49704364
WL
995
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 1000 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
1001 }
1002 continue;
1003 }
fc241834 1004
49704364 1005 /* for '(', skip to ')' */
fc241834 1006 if (code == '(') {
49704364
WL
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 1009 _action( symptr ) );
49704364
WL
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 1014 _action( symptr ) );
49704364
WL
1015 }
1016
66c611c5
MHM
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1021 }
1022
1109a392
MHM
1023 /* look for modifiers */
1024 while (patptr < patend) {
b7787f18
AL
1025 const char *allowed;
1026 I32 modifier;
1109a392
MHM
1027 switch (*patptr) {
1028 case '!':
1029 modifier = TYPE_IS_SHRIEKING;
7212898e 1030 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1031 break;
7212898e 1032#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1033 case '>':
1034 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1036 break;
1037 case '<':
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1040 break;
f337b084 1041#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1042 default:
b7787f18
AL
1043 allowed = "";
1044 modifier = 0;
1109a392
MHM
1045 break;
1046 }
66c611c5 1047
1109a392
MHM
1048 if (modifier == 0)
1049 break;
66c611c5 1050
1109a392
MHM
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1053 allowed, _action( symptr ) );
66c611c5
MHM
1054
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1061 *patptr, _action( symptr ) );
66c611c5 1062
a2a5de95
NC
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1109a392 1068 }
66c611c5 1069
1109a392
MHM
1070 code |= modifier;
1071 patptr++;
49704364
WL
1072 }
1073
66c611c5
MHM
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1076
fc241834 1077 /* look for count and/or / */
49704364
WL
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1082
1083 } else if (*patptr == '*') {
1084 patptr++;
1085 symptr->howlen = e_star;
1086
1087 } else if (*patptr == '[') {
f7fe979e 1088 const char* lenptr = ++patptr;
49704364
WL
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1096 _action( symptr ) );
49704364
WL
1097 } else {
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1102 *symptr = savsym;
1103 }
1104 } else {
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 /* try to find / */
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1112 patptr++;
1113 else if (*patptr == '#') {
1114 patptr++;
1115 while (patptr < patend && *patptr != '\n')
1116 patptr++;
1117 if (patptr < patend)
1118 patptr++;
1119 } else {
66c611c5 1120 if (*patptr == '/') {
49704364
WL
1121 symptr->flags |= FLAG_SLASH;
1122 patptr++;
66c611c5
MHM
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1126 _action( symptr ) );
49704364
WL
1127 }
1128 break;
1129 }
18529408 1130 }
49704364
WL
1131 } else {
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1134 symptr->length = 1;
1135 }
1136
1137 symptr->code = code;
fc241834 1138 symptr->patptr = patptr;
49704364 1139 return TRUE;
18529408 1140 }
49704364 1141 }
fc241834 1142 symptr->patptr = patptr;
49704364 1143 return FALSE;
18529408
IZ
1144}
1145
18529408 1146/*
fc241834 1147 There is no way to cleanly handle the case where we should process the
08ca2aa3 1148 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1153 themselves if they need to do a lot of unpacks like this on it
1154*/
fc241834 1155STATIC bool
08ca2aa3
TH
1156need_utf8(const char *pat, const char *patend)
1157{
1158 bool first = TRUE;
7918f24d
NC
1159
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1161
08ca2aa3
TH
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1164 pat++;
f7fe979e 1165 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1170 pat++;
1171 }
1172 return FALSE;
1173}
1174
1175STATIC char
1176first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1178
08ca2aa3
TH
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1181 pat++;
f7fe979e 1182 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1183 if (!pat) return 0;
1184 pat++;
1185 }
1186 return 0;
1187}
1188
1189/*
7accc089
JH
1190=for apidoc unpackstring
1191
21ebfc7a
DM
1192The engine implementing the unpack() Perl function.
1193
1194Using the template pat..patend, this function unpacks the string
1195s..strend into a number of mortal SVs, which it pushes onto the perl
1196argument (@_) stack (so you will need to issue a C<PUTBACK> before and
1197C<SPAGAIN> after the call to this function). It returns the number of
1198pushed elements.
1199
1200The strend and patend pointers should point to the byte following the last
1201character of each string.
1202
1203Although this function returns its values on the perl argument stack, it
1204doesn't take any parameters from that stack (and thus in particular
1205there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
1206example).
7accc089
JH
1207
1208=cut */
1209
1210I32
f7fe979e 1211Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1212{
f7fe979e 1213 tempsym_t sym;
08ca2aa3 1214
7918f24d
NC
1215 PERL_ARGS_ASSERT_UNPACKSTRING;
1216
f337b084 1217 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1218 else if (need_utf8(pat, patend)) {
1219 /* We probably should try to avoid this in case a scalar context call
1220 wouldn't get to the "U0" */
1221 STRLEN len = strend - s;
230e1fce 1222 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1223 SAVEFREEPV(s);
1224 strend = s + len;
f337b084 1225 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1226 }
1227
f337b084
TH
1228 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1229 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1230
f7fe979e 1231 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1232
1233 return unpack_rec(&sym, s, s, strend, NULL );
1234}
1235
4136a0f7 1236STATIC I32
f7fe979e 1237S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1238{
27da23d5 1239 dVAR; dSP;
3297d27d 1240 SV *sv = NULL;
f7fe979e 1241 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1242 howlen_t howlen;
a6ec74c1 1243 I32 checksum = 0;
92d41999 1244 UV cuv = 0;
a6ec74c1 1245 NV cdouble = 0.0;
f337b084 1246 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1247 bool beyond = FALSE;
21c16052 1248 bool explicit_length;
9e27e96a 1249 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1250 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1251
1252 PERL_ARGS_ASSERT_UNPACK_REC;
1253
28be1210 1254 symptr->strbeg = s - strbeg;
49704364 1255
49704364 1256 while (next_symbol(symptr)) {
a7a3cfaa 1257 packprops_t props;
9e27e96a 1258 I32 len;
f337b084 1259 I32 datumtype = symptr->code;
206947d2 1260 /* do first one only unless in list context
08ca2aa3 1261 / is implemented by unpacking the count, then popping it from the
206947d2 1262 stack, so must check that we're not in the middle of a / */
49704364 1263 if ( unpack_only_one
206947d2 1264 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1265 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1266 break;
49704364 1267
f337b084 1268 switch (howlen = symptr->howlen) {
fc241834
RGS
1269 case e_star:
1270 len = strend - strbeg; /* long enough */
49704364 1271 break;
f337b084
TH
1272 default:
1273 /* e_no_len and e_number */
1274 len = symptr->length;
1275 break;
49704364 1276 }
18529408 1277
21c16052 1278 explicit_length = TRUE;
a6ec74c1 1279 redo_switch:
49704364 1280 beyond = s >= strend;
a7a3cfaa
TH
1281
1282 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1283 if (props) {
1284 /* props nonzero means we can process this letter. */
9e27e96a
AL
1285 const long size = props & PACK_SIZE_MASK;
1286 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1287 if (len > howmany)
1288 len = howmany;
1289
1290 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1291 if (len && unpack_only_one) len = 1;
1292 EXTEND(SP, len);
1293 EXTEND_MORTAL(len);
78d46eaa
NC
1294 }
1295 }
a7a3cfaa 1296
1109a392 1297 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1298 default:
1109a392 1299 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1300
a6ec74c1 1301 case '%':
49704364 1302 if (howlen == e_no_len)
18529408 1303 len = 16; /* len is not specified */
a6ec74c1 1304 checksum = len;
92d41999 1305 cuv = 0;
a6ec74c1 1306 cdouble = 0;
18529408 1307 continue;
a6ec74c1 1308 break;
18529408
IZ
1309 case '(':
1310 {
49704364 1311 tempsym_t savsym = *symptr;
9e27e96a 1312 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1313 symptr->flags |= group_modifiers;
49704364 1314 symptr->patend = savsym.grpend;
28be1210 1315 symptr->previous = &savsym;
49704364 1316 symptr->level++;
18529408 1317 PUTBACK;
c6f750d1 1318 if (len && unpack_only_one) len = 1;
18529408 1319 while (len--) {
49704364 1320 symptr->patptr = savsym.grpbeg;
f337b084
TH
1321 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1322 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1323 unpack_rec(symptr, s, strbeg, strend, &s);
1324 if (s == strend && savsym.howlen == e_star)
49704364 1325 break; /* No way to continue */
18529408
IZ
1326 }
1327 SPAGAIN;
28be1210 1328 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1329 *symptr = savsym;
18529408
IZ
1330 break;
1331 }
28be1210
TH
1332#ifdef PERL_PACK_CAN_SHRIEKSIGN
1333 case '.' | TYPE_IS_SHRIEKING:
1334#endif
1335 case '.': {
9e27e96a 1336 const char *from;
28be1210
TH
1337 SV *sv;
1338#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1339 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1340#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1341 const bool u8 = utf8;
28be1210
TH
1342#endif
1343 if (howlen == e_star) from = strbeg;
1344 else if (len <= 0) from = s;
1345 else {
1346 tempsym_t *group = symptr;
1347
1348 while (--len && group) group = group->previous;
1349 from = group ? strbeg + group->strbeg : strbeg;
1350 }
1351 sv = from <= s ?
00646304
CB
1352 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1353 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1354 mXPUSHs(sv);
28be1210
TH
1355 break;
1356 }
1357#ifdef PERL_PACK_CAN_SHRIEKSIGN
1358 case '@' | TYPE_IS_SHRIEKING:
1359#endif
a6ec74c1 1360 case '@':
28be1210
TH
1361 s = strbeg + symptr->strbeg;
1362#ifdef PERL_PACK_CAN_SHRIEKSIGN
1363 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1364#else /* PERL_PACK_CAN_SHRIEKSIGN */
1365 if (utf8)
1366#endif
1367 {
08ca2aa3
TH
1368 while (len > 0) {
1369 if (s >= strend)
1370 Perl_croak(aTHX_ "'@' outside of string in unpack");
1371 s += UTF8SKIP(s);
1372 len--;
1373 }
1374 if (s > strend)
1375 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1376 } else {
28be1210 1377 if (strend-s < len)
fc241834 1378 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1379 s += len;
08ca2aa3 1380 }
a6ec74c1 1381 break;
62f95557
IZ
1382 case 'X' | TYPE_IS_SHRIEKING:
1383 if (!len) /* Avoid division by 0 */
1384 len = 1;
08ca2aa3 1385 if (utf8) {
f7fe979e 1386 const char *hop, *last;
f337b084
TH
1387 I32 l = len;
1388 hop = last = strbeg;
1389 while (hop < s) {
1390 hop += UTF8SKIP(hop);
1391 if (--l == 0) {
08ca2aa3 1392 last = hop;
f337b084
TH
1393 l = len;
1394 }
fc241834 1395 }
f337b084
TH
1396 if (last > s)
1397 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1398 s = last;
1399 break;
f337b084
TH
1400 }
1401 len = (s - strbeg) % len;
62f95557 1402 /* FALL THROUGH */
a6ec74c1 1403 case 'X':
08ca2aa3
TH
1404 if (utf8) {
1405 while (len > 0) {
1406 if (s <= strbeg)
1407 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1408 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1409 if (s <= strbeg)
1410 Perl_croak(aTHX_ "'X' outside of string in unpack");
1411 }
1412 len--;
1413 }
1414 } else {
fc241834
RGS
1415 if (len > s - strbeg)
1416 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1417 s -= len;
08ca2aa3 1418 }
a6ec74c1 1419 break;
9e27e96a
AL
1420 case 'x' | TYPE_IS_SHRIEKING: {
1421 I32 ai32;
62f95557
IZ
1422 if (!len) /* Avoid division by 0 */
1423 len = 1;
230e1fce
NC
1424 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1425 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1426 if (ai32 == 0) break;
1427 len -= ai32;
9e27e96a 1428 }
62f95557 1429 /* FALL THROUGH */
a6ec74c1 1430 case 'x':
08ca2aa3
TH
1431 if (utf8) {
1432 while (len>0) {
1433 if (s >= strend)
1434 Perl_croak(aTHX_ "'x' outside of string in unpack");
1435 s += UTF8SKIP(s);
1436 len--;
1437 }
1438 } else {
fc241834
RGS
1439 if (len > strend - s)
1440 Perl_croak(aTHX_ "'x' outside of string in unpack");
1441 s += len;
f337b084 1442 }
a6ec74c1
JH
1443 break;
1444 case '/':
49704364
WL
1445 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1446 break;
a6ec74c1
JH
1447 case 'A':
1448 case 'Z':
1449 case 'a':
08ca2aa3
TH
1450 if (checksum) {
1451 /* Preliminary length estimate is assumed done in 'W' */
1452 if (len > strend - s) len = strend - s;
1453 goto W_checksum;
1454 }
1455 if (utf8) {
1456 I32 l;
f7fe979e 1457 const char *hop;
08ca2aa3
TH
1458 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1459 if (hop >= strend) {
1460 if (hop > strend)
1461 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1462 break;
fc241834 1463 }
a6ec74c1 1464 }
08ca2aa3
TH
1465 if (hop > strend)
1466 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1467 len = hop - s;
1468 } else if (len > strend - s)
1469 len = strend - s;
1470
1471 if (datumtype == 'Z') {
1472 /* 'Z' strips stuff after first null */
f7fe979e 1473 const char *ptr, *end;
f337b084
TH
1474 end = s + len;
1475 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1476 sv = newSVpvn(s, ptr-s);
1477 if (howlen == e_star) /* exact for 'Z*' */
1478 len = ptr-s + (ptr != strend ? 1 : 0);
1479 } else if (datumtype == 'A') {
1480 /* 'A' strips both nulls and spaces */
f7fe979e 1481 const char *ptr;
18bdf90a
TH
1482 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1483 for (ptr = s+len-1; ptr >= s; ptr--)
1484 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1485 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1486 if (ptr >= s) ptr += UTF8SKIP(ptr);
1487 else ptr++;
28be1210 1488 if (ptr > s+len)
18bdf90a
TH
1489 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1490 } else {
1491 for (ptr = s+len-1; ptr >= s; ptr--)
1492 if (*ptr != 0 && !isSPACE(*ptr)) break;
1493 ptr++;
1494 }
08ca2aa3
TH
1495 sv = newSVpvn(s, ptr-s);
1496 } else sv = newSVpvn(s, len);
1497
1498 if (utf8) {
1499 SvUTF8_on(sv);
1500 /* Undo any upgrade done due to need_utf8() */
f337b084 1501 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1502 sv_utf8_downgrade(sv, 0);
a6ec74c1 1503 }
6e449a3a 1504 mXPUSHs(sv);
08ca2aa3 1505 s += len;
a6ec74c1
JH
1506 break;
1507 case 'B':
08ca2aa3
TH
1508 case 'b': {
1509 char *str;
49704364 1510 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1511 len = (strend - s) * 8;
1512 if (checksum) {
f337b084 1513 if (utf8)
08ca2aa3 1514 while (len >= 8 && s < strend) {
f337b084 1515 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1516 len -= 8;
1517 }
f337b084 1518 else
fc241834 1519 while (len >= 8) {
08ca2aa3 1520 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1521 len -= 8;
1522 }
08ca2aa3
TH
1523 if (len && s < strend) {
1524 U8 bits;
f337b084
TH
1525 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1526 if (datumtype == 'b')
a6ec74c1 1527 while (len-- > 0) {
92d41999 1528 if (bits & 1) cuv++;
a6ec74c1
JH
1529 bits >>= 1;
1530 }
f337b084 1531 else
a6ec74c1 1532 while (len-- > 0) {
08ca2aa3 1533 if (bits & 0x80) cuv++;
a6ec74c1
JH
1534 bits <<= 1;
1535 }
fc241834 1536 }
a6ec74c1
JH
1537 break;
1538 }
08ca2aa3 1539
561b68a9 1540 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1541 SvPOK_on(sv);
1542 str = SvPVX(sv);
1543 if (datumtype == 'b') {
f337b084 1544 U8 bits = 0;
f7fe979e 1545 const I32 ai32 = len;
08ca2aa3
TH
1546 for (len = 0; len < ai32; len++) {
1547 if (len & 7) bits >>= 1;
1548 else if (utf8) {
1549 if (s >= strend) break;
f337b084 1550 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1551 } else bits = *(U8 *) s++;
1552 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1553 }
08ca2aa3 1554 } else {
f337b084 1555 U8 bits = 0;
f7fe979e 1556 const I32 ai32 = len;
08ca2aa3
TH
1557 for (len = 0; len < ai32; len++) {
1558 if (len & 7) bits <<= 1;
1559 else if (utf8) {
1560 if (s >= strend) break;
f337b084 1561 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1562 } else bits = *(U8 *) s++;
1563 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1564 }
1565 }
1566 *str = '\0';
aa07b2f6 1567 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1568 XPUSHs(sv);
a6ec74c1 1569 break;
08ca2aa3 1570 }
a6ec74c1 1571 case 'H':
08ca2aa3 1572 case 'h': {
3297d27d 1573 char *str = NULL;
fc241834 1574 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1575 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1576 len = (strend - s) * 2;
858fe5e1
TC
1577 if (!checksum) {
1578 sv = sv_2mortal(newSV(len ? len : 1));
1579 SvPOK_on(sv);
1580 str = SvPVX(sv);
1581 }
a6ec74c1 1582 if (datumtype == 'h') {
f337b084 1583 U8 bits = 0;
9e27e96a 1584 I32 ai32 = len;
fc241834
RGS
1585 for (len = 0; len < ai32; len++) {
1586 if (len & 1) bits >>= 4;
1587 else if (utf8) {
1588 if (s >= strend) break;
f337b084 1589 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1590 } else bits = * (U8 *) s++;
858fe5e1
TC
1591 if (!checksum)
1592 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1593 }
08ca2aa3 1594 } else {
f337b084 1595 U8 bits = 0;
f7fe979e 1596 const I32 ai32 = len;
08ca2aa3
TH
1597 for (len = 0; len < ai32; len++) {
1598 if (len & 1) bits <<= 4;
1599 else if (utf8) {
1600 if (s >= strend) break;
f337b084 1601 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1602 } else bits = *(U8 *) s++;
858fe5e1
TC
1603 if (!checksum)
1604 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1605 }
1606 }
858fe5e1
TC
1607 if (!checksum) {
1608 *str = '\0';
1609 SvCUR_set(sv, str - SvPVX_const(sv));
1610 XPUSHs(sv);
1611 }
a6ec74c1 1612 break;
08ca2aa3 1613 }
1651fc44
ML
1614 case 'C':
1615 if (len == 0) {
1616 if (explicit_length)
1617 /* Switch to "character" mode */
1618 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1619 break;
1620 }
1621 /* FALL THROUGH */
a6ec74c1 1622 case 'c':
1651fc44
ML
1623 while (len-- > 0 && s < strend) {
1624 int aint;
1625 if (utf8)
1626 {
1627 STRLEN retlen;
1628 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1629 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1630 if (retlen == (STRLEN) -1 || retlen == 0)
1631 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1632 s += retlen;
1633 }
1634 else
1635 aint = *(U8 *)(s)++;
1636 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1637 aint -= 256;
08ca2aa3 1638 if (!checksum)
6e449a3a 1639 mPUSHi(aint);
73cb7263
NC
1640 else if (checksum > bits_in_uv)
1641 cdouble += (NV)aint;
1642 else
1643 cuv += aint;
a6ec74c1
JH
1644 }
1645 break;
08ca2aa3
TH
1646 case 'W':
1647 W_checksum:
1651fc44 1648 if (utf8) {
08ca2aa3 1649 while (len-- > 0 && s < strend) {
08ca2aa3 1650 STRLEN retlen;
f7fe979e 1651 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1652 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1653 if (retlen == (STRLEN) -1 || retlen == 0)
1654 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1655 s += retlen;
1656 if (!checksum)
6e449a3a 1657 mPUSHu(val);
08ca2aa3
TH
1658 else if (checksum > bits_in_uv)
1659 cdouble += (NV) val;
d6d3e8bd 1660 else
08ca2aa3 1661 cuv += val;
fc241834 1662 }
08ca2aa3 1663 } else if (!checksum)
a6ec74c1 1664 while (len-- > 0) {
f7fe979e 1665 const U8 ch = *(U8 *) s++;
6e449a3a 1666 mPUSHu(ch);
a6ec74c1 1667 }
08ca2aa3
TH
1668 else if (checksum > bits_in_uv)
1669 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1670 else
1671 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1672 break;
1673 case 'U':
35bcd338 1674 if (len == 0) {
c5333953 1675 if (explicit_length && howlen != e_star) {
08ca2aa3 1676 /* Switch to "bytes in UTF-8" mode */
f337b084 1677 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1678 else
1679 /* Should be impossible due to the need_utf8() test */
1680 Perl_croak(aTHX_ "U0 mode on a byte string");
1681 }
35bcd338
JH
1682 break;
1683 }
08ca2aa3 1684 if (len > strend - s) len = strend - s;
fc241834 1685 if (!checksum) {
08ca2aa3
TH
1686 if (len && unpack_only_one) len = 1;
1687 EXTEND(SP, len);
1688 EXTEND_MORTAL(len);
fc241834 1689 }
08ca2aa3
TH
1690 while (len-- > 0 && s < strend) {
1691 STRLEN retlen;
1692 UV auv;
1693 if (utf8) {
1694 U8 result[UTF8_MAXLEN];
f7fe979e 1695 const char *ptr = s;
08ca2aa3 1696 STRLEN len;
08ca2aa3
TH
1697 /* Bug: warns about bad utf8 even if we are short on bytes
1698 and will break out of the loop */
230e1fce
NC
1699 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1700 'U'))
08ca2aa3
TH
1701 break;
1702 len = UTF8SKIP(result);
fc241834 1703 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1704 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1705 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1706 s = ptr;
1707 } else {
cde84f2a 1708 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1709 if (retlen == (STRLEN) -1 || retlen == 0)
1710 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1711 s += retlen;
1712 }
1713 if (!checksum)
6e449a3a 1714 mPUSHu(auv);
73cb7263 1715 else if (checksum > bits_in_uv)
08ca2aa3 1716 cdouble += (NV) auv;
73cb7263 1717 else
08ca2aa3 1718 cuv += auv;
a6ec74c1
JH
1719 }
1720 break;
49704364
WL
1721 case 's' | TYPE_IS_SHRIEKING:
1722#if SHORTSIZE != SIZE16
73cb7263 1723 while (len-- > 0) {
08ca2aa3 1724 short ashort;
f337b084
TH
1725 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1726 DO_BO_UNPACK(ashort, s);
08ca2aa3 1727 if (!checksum)
6e449a3a 1728 mPUSHi(ashort);
73cb7263
NC
1729 else if (checksum > bits_in_uv)
1730 cdouble += (NV)ashort;
1731 else
1732 cuv += ashort;
49704364
WL
1733 }
1734 break;
1735#else
1736 /* Fallthrough! */
a6ec74c1 1737#endif
49704364 1738 case 's':
73cb7263 1739 while (len-- > 0) {
08ca2aa3
TH
1740 I16 ai16;
1741
1742#if U16SIZE > SIZE16
1743 ai16 = 0;
1744#endif
f337b084 1745 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1746 DO_BO_UNPACK(ai16, 16);
1109a392 1747#if U16SIZE > SIZE16
73cb7263
NC
1748 if (ai16 > 32767)
1749 ai16 -= 65536;
a6ec74c1 1750#endif
08ca2aa3 1751 if (!checksum)
6e449a3a 1752 mPUSHi(ai16);
73cb7263
NC
1753 else if (checksum > bits_in_uv)
1754 cdouble += (NV)ai16;
1755 else
1756 cuv += ai16;
a6ec74c1
JH
1757 }
1758 break;
49704364
WL
1759 case 'S' | TYPE_IS_SHRIEKING:
1760#if SHORTSIZE != SIZE16
73cb7263 1761 while (len-- > 0) {
08ca2aa3 1762 unsigned short aushort;
f337b084
TH
1763 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1764 DO_BO_UNPACK(aushort, s);
08ca2aa3 1765 if (!checksum)
6e449a3a 1766 mPUSHu(aushort);
73cb7263
NC
1767 else if (checksum > bits_in_uv)
1768 cdouble += (NV)aushort;
1769 else
1770 cuv += aushort;
49704364
WL
1771 }
1772 break;
1773#else
486ec47a 1774 /* Fallthrough! */
49704364 1775#endif
a6ec74c1
JH
1776 case 'v':
1777 case 'n':
1778 case 'S':
73cb7263 1779 while (len-- > 0) {
08ca2aa3
TH
1780 U16 au16;
1781#if U16SIZE > SIZE16
1782 au16 = 0;
1783#endif
f337b084 1784 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1785 DO_BO_UNPACK(au16, 16);
a6ec74c1 1786#ifdef HAS_NTOHS
73cb7263
NC
1787 if (datumtype == 'n')
1788 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1789#endif
1790#ifdef HAS_VTOHS
73cb7263
NC
1791 if (datumtype == 'v')
1792 au16 = vtohs(au16);
a6ec74c1 1793#endif
08ca2aa3 1794 if (!checksum)
6e449a3a 1795 mPUSHu(au16);
73cb7263 1796 else if (checksum > bits_in_uv)
f337b084 1797 cdouble += (NV) au16;
73cb7263
NC
1798 else
1799 cuv += au16;
a6ec74c1
JH
1800 }
1801 break;
7212898e 1802#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1803 case 'v' | TYPE_IS_SHRIEKING:
1804 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1805 while (len-- > 0) {
08ca2aa3
TH
1806 I16 ai16;
1807# if U16SIZE > SIZE16
1808 ai16 = 0;
1809# endif
f337b084 1810 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1811# ifdef HAS_NTOHS
73cb7263 1812 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1813 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1814# endif /* HAS_NTOHS */
1815# ifdef HAS_VTOHS
73cb7263 1816 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1817 ai16 = (I16) vtohs((U16) ai16);
1818# endif /* HAS_VTOHS */
1819 if (!checksum)
6e449a3a 1820 mPUSHi(ai16);
73cb7263 1821 else if (checksum > bits_in_uv)
08ca2aa3 1822 cdouble += (NV) ai16;
73cb7263
NC
1823 else
1824 cuv += ai16;
068bd2e7
MHM
1825 }
1826 break;
08ca2aa3 1827#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1828 case 'i':
49704364 1829 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1830 while (len-- > 0) {
08ca2aa3 1831 int aint;
f337b084
TH
1832 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1833 DO_BO_UNPACK(aint, i);
08ca2aa3 1834 if (!checksum)
6e449a3a 1835 mPUSHi(aint);
73cb7263
NC
1836 else if (checksum > bits_in_uv)
1837 cdouble += (NV)aint;
1838 else
1839 cuv += aint;
a6ec74c1
JH
1840 }
1841 break;
1842 case 'I':
49704364 1843 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1844 while (len-- > 0) {
08ca2aa3 1845 unsigned int auint;
f337b084
TH
1846 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1847 DO_BO_UNPACK(auint, i);
08ca2aa3 1848 if (!checksum)
6e449a3a 1849 mPUSHu(auint);
73cb7263
NC
1850 else if (checksum > bits_in_uv)
1851 cdouble += (NV)auint;
1852 else
1853 cuv += auint;
a6ec74c1
JH
1854 }
1855 break;
92d41999 1856 case 'j':
73cb7263 1857 while (len-- > 0) {
08ca2aa3 1858 IV aiv;
f337b084 1859 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1860#if IVSIZE == INTSIZE
f337b084 1861 DO_BO_UNPACK(aiv, i);
1109a392 1862#elif IVSIZE == LONGSIZE
f337b084 1863 DO_BO_UNPACK(aiv, l);
1109a392 1864#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1865 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1866#else
1867 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1868#endif
08ca2aa3 1869 if (!checksum)
6e449a3a 1870 mPUSHi(aiv);
73cb7263
NC
1871 else if (checksum > bits_in_uv)
1872 cdouble += (NV)aiv;
1873 else
1874 cuv += aiv;
92d41999
JH
1875 }
1876 break;
1877 case 'J':
73cb7263 1878 while (len-- > 0) {
08ca2aa3 1879 UV auv;
f337b084 1880 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1881#if IVSIZE == INTSIZE
f337b084 1882 DO_BO_UNPACK(auv, i);
08ca2aa3 1883#elif IVSIZE == LONGSIZE
f337b084 1884 DO_BO_UNPACK(auv, l);
08ca2aa3 1885#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1886 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1887#else
1888 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1889#endif
08ca2aa3 1890 if (!checksum)
6e449a3a 1891 mPUSHu(auv);
73cb7263
NC
1892 else if (checksum > bits_in_uv)
1893 cdouble += (NV)auv;
1894 else
1895 cuv += auv;
92d41999
JH
1896 }
1897 break;
49704364
WL
1898 case 'l' | TYPE_IS_SHRIEKING:
1899#if LONGSIZE != SIZE32
73cb7263 1900 while (len-- > 0) {
08ca2aa3 1901 long along;
f337b084
TH
1902 SHIFT_VAR(utf8, s, strend, along, datumtype);
1903 DO_BO_UNPACK(along, l);
08ca2aa3 1904 if (!checksum)
6e449a3a 1905 mPUSHi(along);
73cb7263
NC
1906 else if (checksum > bits_in_uv)
1907 cdouble += (NV)along;
1908 else
1909 cuv += along;
49704364
WL
1910 }
1911 break;
1912#else
1913 /* Fallthrough! */
a6ec74c1 1914#endif
49704364 1915 case 'l':
73cb7263 1916 while (len-- > 0) {
08ca2aa3
TH
1917 I32 ai32;
1918#if U32SIZE > SIZE32
1919 ai32 = 0;
1920#endif
f337b084 1921 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1922 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1923#if U32SIZE > SIZE32
08ca2aa3 1924 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1925#endif
08ca2aa3 1926 if (!checksum)
6e449a3a 1927 mPUSHi(ai32);
73cb7263
NC
1928 else if (checksum > bits_in_uv)
1929 cdouble += (NV)ai32;
1930 else
1931 cuv += ai32;
a6ec74c1
JH
1932 }
1933 break;
49704364
WL
1934 case 'L' | TYPE_IS_SHRIEKING:
1935#if LONGSIZE != SIZE32
73cb7263 1936 while (len-- > 0) {
08ca2aa3 1937 unsigned long aulong;
f337b084
TH
1938 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1939 DO_BO_UNPACK(aulong, l);
08ca2aa3 1940 if (!checksum)
6e449a3a 1941 mPUSHu(aulong);
73cb7263
NC
1942 else if (checksum > bits_in_uv)
1943 cdouble += (NV)aulong;
1944 else
1945 cuv += aulong;
49704364
WL
1946 }
1947 break;
1948#else
1949 /* Fall through! */
1950#endif
a6ec74c1
JH
1951 case 'V':
1952 case 'N':
1953 case 'L':
73cb7263 1954 while (len-- > 0) {
08ca2aa3
TH
1955 U32 au32;
1956#if U32SIZE > SIZE32
1957 au32 = 0;
1958#endif
f337b084 1959 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1960 DO_BO_UNPACK(au32, 32);
a6ec74c1 1961#ifdef HAS_NTOHL
73cb7263
NC
1962 if (datumtype == 'N')
1963 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1964#endif
1965#ifdef HAS_VTOHL
73cb7263
NC
1966 if (datumtype == 'V')
1967 au32 = vtohl(au32);
a6ec74c1 1968#endif
08ca2aa3 1969 if (!checksum)
6e449a3a 1970 mPUSHu(au32);
fc241834
RGS
1971 else if (checksum > bits_in_uv)
1972 cdouble += (NV)au32;
1973 else
1974 cuv += au32;
a6ec74c1
JH
1975 }
1976 break;
7212898e 1977#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1978 case 'V' | TYPE_IS_SHRIEKING:
1979 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1980 while (len-- > 0) {
08ca2aa3
TH
1981 I32 ai32;
1982# if U32SIZE > SIZE32
1983 ai32 = 0;
1984# endif
f337b084 1985 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1986# ifdef HAS_NTOHL
73cb7263
NC
1987 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1988 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1989# endif
1990# ifdef HAS_VTOHL
73cb7263
NC
1991 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1992 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1993# endif
1994 if (!checksum)
6e449a3a 1995 mPUSHi(ai32);
73cb7263
NC
1996 else if (checksum > bits_in_uv)
1997 cdouble += (NV)ai32;
1998 else
1999 cuv += ai32;
068bd2e7
MHM
2000 }
2001 break;
08ca2aa3 2002#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 2003 case 'p':
a6ec74c1 2004 while (len-- > 0) {
f7fe979e 2005 const char *aptr;
f337b084 2006 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2007 DO_BO_UNPACK_PC(aptr);
c4c5f44a 2008 /* newSVpv generates undef if aptr is NULL */
6e449a3a 2009 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
2010 }
2011 break;
2012 case 'w':
a6ec74c1
JH
2013 {
2014 UV auv = 0;
2015 U32 bytes = 0;
fc241834 2016
08ca2aa3
TH
2017 while (len > 0 && s < strend) {
2018 U8 ch;
f337b084 2019 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 2020 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2021 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2022 if (ch < 0x80) {
a6ec74c1 2023 bytes = 0;
6e449a3a 2024 mPUSHu(auv);
a6ec74c1
JH
2025 len--;
2026 auv = 0;
08ca2aa3 2027 continue;
a6ec74c1 2028 }
08ca2aa3 2029 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2030 const char *t;
a6ec74c1 2031
f5992bc4 2032 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2033 while (s < strend) {
f337b084 2034 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2035 sv = mul128(sv, (U8)(ch & 0x7f));
2036 if (!(ch & 0x80)) {
a6ec74c1
JH
2037 bytes = 0;
2038 break;
2039 }
2040 }
10516c54 2041 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2042 while (*t == '0')
2043 t++;
2044 sv_chop(sv, t);
6e449a3a 2045 mPUSHs(sv);
a6ec74c1
JH
2046 len--;
2047 auv = 0;
2048 }
2049 }
2050 if ((s >= strend) && bytes)
49704364 2051 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2052 }
2053 break;
2054 case 'P':
49704364
WL
2055 if (symptr->howlen == e_star)
2056 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2057 EXTEND(SP, 1);
2d3e0934 2058 if (s + sizeof(char*) <= strend) {
08ca2aa3 2059 char *aptr;
f337b084 2060 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2061 DO_BO_UNPACK_PC(aptr);
fc241834 2062 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2063 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2064 }
a6ec74c1
JH
2065 break;
2066#ifdef HAS_QUAD
2067 case 'q':
73cb7263 2068 while (len-- > 0) {
08ca2aa3 2069 Quad_t aquad;
f337b084
TH
2070 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2071 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2072 if (!checksum)
6e449a3a
MHM
2073 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2074 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2075 else if (checksum > bits_in_uv)
2076 cdouble += (NV)aquad;
2077 else
2078 cuv += aquad;
2079 }
a6ec74c1
JH
2080 break;
2081 case 'Q':
73cb7263 2082 while (len-- > 0) {
08ca2aa3 2083 Uquad_t auquad;
f337b084
TH
2084 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2085 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2086 if (!checksum)
6e449a3a
MHM
2087 mPUSHs(auquad <= UV_MAX ?
2088 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2089 else if (checksum > bits_in_uv)
2090 cdouble += (NV)auquad;
2091 else
2092 cuv += auquad;
a6ec74c1
JH
2093 }
2094 break;
08ca2aa3 2095#endif /* HAS_QUAD */
a6ec74c1
JH
2096 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2097 case 'f':
73cb7263 2098 while (len-- > 0) {
08ca2aa3 2099 float afloat;
f337b084 2100 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2101 DO_BO_UNPACK_N(afloat, float);
2102 if (!checksum)
6e449a3a 2103 mPUSHn(afloat);
08ca2aa3 2104 else
73cb7263 2105 cdouble += afloat;
fc241834 2106 }
a6ec74c1
JH
2107 break;
2108 case 'd':
73cb7263 2109 while (len-- > 0) {
08ca2aa3 2110 double adouble;
f337b084 2111 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2112 DO_BO_UNPACK_N(adouble, double);
2113 if (!checksum)
6e449a3a 2114 mPUSHn(adouble);
08ca2aa3 2115 else
73cb7263 2116 cdouble += adouble;
fc241834 2117 }
a6ec74c1 2118 break;
92d41999 2119 case 'F':
73cb7263 2120 while (len-- > 0) {
275663fa
TC
2121 NV_bytes anv;
2122 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2123 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 2124 if (!checksum)
275663fa 2125 mPUSHn(anv.nv);
08ca2aa3 2126 else
275663fa 2127 cdouble += anv.nv;
fc241834 2128 }
92d41999
JH
2129 break;
2130#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2131 case 'D':
73cb7263 2132 while (len-- > 0) {
275663fa
TC
2133 ld_bytes aldouble;
2134 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2135 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 2136 if (!checksum)
275663fa 2137 mPUSHn(aldouble.ld);
08ca2aa3 2138 else
275663fa 2139 cdouble += aldouble.ld;
92d41999
JH
2140 }
2141 break;
2142#endif
a6ec74c1 2143 case 'u':
858fe5e1 2144 if (!checksum) {
f7fe979e 2145 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2146 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2147 if (l) SvPOK_on(sv);
2148 }
2149 if (utf8) {
2150 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2151 I32 a, b, c, d;
db187877 2152 char hunk[3];
08ca2aa3 2153
08ca2aa3
TH
2154 while (len > 0) {
2155 next_uni_uu(aTHX_ &s, strend, &a);
2156 next_uni_uu(aTHX_ &s, strend, &b);
2157 next_uni_uu(aTHX_ &s, strend, &c);
2158 next_uni_uu(aTHX_ &s, strend, &d);
2159 hunk[0] = (char)((a << 2) | (b >> 4));
2160 hunk[1] = (char)((b << 4) | (c >> 2));
2161 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2162 if (!checksum)
2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
2164 len -= 3;
2165 }
2166 if (s < strend) {
f7fe979e
AL
2167 if (*s == '\n') {
2168 s++;
2169 }
08ca2aa3
TH
2170 else {
2171 /* possible checksum byte */
f7fe979e
AL
2172 const char *skip = s+UTF8SKIP(s);
2173 if (skip < strend && *skip == '\n')
2174 s = skip+1;
08ca2aa3
TH
2175 }
2176 }
2177 }
2178 } else {
fc241834
RGS
2179 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2180 I32 a, b, c, d;
db187877 2181 char hunk[3];
a6ec74c1 2182
fc241834
RGS
2183 len = PL_uudmap[*(U8*)s++] & 077;
2184 while (len > 0) {
2185 if (s < strend && ISUUCHAR(*s))
2186 a = PL_uudmap[*(U8*)s++] & 077;
2187 else
2188 a = 0;
2189 if (s < strend && ISUUCHAR(*s))
2190 b = PL_uudmap[*(U8*)s++] & 077;
2191 else
2192 b = 0;
2193 if (s < strend && ISUUCHAR(*s))
2194 c = PL_uudmap[*(U8*)s++] & 077;
2195 else
2196 c = 0;
2197 if (s < strend && ISUUCHAR(*s))
2198 d = PL_uudmap[*(U8*)s++] & 077;
2199 else
2200 d = 0;
2201 hunk[0] = (char)((a << 2) | (b >> 4));
2202 hunk[1] = (char)((b << 4) | (c >> 2));
2203 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2204 if (!checksum)
2205 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
2206 len -= 3;
2207 }
2208 if (*s == '\n')
2209 s++;
2210 else /* possible checksum byte */
2211 if (s + 1 < strend && s[1] == '\n')
2212 s += 2;
a6ec74c1 2213 }
08ca2aa3 2214 }
858fe5e1
TC
2215 if (!checksum)
2216 XPUSHs(sv);
a6ec74c1
JH
2217 break;
2218 }
49704364 2219
a6ec74c1 2220 if (checksum) {
1109a392 2221 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2222 (checksum > bits_in_uv &&
08ca2aa3
TH
2223 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2224 NV trouble, anv;
a6ec74c1 2225
08ca2aa3 2226 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2227 while (checksum >= 16) {
2228 checksum -= 16;
08ca2aa3 2229 anv *= 65536.0;
a6ec74c1 2230 }
a6ec74c1 2231 while (cdouble < 0.0)
08ca2aa3
TH
2232 cdouble += anv;
2233 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2234 sv = newSVnv(cdouble);
a6ec74c1
JH
2235 }
2236 else {
fa8ec7c1
NC
2237 if (checksum < bits_in_uv) {
2238 UV mask = ((UV)1 << checksum) - 1;
92d41999 2239 cuv &= mask;
a6ec74c1 2240 }
c4c5f44a 2241 sv = newSVuv(cuv);
a6ec74c1 2242 }
6e449a3a 2243 mXPUSHs(sv);
a6ec74c1
JH
2244 checksum = 0;
2245 }
fc241834 2246
49704364
WL
2247 if (symptr->flags & FLAG_SLASH){
2248 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 2249 break;
49704364
WL
2250 if( next_symbol(symptr) ){
2251 if( symptr->howlen == e_number )
2252 Perl_croak(aTHX_ "Count after length/code in unpack" );
2253 if( beyond ){
2254 /* ...end of char buffer then no decent length available */
2255 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2256 } else {
2257 /* take top of stack (hope it's numeric) */
2258 len = POPi;
2259 if( len < 0 )
2260 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2261 }
2262 } else {
2263 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2264 }
2265 datumtype = symptr->code;
21c16052 2266 explicit_length = FALSE;
49704364
WL
2267 goto redo_switch;
2268 }
a6ec74c1 2269 }
49704364 2270
18529408
IZ
2271 if (new_s)
2272 *new_s = s;
2273 PUTBACK;
2274 return SP - PL_stack_base - start_sp_offset;
2275}
2276
2277PP(pp_unpack)
2278{
97aff369 2279 dVAR;
18529408 2280 dSP;
bab9c0ac 2281 dPOPPOPssrl;
18529408
IZ
2282 I32 gimme = GIMME_V;
2283 STRLEN llen;
2284 STRLEN rlen;
5c144d81
NC
2285 const char *pat = SvPV_const(left, llen);
2286 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2287 const char *strend = s + rlen;
2288 const char *patend = pat + llen;
08ca2aa3 2289 I32 cnt;
18529408
IZ
2290
2291 PUTBACK;
7accc089 2292 cnt = unpackstring(pat, patend, s, strend,
49704364 2293 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2294 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2295
18529408
IZ
2296 SPAGAIN;
2297 if ( !cnt && gimme == G_SCALAR )
2298 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2299 RETURN;
2300}
2301
f337b084 2302STATIC U8 *
f7fe979e 2303doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2304{
f337b084 2305 *h++ = PL_uuemap[len];
a6ec74c1 2306 while (len > 2) {
f337b084
TH
2307 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2308 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2309 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2310 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2311 s += 3;
2312 len -= 3;
2313 }
2314 if (len > 0) {
f7fe979e 2315 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2316 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2317 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2318 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2319 *h++ = PL_uuemap[0];
a6ec74c1 2320 }
f337b084
TH
2321 *h++ = '\n';
2322 return h;
a6ec74c1
JH
2323}
2324
2325STATIC SV *
f7fe979e 2326S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2327{
8b6e33c7
AL
2328 SV *result = newSVpvn(s, l);
2329 char *const result_c = SvPV_nolen(result); /* convenience */
2330 char *out = result_c;
2331 bool skip = 1;
2332 bool ignore = 0;
a6ec74c1 2333
7918f24d
NC
2334 PERL_ARGS_ASSERT_IS_AN_INT;
2335
a6ec74c1
JH
2336 while (*s) {
2337 switch (*s) {
2338 case ' ':
2339 break;
2340 case '+':
2341 if (!skip) {
2342 SvREFCNT_dec(result);
2343 return (NULL);
2344 }
2345 break;
2346 case '0':
2347 case '1':
2348 case '2':
2349 case '3':
2350 case '4':
2351 case '5':
2352 case '6':
2353 case '7':
2354 case '8':
2355 case '9':
2356 skip = 0;
2357 if (!ignore) {
2358 *(out++) = *s;
2359 }
2360 break;
2361 case '.':
2362 ignore = 1;
2363 break;
2364 default:
2365 SvREFCNT_dec(result);
2366 return (NULL);
2367 }
2368 s++;
2369 }
2370 *(out++) = '\0';
2371 SvCUR_set(result, out - result_c);
2372 return (result);
2373}
2374
2375/* pnum must be '\0' terminated */
2376STATIC int
2377S_div128(pTHX_ SV *pnum, bool *done)
2378{
8b6e33c7
AL
2379 STRLEN len;
2380 char * const s = SvPV(pnum, len);
2381 char *t = s;
2382 int m = 0;
2383
7918f24d
NC
2384 PERL_ARGS_ASSERT_DIV128;
2385
8b6e33c7
AL
2386 *done = 1;
2387 while (*t) {
2388 const int i = m * 10 + (*t - '0');
2389 const int r = (i >> 7); /* r < 10 */
2390 m = i & 0x7F;
2391 if (r) {
2392 *done = 0;
2393 }
2394 *(t++) = '0' + r;
a6ec74c1 2395 }
8b6e33c7
AL
2396 *(t++) = '\0';
2397 SvCUR_set(pnum, (STRLEN) (t - s));
2398 return (m);
a6ec74c1
JH
2399}
2400
18529408 2401/*
7accc089
JH
2402=for apidoc packlist
2403
2404The engine implementing pack() Perl function.
2405
bfce84ec
AL
2406=cut
2407*/
7accc089
JH
2408
2409void
5aaab254 2410Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2411{
97aff369 2412 dVAR;
aadb217d
JH
2413 tempsym_t sym;
2414
7918f24d
NC
2415 PERL_ARGS_ASSERT_PACKLIST;
2416
f7fe979e 2417 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2418
f337b084
TH
2419 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2420 Also make sure any UTF8 flag is loaded */
56eb0262 2421 SvPV_force_nolen(cat);
bfce84ec
AL
2422 if (DO_UTF8(cat))
2423 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2424
49704364
WL
2425 (void)pack_rec( cat, &sym, beglist, endlist );
2426}
2427
f337b084
TH
2428/* like sv_utf8_upgrade, but also repoint the group start markers */
2429STATIC void
2430marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2431 STRLEN len;
2432 tempsym_t *group;
f7fe979e
AL
2433 const char *from_ptr, *from_start, *from_end, **marks, **m;
2434 char *to_start, *to_ptr;
f337b084
TH
2435
2436 if (SvUTF8(sv)) return;
2437
aa07b2f6 2438 from_start = SvPVX_const(sv);
f337b084
TH
2439 from_end = from_start + SvCUR(sv);
2440 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2441 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2442 if (from_ptr == from_end) {
2443 /* Simple case: no character needs to be changed */
2444 SvUTF8_on(sv);
2445 return;
2446 }
2447
3473cf63 2448 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2449 Newx(to_start, len, char);
f337b084
TH
2450 Copy(from_start, to_start, from_ptr-from_start, char);
2451 to_ptr = to_start + (from_ptr-from_start);
2452
a02a5408 2453 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2454 for (group=sym_ptr; group; group = group->previous)
2455 marks[group->level] = from_start + group->strbeg;
2456 marks[sym_ptr->level+1] = from_end+1;
2457 for (m = marks; *m < from_ptr; m++)
2458 *m = to_start + (*m-from_start);
2459
2460 for (;from_ptr < from_end; from_ptr++) {
2461 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2462 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2463 }
2464 *to_ptr = 0;
2465
2466 while (*m == from_ptr) *m++ = to_ptr;
2467 if (m != marks + sym_ptr->level+1) {
2468 Safefree(marks);
2469 Safefree(to_start);
5637ef5b
NC
2470 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2471 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2472 }
2473 for (group=sym_ptr; group; group = group->previous)
2474 group->strbeg = marks[group->level] - to_start;
2475 Safefree(marks);
2476
2477 if (SvOOK(sv)) {
2478 if (SvIVX(sv)) {
b162af07 2479 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2480 from_start -= SvIVX(sv);
2481 SvIV_set(sv, 0);
2482 }
2483 SvFLAGS(sv) &= ~SVf_OOK;
2484 }
2485 if (SvLEN(sv) != 0)
2486 Safefree(from_start);
f880fe2f 2487 SvPV_set(sv, to_start);
b162af07
SP
2488 SvCUR_set(sv, to_ptr - to_start);
2489 SvLEN_set(sv, len);
f337b084
TH
2490 SvUTF8_on(sv);
2491}
2492
2493/* Exponential string grower. Makes string extension effectively O(n)
2494 needed says how many extra bytes we need (not counting the final '\0')
2495 Only grows the string if there is an actual lack of space
2496*/
2497STATIC char *
0bd48802 2498S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2499 const STRLEN cur = SvCUR(sv);
2500 const STRLEN len = SvLEN(sv);
f337b084 2501 STRLEN extend;
7918f24d
NC
2502
2503 PERL_ARGS_ASSERT_SV_EXP_GROW;
2504
f337b084
TH
2505 if (len - cur > needed) return SvPVX(sv);
2506 extend = needed > len ? needed : len;
2507 return SvGROW(sv, len+extend+1);
2508}
49704364
WL
2509
2510STATIC
2511SV **
f337b084 2512S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2513{
97aff369 2514 dVAR;
49704364 2515 tempsym_t lookahead;
f337b084
TH
2516 I32 items = endlist - beglist;
2517 bool found = next_symbol(symptr);
2518 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2519 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2520
7918f24d
NC
2521 PERL_ARGS_ASSERT_PACK_REC;
2522
f337b084
TH
2523 if (symptr->level == 0 && found && symptr->code == 'U') {
2524 marked_upgrade(aTHX_ cat, symptr);
2525 symptr->flags |= FLAG_DO_UTF8;
2526 utf8 = 0;
49704364 2527 }
f337b084 2528 symptr->strbeg = SvCUR(cat);
49704364
WL
2529
2530 while (found) {
f337b084
TH
2531 SV *fromstr;
2532 STRLEN fromlen;
2533 I32 len;
a0714e2c 2534 SV *lengthcode = NULL;
49704364 2535 I32 datumtype = symptr->code;
f337b084
TH
2536 howlen_t howlen = symptr->howlen;
2537 char *start = SvPVX(cat);
2538 char *cur = start + SvCUR(cat);
49704364 2539
f337b084
TH
2540#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2541
2542 switch (howlen) {
fc241834 2543 case e_star:
f337b084
TH
2544 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2545 0 : items;
2546 break;
2547 default:
2548 /* e_no_len and e_number */
2549 len = symptr->length;
49704364
WL
2550 break;
2551 }
2552
f337b084 2553 if (len) {
a7a3cfaa 2554 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2555
a7a3cfaa
TH
2556 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2557 /* We can process this letter. */
2558 STRLEN size = props & PACK_SIZE_MASK;
2559 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2560 }
f337b084
TH
2561 }
2562
49704364
WL
2563 /* Look ahead for next symbol. Do we have code/code? */
2564 lookahead = *symptr;
2565 found = next_symbol(&lookahead);
246f24af
TH
2566 if (symptr->flags & FLAG_SLASH) {
2567 IV count;
f337b084 2568 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2569 if (strchr("aAZ", lookahead.code)) {
2570 if (lookahead.howlen == e_number) count = lookahead.length;
2571 else {
ce399ba6 2572 if (items > 0) {
48a5da33 2573 count = sv_len_utf8(*beglist);
ce399ba6 2574 }
246f24af
TH
2575 else count = 0;
2576 if (lookahead.code == 'Z') count++;
2577 }
2578 } else {
2579 if (lookahead.howlen == e_number && lookahead.length < items)
2580 count = lookahead.length;
2581 else count = items;
2582 }
2583 lookahead.howlen = e_number;
2584 lookahead.length = count;
2585 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2586 }
49704364 2587
fc241834
RGS
2588 /* Code inside the switch must take care to properly update
2589 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2590 doesn't simply leave using break */
1109a392 2591 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2592 default:
f337b084
TH
2593 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2594 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2595 case '%':
49704364 2596 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2597 {
2598 char *from;
2599#ifdef PERL_PACK_CAN_SHRIEKSIGN
2600 case '.' | TYPE_IS_SHRIEKING:
2601#endif
2602 case '.':
2603 if (howlen == e_star) from = start;
2604 else if (len == 0) from = cur;
2605 else {
2606 tempsym_t *group = symptr;
2607
2608 while (--len && group) group = group->previous;
2609 from = group ? start + group->strbeg : start;
2610 }
2611 fromstr = NEXTFROM;
2612 len = SvIV(fromstr);
2613 goto resize;
2614#ifdef PERL_PACK_CAN_SHRIEKSIGN
2615 case '@' | TYPE_IS_SHRIEKING:
2616#endif
a6ec74c1 2617 case '@':
28be1210
TH
2618 from = start + symptr->strbeg;
2619 resize:
2620#ifdef PERL_PACK_CAN_SHRIEKSIGN
2621 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2622#else /* PERL_PACK_CAN_SHRIEKSIGN */
2623 if (utf8)
2624#endif
2625 if (len >= 0) {
2626 while (len && from < cur) {
2627 from += UTF8SKIP(from);
2628 len--;
2629 }
2630 if (from > cur)
2631 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2632 if (len) {
2633 /* Here we know from == cur */
2634 grow:
2635 GROWING(0, cat, start, cur, len);
2636 Zero(cur, len, char);
2637 cur += len;
2638 } else if (from < cur) {
2639 len = cur - from;
2640 goto shrink;
2641 } else goto no_change;
2642 } else {
2643 cur = from;
2644 len = -len;
2645 goto utf8_shrink;
f337b084 2646 }
28be1210
TH
2647 else {
2648 len -= cur - from;
f337b084 2649 if (len > 0) goto grow;
28be1210 2650 if (len == 0) goto no_change;
fc241834 2651 len = -len;
28be1210 2652 goto shrink;
f337b084 2653 }
a6ec74c1 2654 break;
28be1210 2655 }
fc241834 2656 case '(': {
49704364 2657 tempsym_t savsym = *symptr;
66c611c5
MHM
2658 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2659 symptr->flags |= group_modifiers;
49704364
WL
2660 symptr->patend = savsym.grpend;
2661 symptr->level++;
f337b084 2662 symptr->previous = &lookahead;
18529408 2663 while (len--) {
f337b084
TH
2664 U32 was_utf8;
2665 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2666 else symptr->flags &= ~FLAG_PARSE_UTF8;
2667 was_utf8 = SvUTF8(cat);
49704364 2668 symptr->patptr = savsym.grpbeg;
f337b084
TH
2669 beglist = pack_rec(cat, symptr, beglist, endlist);
2670 if (SvUTF8(cat) != was_utf8)
2671 /* This had better be an upgrade while in utf8==0 mode */
2672 utf8 = 1;
2673
49704364 2674 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2675 break; /* No way to continue */
2676 }
ee790063 2677 items = endlist - beglist;
f337b084
TH
2678 lookahead.flags = symptr->flags & ~group_modifiers;
2679 goto no_change;
18529408 2680 }
62f95557
IZ
2681 case 'X' | TYPE_IS_SHRIEKING:
2682 if (!len) /* Avoid division by 0 */
2683 len = 1;
f337b084
TH
2684 if (utf8) {
2685 char *hop, *last;
2686 I32 l = len;
2687 hop = last = start;
2688 while (hop < cur) {
2689 hop += UTF8SKIP(hop);
2690 if (--l == 0) {
2691 last = hop;
2692 l = len;
2693 }
2694 }
2695 if (last > cur)
2696 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2697 cur = last;
2698 break;
2699 }
2700 len = (cur-start) % len;
62f95557 2701 /* FALL THROUGH */
a6ec74c1 2702 case 'X':
f337b084
TH
2703 if (utf8) {
2704 if (len < 1) goto no_change;
28be1210 2705 utf8_shrink:
f337b084
TH
2706 while (len > 0) {
2707 if (cur <= start)
28be1210
TH
2708 Perl_croak(aTHX_ "'%c' outside of string in pack",
2709 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2710 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2711 if (cur <= start)
28be1210
TH
2712 Perl_croak(aTHX_ "'%c' outside of string in pack",
2713 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2714 }
2715 len--;
2716 }
2717 } else {
fc241834 2718 shrink:
f337b084 2719 if (cur - start < len)
28be1210
TH
2720 Perl_croak(aTHX_ "'%c' outside of string in pack",
2721 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2722 cur -= len;
2723 }
2724 if (cur < start+symptr->strbeg) {
2725 /* Make sure group starts don't point into the void */
2726 tempsym_t *group;
9e27e96a 2727 const STRLEN length = cur-start;
f337b084
TH
2728 for (group = symptr;
2729 group && length < group->strbeg;
2730 group = group->previous) group->strbeg = length;
2731 lookahead.strbeg = length;
2732 }
a6ec74c1 2733 break;
fc241834
RGS
2734 case 'x' | TYPE_IS_SHRIEKING: {
2735 I32 ai32;
62f95557
IZ
2736 if (!len) /* Avoid division by 0 */
2737 len = 1;
230e1fce 2738 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2739 else ai32 = (cur - start) % len;
2740 if (ai32 == 0) goto no_change;
2741 len -= ai32;
2742 }
2743 /* FALL THROUGH */
a6ec74c1 2744 case 'x':
f337b084 2745 goto grow;
a6ec74c1
JH
2746 case 'A':
2747 case 'Z':
f337b084 2748 case 'a': {
f7fe979e 2749 const char *aptr;
f337b084 2750
a6ec74c1 2751 fromstr = NEXTFROM;
e62f0680 2752 aptr = SvPV_const(fromstr, fromlen);
f337b084 2753 if (DO_UTF8(fromstr)) {
f7fe979e 2754 const char *end, *s;
f337b084
TH
2755
2756 if (!utf8 && !SvUTF8(cat)) {
2757 marked_upgrade(aTHX_ cat, symptr);
2758 lookahead.flags |= FLAG_DO_UTF8;
2759 lookahead.strbeg = symptr->strbeg;
2760 utf8 = 1;
2761 start = SvPVX(cat);
2762 cur = start + SvCUR(cat);
2763 }
fc241834 2764 if (howlen == e_star) {
f337b084
TH
2765 if (utf8) goto string_copy;
2766 len = fromlen+1;
2767 }
2768 s = aptr;
2769 end = aptr + fromlen;
2770 fromlen = datumtype == 'Z' ? len-1 : len;
2771 while ((I32) fromlen > 0 && s < end) {
2772 s += UTF8SKIP(s);
2773 fromlen--;
2774 }
2775 if (s > end)
2776 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2777 if (utf8) {
fc241834 2778 len = fromlen;
f337b084
TH
2779 if (datumtype == 'Z') len++;
2780 fromlen = s-aptr;
2781 len += fromlen;
fc241834 2782
f337b084 2783 goto string_copy;
fc241834 2784 }
f337b084
TH
2785 fromlen = len - fromlen;
2786 if (datumtype == 'Z') fromlen--;
2787 if (howlen == e_star) {
2788 len = fromlen;
2789 if (datumtype == 'Z') len++;
fc241834 2790 }
f337b084 2791 GROWING(0, cat, start, cur, len);
fc241834 2792 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2793 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2794 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2795 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2796 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2797 cur += fromlen;
a6ec74c1 2798 len -= fromlen;
f337b084
TH
2799 } else if (utf8) {
2800 if (howlen == e_star) {
2801 len = fromlen;
2802 if (datumtype == 'Z') len++;
a6ec74c1 2803 }
f337b084
TH
2804 if (len <= (I32) fromlen) {
2805 fromlen = len;
2806 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2807 }
fc241834 2808 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2809 upgrade, so:
2810 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2811 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2812 len -= fromlen;
2813 while (fromlen > 0) {
230e1fce 2814 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2815 aptr++;
2816 fromlen--;
fc241834 2817 }
f337b084
TH
2818 } else {
2819 string_copy:
2820 if (howlen == e_star) {
2821 len = fromlen;
2822 if (datumtype == 'Z') len++;
2823 }
2824 if (len <= (I32) fromlen) {
2825 fromlen = len;
2826 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2827 }
f337b084
TH
2828 GROWING(0, cat, start, cur, len);
2829 Copy(aptr, cur, fromlen, char);
2830 cur += fromlen;
2831 len -= fromlen;
a6ec74c1 2832 }
f337b084
TH
2833 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2834 cur += len;
3c4fb04a 2835 SvTAINT(cat);
a6ec74c1 2836 break;
f337b084 2837 }
a6ec74c1 2838 case 'B':
f337b084 2839 case 'b': {
b83604b4 2840 const char *str, *end;
f337b084
TH
2841 I32 l, field_len;
2842 U8 bits;
2843 bool utf8_source;
2844 U32 utf8_flags;
a6ec74c1 2845
fc241834 2846 fromstr = NEXTFROM;
b83604b4 2847 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2848 end = str + fromlen;
2849 if (DO_UTF8(fromstr)) {
2850 utf8_source = TRUE;
041457d9 2851 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2852 } else {
2853 utf8_source = FALSE;
2854 utf8_flags = 0; /* Unused, but keep compilers happy */
2855 }
2856 if (howlen == e_star) len = fromlen;
2857 field_len = (len+7)/8;
2858 GROWING(utf8, cat, start, cur, field_len);
2859 if (len > (I32)fromlen) len = fromlen;
2860 bits = 0;
2861 l = 0;
2862 if (datumtype == 'B')
2863 while (l++ < len) {
2864 if (utf8_source) {
95b63a38 2865 UV val = 0;
f337b084
TH
2866 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2867 bits |= val & 1;
2868 } else bits |= *str++ & 1;
2869 if (l & 7) bits <<= 1;
fc241834 2870 else {
f337b084
TH
2871 PUSH_BYTE(utf8, cur, bits);
2872 bits = 0;
a6ec74c1
JH
2873 }
2874 }
f337b084
TH
2875 else
2876 /* datumtype == 'b' */
2877 while (l++ < len) {
2878 if (utf8_source) {
95b63a38 2879 UV val = 0;
f337b084
TH
2880 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2881 if (val & 1) bits |= 0x80;
2882 } else if (*str++ & 1)
2883 bits |= 0x80;
2884 if (l & 7) bits >>= 1;
fc241834 2885 else {
f337b084
TH
2886 PUSH_BYTE(utf8, cur, bits);
2887 bits = 0;
a6ec74c1
JH
2888 }
2889 }
f337b084
TH
2890 l--;
2891 if (l & 7) {
fc241834 2892 if (datumtype == 'B')
f337b084 2893 bits <<= 7 - (l & 7);
fc241834 2894 else
f337b084
TH
2895 bits >>= 7 - (l & 7);
2896 PUSH_BYTE(utf8, cur, bits);
2897 l += 7;
a6ec74c1 2898 }
f337b084
TH
2899 /* Determine how many chars are left in the requested field */
2900 l /= 8;
2901 if (howlen == e_star) field_len = 0;
2902 else field_len -= l;
2903 Zero(cur, field_len, char);
2904 cur += field_len;
a6ec74c1 2905 break;
f337b084 2906 }
a6ec74c1 2907 case 'H':
f337b084 2908 case 'h': {
10516c54 2909 const char *str, *end;
f337b084
TH
2910 I32 l, field_len;
2911 U8 bits;
2912 bool utf8_source;
2913 U32 utf8_flags;
a6ec74c1 2914
fc241834 2915 fromstr = NEXTFROM;
10516c54 2916 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2917 end = str + fromlen;
2918 if (DO_UTF8(fromstr)) {
2919 utf8_source = TRUE;
041457d9 2920 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2921 } else {
2922 utf8_source = FALSE;
2923 utf8_flags = 0; /* Unused, but keep compilers happy */
2924 }
2925 if (howlen == e_star) len = fromlen;
2926 field_len = (len+1)/2;
2927 GROWING(utf8, cat, start, cur, field_len);
2928 if (!utf8 && len > (I32)fromlen) len = fromlen;
2929 bits = 0;
2930 l = 0;
2931 if (datumtype == 'H')
2932 while (l++ < len) {
2933 if (utf8_source) {
95b63a38 2934 UV val = 0;
f337b084
TH
2935 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2936 if (val < 256 && isALPHA(val))
2937 bits |= (val + 9) & 0xf;
a6ec74c1 2938 else
f337b084
TH
2939 bits |= val & 0xf;
2940 } else if (isALPHA(*str))
2941 bits |= (*str++ + 9) & 0xf;
2942 else
2943 bits |= *str++ & 0xf;
2944 if (l & 1) bits <<= 4;
fc241834 2945 else {
f337b084
TH
2946 PUSH_BYTE(utf8, cur, bits);
2947 bits = 0;
a6ec74c1
JH
2948 }
2949 }
f337b084
TH
2950 else
2951 while (l++ < len) {
2952 if (utf8_source) {
95b63a38 2953 UV val = 0;
f337b084
TH
2954 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2955 if (val < 256 && isALPHA(val))
2956 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2957 else
f337b084
TH
2958 bits |= (val & 0xf) << 4;
2959 } else if (isALPHA(*str))
2960 bits |= ((*str++ + 9) & 0xf) << 4;
2961 else
2962 bits |= (*str++ & 0xf) << 4;
2963 if (l & 1) bits >>= 4;
fc241834 2964 else {
f337b084
TH
2965 PUSH_BYTE(utf8, cur, bits);
2966 bits = 0;
a6ec74c1 2967 }
fc241834 2968 }
f337b084
TH
2969 l--;
2970 if (l & 1) {
2971 PUSH_BYTE(utf8, cur, bits);
2972 l++;
2973 }
2974 /* Determine how many chars are left in the requested field */
2975 l /= 2;
2976 if (howlen == e_star) field_len = 0;
2977 else field_len -= l;
2978 Zero(cur, field_len, char);
2979 cur += field_len;
2980 break;
fc241834
RGS
2981 }
2982 case 'c':
f337b084
TH
2983 while (len-- > 0) {
2984 IV aiv;
2985 fromstr = NEXTFROM;
2986 aiv = SvIV(fromstr);
a2a5de95
NC
2987 if ((-128 > aiv || aiv > 127))
2988 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2989 "Character in 'c' format wrapped in pack");
585ec06d 2990 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2991 }
2992 break;
2993 case 'C':
f337b084
TH
2994 if (len == 0) {
2995 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2996 break;
2997 }
a6ec74c1 2998 while (len-- > 0) {
f337b084 2999 IV aiv;
a6ec74c1 3000 fromstr = NEXTFROM;
f337b084 3001 aiv = SvIV(fromstr);
a2a5de95
NC
3002 if ((0 > aiv || aiv > 0xff))
3003 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3004 "Character in 'C' format wrapped in pack");
1651fc44 3005 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 3006 }
fc241834
RGS
3007 break;
3008 case 'W': {
3009 char *end;
670f1322 3010 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
3011
3012 end = start+SvLEN(cat)-1;
3013 if (utf8) end -= UTF8_MAXLEN-1;
3014 while (len-- > 0) {
3015 UV auv;
3016 fromstr = NEXTFROM;
3017 auv = SvUV(fromstr);
3018 if (in_bytes) auv = auv % 0x100;
3019 if (utf8) {
3020 W_utf8:
3021 if (cur > end) {
3022 *cur = '\0';
b162af07 3023 SvCUR_set(cat, cur - start);
fc241834
RGS
3024
3025 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3026 end = start+SvLEN(cat)-UTF8_MAXLEN;
3027 }
230e1fce
NC
3028 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3029 NATIVE_TO_UNI(auv),
041457d9 3030 warn_utf8 ?
230e1fce 3031 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3032 } else {
3033 if (auv >= 0x100) {
3034 if (!SvUTF8(cat)) {
3035 *cur = '\0';
b162af07 3036 SvCUR_set(cat, cur - start);
fc241834
RGS
3037 marked_upgrade(aTHX_ cat, symptr);
3038 lookahead.flags |= FLAG_DO_UTF8;
3039 lookahead.strbeg = symptr->strbeg;
3040 utf8 = 1;
3041 start = SvPVX(cat);
3042 cur = start + SvCUR(cat);
3043 end = start+SvLEN(cat)-UTF8_MAXLEN;
3044 goto W_utf8;
3045 }
a2a5de95
NC
3046 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3047 "Character in 'W' format wrapped in pack");
fc241834
RGS
3048 auv &= 0xff;
3049 }
3050 if (cur >= end) {
3051 *cur = '\0';
b162af07 3052 SvCUR_set(cat, cur - start);
fc241834
RGS
3053 GROWING(0, cat, start, cur, len+1);
3054 end = start+SvLEN(cat)-1;
3055 }
fe2774ed 3056 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3057 }
3058 }
3059 break;
fc241834
RGS
3060 }
3061 case 'U': {
3062 char *end;
3063
3064 if (len == 0) {
3065 if (!(symptr->flags & FLAG_DO_UTF8)) {
3066 marked_upgrade(aTHX_ cat, symptr);
3067 lookahead.flags |= FLAG_DO_UTF8;
3068 lookahead.strbeg = symptr->strbeg;
3069 }
3070 utf8 = 0;
3071 goto no_change;
3072 }
3073
3074 end = start+SvLEN(cat);
3075 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3076 while (len-- > 0) {
fc241834 3077 UV auv;
a6ec74c1 3078 fromstr = NEXTFROM;
fc241834
RGS
3079 auv = SvUV(fromstr);
3080 if (utf8) {
230e1fce 3081 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3082 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3083 warn_utf8 ?
fc241834
RGS
3084 0 : UNICODE_ALLOW_ANY);
3085 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3086 *cur = '\0';
b162af07 3087 SvCUR_set(cat, cur - start);
fc241834
RGS
3088 GROWING(0, cat, start, cur,
3089 len+(endb-buffer)*UTF8_EXPAND);
3090 end = start+SvLEN(cat);
3091 }
64844641 3092 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3093 } else {
3094 if (cur >= end) {
3095 *cur = '\0';
b162af07 3096 SvCUR_set(cat, cur - start);
fc241834
RGS
3097 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3098 end = start+SvLEN(cat)-UTF8_MAXLEN;
3099 }
230e1fce 3100 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3101 warn_utf8 ?
230e1fce 3102 0 : UNICODE_ALLOW_ANY);
fc241834 3103 }
a6ec74c1 3104 }
a6ec74c1 3105 break;
fc241834 3106 }
a6ec74c1
JH
3107 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3108 case 'f':
a6ec74c1 3109 while (len-- > 0) {
f337b084
TH
3110 float afloat;
3111 NV anv;
a6ec74c1 3112 fromstr = NEXTFROM;
f337b084 3113 anv = SvNV(fromstr);
85bba25f 3114# if defined(VMS) && !defined(_IEEE_FP)
f337b084 3115 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3116 * on Alpha; fake it if we don't have them.
3117 */
f337b084 3118 if (anv > FLT_MAX)
fc241834 3119 afloat = FLT_MAX;
f337b084 3120 else if (anv < -FLT_MAX)
fc241834 3121 afloat = -FLT_MAX;
f337b084 3122 else afloat = (float)anv;
baf3cf9c 3123# else
f337b084 3124 afloat = (float)anv;
baf3cf9c 3125# endif
1109a392 3126 DO_BO_PACK_N(afloat, float);
f337b084 3127 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3128 }
3129 break;
3130 case 'd':
a6ec74c1 3131 while (len-- > 0) {
f337b084
TH
3132 double adouble;
3133 NV anv;
a6ec74c1 3134 fromstr = NEXTFROM;
f337b084 3135 anv = SvNV(fromstr);
85bba25f 3136# if defined(VMS) && !defined(_IEEE_FP)
f337b084 3137 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3138 * on Alpha; fake it if we don't have them.
3139 */
f337b084 3140 if (anv > DBL_MAX)
fc241834 3141 adouble = DBL_MAX;
f337b084 3142 else if (anv < -DBL_MAX)
fc241834 3143 adouble = -DBL_MAX;
f337b084 3144 else adouble = (double)anv;
baf3cf9c 3145# else
f337b084 3146 adouble = (double)anv;
baf3cf9c 3147# endif
1109a392 3148 DO_BO_PACK_N(adouble, double);
f337b084 3149 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3150 }
3151 break;
fc241834 3152 case 'F': {
275663fa 3153 NV_bytes anv;
1109a392 3154 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3155 while (len-- > 0) {
3156 fromstr = NEXTFROM;
cd07c537
DM
3157#ifdef __GNUC__
3158 /* to work round a gcc/x86 bug; don't use SvNV */
3159 anv.nv = sv_2nv(fromstr);
3160#else
275663fa 3161 anv.nv = SvNV(fromstr);
cd07c537 3162#endif
1109a392 3163 DO_BO_PACK_N(anv, NV);
275663fa 3164 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
3165 }
3166 break;
fc241834 3167 }
92d41999 3168#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 3169 case 'D': {
275663fa 3170 ld_bytes aldouble;
1109a392
MHM
3171 /* long doubles can have unused bits, which may be nonzero */
3172 Zero(&aldouble, 1, long double);
92d41999
JH
3173 while (len-- > 0) {
3174 fromstr = NEXTFROM;
cd07c537
DM
3175# ifdef __GNUC__
3176 /* to work round a gcc/x86 bug; don't use SvNV */
3177 aldouble.ld = (long double)sv_2nv(fromstr);
3178# else
275663fa 3179 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 3180# endif
1109a392 3181 DO_BO_PACK_N(aldouble, long double);
275663fa 3182 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
3183 }
3184 break;
fc241834 3185 }
92d41999 3186#endif
7212898e 3187#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3188 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3189#endif
a6ec74c1
JH
3190 case 'n':
3191 while (len-- > 0) {
f337b084 3192 I16 ai16;
a6ec74c1 3193 fromstr = NEXTFROM;
ef108786 3194 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3195#ifdef HAS_HTONS
ef108786 3196 ai16 = PerlSock_htons(ai16);
a6ec74c1 3197#endif
f337b084 3198 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3199 }
3200 break;
7212898e 3201#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3202 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3203#endif
a6ec74c1
JH
3204 case 'v':
3205 while (len-- > 0) {
f337b084 3206 I16 ai16;
a6ec74c1 3207 fromstr = NEXTFROM;
ef108786 3208 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3209#ifdef HAS_HTOVS
ef108786 3210 ai16 = htovs(ai16);
a6ec74c1 3211#endif
f337b084 3212 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3213 }
3214 break;
49704364 3215 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3216#if SHORTSIZE != SIZE16
fc241834 3217 while (len-- > 0) {
f337b084 3218 unsigned short aushort;
fc241834
RGS
3219 fromstr = NEXTFROM;
3220 aushort = SvUV(fromstr);
3221 DO_BO_PACK(aushort, s);
f337b084 3222 PUSH_VAR(utf8, cur, aushort);
fc241834 3223 }
49704364
WL
3224 break;
3225#else
3226 /* Fall through! */
a6ec74c1 3227#endif
49704364 3228 case 'S':
fc241834 3229 while (len-- > 0) {
f337b084 3230 U16 au16;
fc241834
RGS
3231 fromstr = NEXTFROM;
3232 au16 = (U16)SvUV(fromstr);
3233 DO_BO_PACK(au16, 16);
f337b084 3234 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3235 }
3236 break;
49704364 3237 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3238#if SHORTSIZE != SIZE16
fc241834 3239 while (len-- > 0) {
f337b084 3240 short ashort;
fc241834
RGS
3241 fromstr = NEXTFROM;
3242 ashort = SvIV(fromstr);
3243 DO_BO_PACK(ashort, s);
f337b084 3244 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3245 }
49704364
WL
3246 break;
3247#else
3248 /* Fall through! */
a6ec74c1 3249#endif
49704364
WL
3250 case 's':
3251 while (len-- > 0) {
f337b084 3252 I16 ai16;
49704364 3253 fromstr = NEXTFROM;
ef108786
MHM
3254 ai16 = (I16)SvIV(fromstr);
3255 DO_BO_PACK(ai16, 16);
f337b084 3256 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3257 }
3258 break;
3259 case 'I':
49704364 3260 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3261 while (len-- > 0) {
f337b084 3262 unsigned int auint;
a6ec74c1
JH
3263 fromstr = NEXTFROM;
3264 auint = SvUV(fromstr);
1109a392 3265 DO_BO_PACK(auint, i);
f337b084 3266 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3267 }
3268 break;
92d41999
JH
3269 case 'j':
3270 while (len-- > 0) {
f337b084 3271 IV aiv;
92d41999
JH
3272 fromstr = NEXTFROM;
3273 aiv = SvIV(fromstr);
1109a392
MHM
3274#if IVSIZE == INTSIZE
3275 DO_BO_PACK(aiv, i);
3276#elif IVSIZE == LONGSIZE
3277 DO_BO_PACK(aiv, l);
3278#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3279 DO_BO_PACK(aiv, 64);
f337b084
TH
3280#else
3281 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3282#endif
f337b084 3283 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3284 }
3285 break;
3286 case 'J':
3287 while (len-- > 0) {
f337b084 3288 UV auv;
92d41999
JH
3289 fromstr = NEXTFROM;
3290 auv = SvUV(fromstr);
1109a392
MHM
3291#if UVSIZE == INTSIZE
3292 DO_BO_PACK(auv, i);
3293#elif UVSIZE == LONGSIZE
3294 DO_BO_PACK(auv, l);
3295#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3296 DO_BO_PACK(auv, 64);
f337b084
TH
3297#else
3298 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3299#endif
f337b084 3300 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3301 }
3302 break;
a6ec74c1
JH
3303 case 'w':
3304 while (len-- > 0) {
f337b084 3305 NV anv;
a6ec74c1 3306 fromstr = NEXTFROM;
15e9f109 3307 anv = SvNV(fromstr);
a6ec74c1 3308
f337b084
TH
3309 if (anv < 0) {
3310 *cur = '\0';
b162af07 3311 SvCUR_set(cat, cur - start);
49704364 3312 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3313 }
a6ec74c1 3314
196b62db
NC
3315 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3316 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3317 any negative IVs will have already been got by the croak()
3318 above. IOK is untrue for fractions, so we test them
3319 against UV_MAX_P1. */
f337b084
TH
3320 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3321 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3322 char *in = buf + sizeof(buf);
196b62db 3323 UV auv = SvUV(fromstr);
a6ec74c1
JH
3324
3325 do {
eb160463 3326 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3327 auv >>= 7;
3328 } while (auv);
3329 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3330 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3331 in, (buf + sizeof(buf)) - in);
3332 } else if (SvPOKp(fromstr))
3333 goto w_string;
a6ec74c1 3334 else if (SvNOKp(fromstr)) {
0258719b 3335 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 3336 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
3337 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3338 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3339 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3340 Some C compilers are strict about integral constant
3341 expressions so we conservatively divide by a slightly
3342 smaller integer instead of multiplying by the exact
3343 floating-point value.
0258719b
NC
3344 */
3345#ifdef NV_MAX_10_EXP
f337b084 3346 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3347 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3348#else
f337b084 3349 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3350 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3351#endif
a6ec74c1
JH
3352 char *in = buf + sizeof(buf);
3353
8b6e33c7 3354 anv = Perl_floor(anv);
a6ec74c1 3355 do {
8b6e33c7 3356 const NV next = Perl_floor(anv / 128);
a6ec74c1 3357 if (in <= buf) /* this cannot happen ;-) */
49704364 3358 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3359 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3360 anv = next;
3361 } while (anv > 0);
a6ec74c1 3362 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3363 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3364 in, (buf + sizeof(buf)) - in);
3365 } else {
8b6e33c7
AL
3366 const char *from;
3367 char *result, *in;
735b914b
JH
3368 SV *norm;
3369 STRLEN len;
3370 bool done;
3371
f337b084 3372 w_string:
735b914b 3373 /* Copy string and check for compliance */
349d4f2f 3374 from = SvPV_const(fromstr, len);
735b914b 3375 if ((norm = is_an_int(from, len)) == NULL)
49704364 3376 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3377
a02a5408 3378 Newx(result, len, char);
735b914b
JH
3379 in = result + len;
3380 done = FALSE;
f337b084 3381 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3382 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3383 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3384 in, (result + len) - in);
735b914b
JH
3385 Safefree(result);
3386 SvREFCNT_dec(norm); /* free norm */
fc241834 3387 }
a6ec74c1
JH
3388 }
3389 break;
3390 case 'i':
49704364 3391 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3392 while (len-- > 0) {
f337b084 3393 int aint;
a6ec74c1
JH
3394 fromstr = NEXTFROM;
3395 aint = SvIV(fromstr);
1109a392 3396 DO_BO_PACK(aint, i);
f337b084 3397 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3398 }
3399 break;
7212898e 3400#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3401 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3402#endif
a6ec74c1
JH
3403 case 'N':
3404 while (len-- > 0) {
f337b084 3405 U32 au32;
a6ec74c1 3406 fromstr = NEXTFROM;
ef108786 3407 au32 = SvUV(fromstr);
a6ec74c1 3408#ifdef HAS_HTONL
ef108786 3409 au32 = PerlSock_htonl(au32);
a6ec74c1 3410#endif
f337b084 3411 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3412 }
3413 break;
7212898e 3414#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3415 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3416#endif
a6ec74c1
JH
3417 case 'V':
3418 while (len-- > 0) {
f337b084 3419 U32 au32;
a6ec74c1 3420 fromstr = NEXTFROM;
ef108786 3421 au32 = SvUV(fromstr);
a6ec74c1 3422#ifdef HAS_HTOVL
ef108786 3423 au32 = htovl(au32);
a6ec74c1 3424#endif
f337b084 3425 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3426 }
3427 break;
49704364 3428 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3429#if LONGSIZE != SIZE32
fc241834 3430 while (len-- > 0) {
f337b084 3431 unsigned long aulong;
fc241834
RGS
3432 fromstr = NEXTFROM;
3433 aulong = SvUV(fromstr);
3434 DO_BO_PACK(aulong, l);
f337b084 3435 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3436 }
49704364
WL
3437 break;
3438#else
3439 /* Fall though! */
a6ec74c1 3440#endif
49704364 3441 case 'L':
fc241834 3442 while (len-- > 0) {
f337b084 3443 U32 au32;
fc241834
RGS
3444 fromstr = NEXTFROM;
3445 au32 = SvUV(fromstr);
3446 DO_BO_PACK(au32, 32);
f337b084 3447 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3448 }
3449 break;
49704364 3450 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3451#if LONGSIZE != SIZE32
fc241834 3452 while (len-- > 0) {
f337b084 3453 long along;
fc241834
RGS
3454 fromstr = NEXTFROM;
3455 along = SvIV(fromstr);
3456 DO_BO_PACK(along, l);
f337b084 3457 PUSH_VAR(utf8, cur, along);
a6ec74c1 3458 }
49704364
WL
3459 break;
3460#else
3461 /* Fall though! */
a6ec74c1 3462#endif
49704364
WL
3463 case 'l':
3464 while (len-- > 0) {
f337b084 3465 I32 ai32;
49704364 3466 fromstr = NEXTFROM;
ef108786
MHM
3467 ai32 = SvIV(fromstr);
3468 DO_BO_PACK(ai32, 32);
f337b084 3469 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3470 }
3471 break;
3472#ifdef HAS_QUAD
3473 case 'Q':
3474 while (len-- > 0) {
f337b084 3475 Uquad_t auquad;
a6ec74c1 3476 fromstr = NEXTFROM;
f337b084 3477 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3478 DO_BO_PACK(auquad, 64);
f337b084 3479 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3480 }
3481 break;
3482 case 'q':
3483 while (len-- > 0) {
f337b084 3484 Quad_t aquad;
a6ec74c1
JH
3485 fromstr = NEXTFROM;
3486 aquad = (Quad_t)SvIV(fromstr);
1109a392 3487 DO_BO_PACK(aquad, 64);
f337b084 3488 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3489 }
3490 break;
f337b084 3491#endif /* HAS_QUAD */
a6ec74c1
JH
3492 case 'P':
3493 len = 1; /* assume SV is correct length */
f337b084 3494 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3495 /* Fall through! */
a6ec74c1
JH
3496 case 'p':
3497 while (len-- > 0) {
83003860 3498 const char *aptr;
f337b084 3499
a6ec74c1 3500 fromstr = NEXTFROM;
28a4f200
TH
3501 SvGETMAGIC(fromstr);
3502 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3503 else {
a6ec74c1
JH
3504 /* XXX better yet, could spirit away the string to
3505 * a safe spot and hang on to it until the result
3506 * of pack() (and all copies of the result) are
3507 * gone.
3508 */
041457d9 3509 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3510 !SvREADONLY(fromstr)))) {
3511 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3512 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3513 }
3514 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3515 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3516 else
2596d9fe 3517 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3518 }
07409e01 3519 DO_BO_PACK_PC(aptr);
f337b084 3520 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3521 }
3522 break;
fc241834 3523 case 'u': {
f7fe979e 3524 const char *aptr, *aend;
fc241834 3525 bool from_utf8;
f337b084 3526
a6ec74c1 3527 fromstr = NEXTFROM;
fc241834
RGS
3528 if (len <= 2) len = 45;
3529 else len = len / 3 * 3;
3530 if (len >= 64) {
a2a5de95
NC
3531 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3532 "Field too wide in 'u' format in pack");
fc241834
RGS
3533 len = 63;
3534 }
83003860 3535 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3536 from_utf8 = DO_UTF8(fromstr);
3537 if (from_utf8) {
3538 aend = aptr + fromlen;
3f63b0e5 3539 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3540 } else aend = NULL; /* Unused, but keep compilers happy */
3541 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3542 while (fromlen > 0) {
fc241834 3543 U8 *end;
a6ec74c1 3544 I32 todo;
fc241834 3545 U8 hunk[1+63/3*4+1];
a6ec74c1 3546
eb160463 3547 if ((I32)fromlen > len)
a6ec74c1
JH
3548 todo = len;
3549 else
3550 todo = fromlen;
fc241834
RGS
3551 if (from_utf8) {
3552 char buffer[64];
3553 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3554 'u' | TYPE_IS_PACK)) {
3555 *cur = '\0';
b162af07 3556 SvCUR_set(cat, cur - start);
5637ef5b
NC
3557 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3558 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3559 aptr, aend, buffer, (long) todo);
fc241834
RGS
3560 }
3561 end = doencodes(hunk, buffer, todo);
3562 } else {
3563 end = doencodes(hunk, aptr, todo);
3564 aptr += todo;
3565 }
3566 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3567 fromlen -= todo;
3568 }
a6ec74c1
JH
3569 break;
3570 }
f337b084
TH
3571 }
3572 *cur = '\0';
b162af07 3573 SvCUR_set(cat, cur - start);
f337b084 3574 no_change:
49704364 3575 *symptr = lookahead;
a6ec74c1 3576 }
49704364 3577 return beglist;
18529408
IZ
3578}
3579#undef NEXTFROM
3580
3581
3582PP(pp_pack)
3583{
97aff369 3584 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3585 SV *cat = TARG;
18529408 3586 STRLEN fromlen;
349d4f2f 3587 SV *pat_sv = *++MARK;
eb578fdb
KW
3588 const char *pat = SvPV_const(pat_sv, fromlen);
3589 const char *patend = pat + fromlen;
18529408
IZ
3590
3591 MARK++;
76f68e9b 3592 sv_setpvs(cat, "");
f337b084 3593 SvUTF8_off(cat);
18529408 3594
7accc089 3595 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3596
a6ec74c1
JH
3597 SvSETMAGIC(cat);
3598 SP = ORIGMARK;
3599 PUSHs(cat);
3600 RETURN;
3601}
a6ec74c1 3602
73cb7263
NC
3603/*
3604 * Local variables:
3605 * c-indentation-style: bsd
3606 * c-basic-offset: 4
14d04a33 3607 * indent-tabs-mode: nil
73cb7263
NC
3608 * End:
3609 *
14d04a33 3610 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3611 */