This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add a test for s.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
7212898e 85#if PERL_VERSION >= 9
f337b084
TH
86# define PERL_PACK_CAN_BYTEORDER
87# define PERL_PACK_CAN_SHRIEKSIGN
88#endif
89
90#ifndef CHAR_BIT
91# define CHAR_BIT 8
7212898e 92#endif
3473cf63
RGS
93/* Maximum number of bytes to which a byte can grow due to upgrade */
94#define UTF8_EXPAND 2
7212898e 95
a6ec74c1 96/*
a6ec74c1
JH
97 * Offset for integer pack/unpack.
98 *
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
101 */
102
103/*
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
109 */
110/*
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
113 */
114#define SIZE16 2
115#define SIZE32 4
116
117/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
118 --jhi Feb 1999 */
119
1109a392
MHM
120#if U16SIZE > SIZE16 || U32SIZE > SIZE32
121# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
122# define OFF16(p) ((char*)(p))
123# define OFF32(p) ((char*)(p))
a6ec74c1 124# else
1109a392 125# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
126# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
128# else
08ca2aa3 129 ++++ bad cray byte order
a6ec74c1
JH
130# endif
131# endif
a6ec74c1 132#else
08ca2aa3
TH
133# define OFF16(p) ((char *) (p))
134# define OFF32(p) ((char *) (p))
a6ec74c1
JH
135#endif
136
f337b084
TH
137/* Only to be used inside a loop (see the break) */
138#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
139 if (utf8) { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
141 } else { \
142 Copy(s, OFF16(p), SIZE16, char); \
143 (s) += SIZE16; \
144 } \
145} STMT_END
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
149 if (utf8) { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
151 } else { \
152 Copy(s, OFF32(p), SIZE32, char); \
153 (s) += SIZE32; \
154 } \
155} STMT_END
156
157#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
159
160/* Only to be used inside a loop (see the break) */
275663fa 161#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
162STMT_START { \
163 if (utf8) { \
f337b084 164 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 165 (char *) (buf), len, datumtype)) break; \
08ca2aa3 166 } else { \
275663fa
TC
167 Copy(s, (char *) (buf), len, char); \
168 s += len; \
08ca2aa3 169 } \
08ca2aa3
TH
170} STMT_END
171
275663fa
TC
172#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174
f337b084 175#define PUSH_VAR(utf8, aptr, var) \
230e1fce 176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 177
49704364
WL
178/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179#define MAX_SUB_TEMPLATE_LEVEL 100
180
66c611c5 181/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
182#define FLAG_WAS_UTF8 0x40
183#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 184#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 185#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
186#define FLAG_SLASH 0x04
187#define FLAG_COMMA 0x02
188#define FLAG_PACK 0x01
189
a6ec74c1
JH
190STATIC SV *
191S_mul128(pTHX_ SV *sv, U8 m)
192{
193 STRLEN len;
194 char *s = SvPV(sv, len);
195 char *t;
a6ec74c1 196
7918f24d
NC
197 PERL_ARGS_ASSERT_MUL128;
198
a6ec74c1 199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 200 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
201
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
204 sv = tmpNew;
205 s = SvPV(sv, len);
206 }
207 t = s + len - 1;
208 while (!*t) /* trailing '\0'? */
209 t--;
210 while (t > s) {
f7fe979e 211 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
212 *(t--) = '0' + (char)(i % 10);
213 m = (char)(i / 10);
a6ec74c1
JH
214 }
215 return (sv);
216}
217
218/* Explosives and implosives. */
219
220#if 'I' == 73 && 'J' == 74
221/* On an ASCII/ISO kind of system */
222#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
223#else
224/*
225 Some other sort of character set - use memchr() so we don't match
226 the null byte.
227 */
228#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229#endif
230
66c611c5 231/* type modifiers */
62f95557 232#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
233#define TYPE_IS_BIG_ENDIAN 0x200
234#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 235#define TYPE_IS_PACK 0x800
1109a392 236#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 237#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
238#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239
7212898e 240#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 241# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 242#else
28be1210 243# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
244#endif
245
246#ifndef PERL_PACK_CAN_BYTEORDER
247/* Put "can't" first because it is shorter */
248# define TYPE_ENDIANNESS(t) 0
249# define TYPE_NO_ENDIANNESS(t) (t)
250
251# define ENDIANNESS_ALLOWED_TYPES ""
252
253# define DO_BO_UNPACK(var, type)
254# define DO_BO_PACK(var, type)
07409e01
NC
255# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
257# define DO_BO_UNPACK_N(var, type)
258# define DO_BO_PACK_N(var, type)
259# define DO_BO_UNPACK_P(var)
260# define DO_BO_PACK_P(var)
45f723e1
NC
261# define DO_BO_UNPACK_PC(var)
262# define DO_BO_PACK_PC(var)
66c611c5 263
f337b084 264#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
265
266# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
268
269# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
270
271# define DO_BO_UNPACK(var, type) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
276 default: break; \
277 } \
278 } STMT_END
279
7212898e 280# define DO_BO_PACK(var, type) \
1109a392 281 STMT_START { \
66c611c5 282 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
285 default: break; \
286 } \
287 } STMT_END
288
07409e01 289# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 292 case TYPE_IS_BIG_ENDIAN: \
07409e01 293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
294 break; \
295 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
297 break; \
298 default: \
299 break; \
300 } \
301 } STMT_END
302
07409e01 303# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 304 STMT_START { \
66c611c5 305 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 306 case TYPE_IS_BIG_ENDIAN: \
07409e01 307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
311 break; \
312 default: \
313 break; \
314 } \
315 } STMT_END
316
7212898e 317# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
318 STMT_START { \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
323 break; \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
327 break; \
328 default: \
329 break; \
330 } \
331 } STMT_END
332
7212898e 333# if PTRSIZE == INTSIZE
07409e01
NC
334# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 338# elif PTRSIZE == LONGSIZE
fa58a56f
S
339# if LONGSIZE < IVSIZE && IVSIZE == 8
340# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
344# else
345# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
349# endif
2b00a750
JD
350# elif PTRSIZE == IVSIZE
351# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
355# else
356# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
358# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 360# endif
1109a392 361
7212898e 362# if defined(my_htolen) && defined(my_letohn) && \
1109a392 363 defined(my_htoben) && defined(my_betohn)
7212898e 364# define DO_BO_UNPACK_N(var, type) \
1109a392 365 STMT_START { \
66c611c5 366 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
369 default: break; \
370 } \
371 } STMT_END
372
7212898e 373# define DO_BO_PACK_N(var, type) \
1109a392 374 STMT_START { \
66c611c5 375 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
378 default: break; \
379 } \
380 } STMT_END
7212898e
NC
381# else
382# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
384# endif
385
f337b084 386#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 387
78d46eaa 388#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 389#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
390#define PACK_SIZE_MASK 0x3F
391
78d46eaa
NC
392/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
394
395#if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
397#endif
398
399typedef U8 packprops_t;
78d46eaa
NC
400#if 'J'-'I' == 1
401/* ASCII */
f43a9a31 402STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
403 /* normal */
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 0, 0, 0,
1651fc44 409 /* C */ sizeof(unsigned char),
80a13697 410#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 411 /* D */ LONG_DOUBLESIZE,
80a13697 412#else
a7a3cfaa 413 0,
80a13697 414#endif
a7a3cfaa
TH
415 0,
416 /* F */ NVSIZE,
417 0, 0,
418 /* I */ sizeof(unsigned int),
419 /* J */ UVSIZE,
420 0,
421 /* L */ SIZE32,
422 0,
423 /* N */ SIZE32,
424 0, 0,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* Q */ sizeof(Uquad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* S */ SIZE16,
432 0,
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
434 /* V */ SIZE32,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
439 0,
440 /* f */ sizeof(float),
441 0, 0,
442 /* i */ sizeof(int),
443 /* j */ IVSIZE,
444 0,
445 /* l */ SIZE32,
446 0,
447 /* n */ SIZE16,
448 0,
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 450#if defined(HAS_QUAD)
a7a3cfaa 451 /* q */ sizeof(Quad_t),
80a13697 452#else
a7a3cfaa 453 0,
80a13697 454#endif
a7a3cfaa
TH
455 0,
456 /* s */ SIZE16,
457 0, 0,
458 /* v */ SIZE16,
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
469 /* shrieking */
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
476 0, 0,
477 /* L */ sizeof(unsigned long),
478 0,
7212898e 479#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 480 /* N */ SIZE32,
7212898e 481#else
a7a3cfaa 482 0,
7212898e 483#endif
a7a3cfaa
TH
484 0, 0, 0, 0,
485 /* S */ sizeof(unsigned short),
486 0, 0,
7212898e 487#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 488 /* V */ SIZE32,
7212898e 489#else
a7a3cfaa 490 0,
7212898e 491#endif
a7a3cfaa
TH
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0,
494 /* i */ sizeof(int),
495 0, 0,
496 /* l */ sizeof(long),
497 0,
7212898e 498#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 499 /* n */ SIZE16,
7212898e 500#else
a7a3cfaa 501 0,
7212898e 502#endif
a7a3cfaa
TH
503 0, 0, 0, 0,
504 /* s */ sizeof(short),
505 0, 0,
7212898e 506#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 507 /* v */ SIZE16,
7212898e 508#else
a7a3cfaa 509 0,
7212898e 510#endif
a7a3cfaa
TH
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
520};
521#else
522/* EBCDIC (or bust) */
f43a9a31 523STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
524 /* normal */
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
536 0,
537 /* f */ sizeof(float),
538 0, 0,
539 /* i */ sizeof(int),
540 0, 0, 0, 0, 0, 0, 0,
541 /* j */ IVSIZE,
542 0,
543 /* l */ SIZE32,
544 0,
545 /* n */ SIZE16,
546 0,
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 548#if defined(HAS_QUAD)
a7a3cfaa 549 /* q */ sizeof(Quad_t),
80a13697 550#else
a7a3cfaa 551 0,
80a13697 552#endif
a7a3cfaa
TH
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 /* s */ SIZE16,
555 0, 0,
556 /* v */ SIZE16,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 560 /* C */ sizeof(unsigned char),
80a13697 561#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 562 /* D */ LONG_DOUBLESIZE,
80a13697 563#else
a7a3cfaa 564 0,
80a13697 565#endif
a7a3cfaa
TH
566 0,
567 /* F */ NVSIZE,
568 0, 0,
569 /* I */ sizeof(unsigned int),
570 0, 0, 0, 0, 0, 0, 0,
571 /* J */ UVSIZE,
572 0,
573 /* L */ SIZE32,
574 0,
575 /* N */ SIZE32,
576 0, 0,
80a13697 577#if defined(HAS_QUAD)
a7a3cfaa 578 /* Q */ sizeof(Uquad_t),
80a13697 579#else
a7a3cfaa 580 0,
80a13697 581#endif
a7a3cfaa
TH
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 /* S */ SIZE16,
584 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
586 /* V */ SIZE32,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* shrieking */
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* i */ sizeof(int),
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
603 0,
7212898e 604#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 605 /* n */ SIZE16,
7212898e 606#else
a7a3cfaa 607 0,
7212898e 608#endif
a7a3cfaa
TH
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
611 0, 0,
7212898e 612#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 613 /* v */ SIZE16,
7212898e 614#else
a7a3cfaa 615 0,
7212898e 616#endif
a7a3cfaa
TH
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
619 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
623 0,
7212898e 624#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 625 /* N */ SIZE32,
7212898e 626#else
a7a3cfaa 627 0,
7212898e 628#endif
a7a3cfaa
TH
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
631 0, 0,
7212898e 632#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 633 /* V */ SIZE32,
7212898e 634#else
a7a3cfaa 635 0,
7212898e 636#endif
a7a3cfaa
TH
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
639};
640#endif
641
08ca2aa3 642STATIC U8
f7fe979e 643uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 644{
08ca2aa3 645 STRLEN retlen;
0bcc34c2 646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 648 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 654 if (val >= 0x100) {
a2a5de95
NC
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
658 val &= 0xff;
659 }
660 *s += retlen;
fe2774ed 661 return (U8)val;
08ca2aa3
TH
662}
663
f337b084
TH
664#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
666 *(U8 *)(s)++)
667
668STATIC bool
f7fe979e 669uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
670{
671 UV val;
672 STRLEN retlen;
f7fe979e 673 const char *from = *s;
08ca2aa3 674 int bad = 0;
f7fe979e 675 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
f337b084 679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
682 bad |= 1;
683 } else from += retlen;
684 if (val >= 0x100) {
685 bad |= 2;
686 val &= 0xff;
687 }
fe2774ed 688 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
689 }
690 /* We have enough characters for the buffer. Did we have problems ? */
691 if (bad) {
692 if (bad & 1) {
693 /* Rewalk the string fragment while warning */
f7fe979e 694 const char *ptr;
9e27e96a 695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
f337b084 698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
699 }
700 if (from > end) from = end;
701 }
a2a5de95
NC
702 if ((bad & 2))
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 704 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
708 }
709 *s = from;
710 return TRUE;
711}
712
713STATIC bool
f7fe979e 714next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 715{
97aff369 716 dVAR;
08ca2aa3 717 STRLEN retlen;
0bcc34c2 718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
721 *out = 0;
722 return FALSE;
723 }
724 *out = PL_uudmap[val] & 077;
f337b084 725 *s += retlen;
08ca2aa3
TH
726 return TRUE;
727}
78d46eaa 728
64844641 729STATIC char *
14333449 730S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 731 const U8 * const end = start + len;
64844641 732
7918f24d
NC
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
734
f337b084 735 while (start < end) {
48fa4626
TS
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
739 else {
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
742 }
743 start++;
744 }
64844641 745 return dest;
f337b084
TH
746}
747
230e1fce
NC
748#define PUSH_BYTES(utf8, cur, buf, len) \
749STMT_START { \
64844641
AL
750 if (utf8) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
752 else { \
753 Copy(buf, cur, len, char); \
754 (cur) += (len); \
755 } \
f337b084
TH
756} STMT_END
757
758#define GROWING(utf8, cat, start, cur, in_len) \
759STMT_START { \
760 STRLEN glen = (in_len); \
3473cf63 761 if (utf8) glen *= UTF8_EXPAND; \
f337b084 762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 763 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
764 (cur) = (start) + SvCUR(cat); \
765 } \
766} STMT_END
767
768#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
769STMT_START { \
f7fe979e 770 const STRLEN glen = (in_len); \
f337b084 771 STRLEN gl = glen; \
3473cf63 772 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
774 *cur = '\0'; \
b162af07 775 SvCUR_set((cat), (cur) - (start)); \
0bd48802 776 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
777 (cur) = (start) + SvCUR(cat); \
778 } \
779 PUSH_BYTES(utf8, cur, buf, glen); \
780} STMT_END
781
782#define PUSH_BYTE(utf8, s, byte) \
783STMT_START { \
784 if (utf8) { \
f7fe979e 785 const U8 au8 = (byte); \
64844641 786 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
787 } else *(U8 *)(s)++ = (byte); \
788} STMT_END
789
790/* Only to be used inside a loop (see the break) */
791#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
792STMT_START { \
793 STRLEN retlen; \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
797 *cur = '\0'; \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
799 } \
800 str += retlen; \
801} STMT_END
802
f7fe979e
AL
803static const char *_action( const tempsym_t* symptr )
804{
10edeb5d 805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
806}
807
206947d2 808/* Returns the sizeof() struct described by pat */
028d1f6d 809STATIC I32
f337b084 810S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 811{
f337b084 812 I32 total = 0;
206947d2 813
7918f24d
NC
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
815
49704364 816 while (next_symbol(symptr)) {
f337b084 817 I32 len;
f7fe979e 818 int size;
f337b084
TH
819
820 switch (symptr->howlen) {
fc241834 821 case e_star:
49704364 822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 823 _action( symptr ) );
49704364 824 break;
f337b084
TH
825 default:
826 /* e_no_len and e_number */
827 len = symptr->length;
828 break;
49704364
WL
829 }
830
a7a3cfaa 831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 832 if (!size) {
f7fe979e 833 int star;
80a13697
NC
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
836 default:
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 839 _action( symptr ) );
28be1210
TH
840#ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
843#endif
80a13697 844 case '@':
28be1210 845 case '.':
80a13697
NC
846 case '/':
847 case 'U': /* XXXX Is it correct? */
848 case 'w':
849 case 'u':
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 851 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 852 _action( symptr ) );
80a13697
NC
853 case '%':
854 size = 0;
855 break;
856 case '(':
fc241834
RGS
857 {
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
866 *symptr = savsym;
867 break;
868 }
80a13697
NC
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
871 */
872 if (!len) /* Avoid division by 0 */
873 len = 1;
874 len = total % len; /* Assumed: the start is aligned. */
875 /* FALL THROUGH */
876 case 'X':
877 size = -1;
878 if (total < len)
f7fe979e 879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
880 break;
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
883 len = 1;
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
886 len = len - star;
887 else
888 len = 0;
889 /* FALL THROUGH */
890 case 'x':
891 case 'A':
892 case 'Z':
893 case 'a':
80a13697
NC
894 size = 1;
895 break;
896 case 'B':
897 case 'b':
898 len = (len + 7)/8;
899 size = 1;
900 break;
901 case 'H':
902 case 'h':
903 len = (len + 1)/2;
904 size = 1;
905 break;
78d46eaa 906
80a13697
NC
907 case 'P':
908 len = 1;
909 size = sizeof(char*);
78d46eaa
NC
910 break;
911 }
206947d2
IZ
912 }
913 total += len * size;
914 }
915 return total;
916}
917
49704364
WL
918
919/* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
921 */
f7fe979e
AL
922STATIC const char *
923S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 924{
7918f24d
NC
925 PERL_ARGS_ASSERT_GROUP_END;
926
49704364 927 while (patptr < patend) {
f7fe979e 928 const char c = *patptr++;
49704364
WL
929
930 if (isSPACE(c))
931 continue;
932 else if (c == ender)
933 return patptr-1;
934 else if (c == '#') {
935 while (patptr < patend && *patptr != '\n')
936 patptr++;
937 continue;
938 } else if (c == '(')
939 patptr = group_end(patptr, patend, ')') + 1;
940 else if (c == '[')
941 patptr = group_end(patptr, patend, ']') + 1;
18529408 942 }
49704364
WL
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
944 ender);
945 return 0;
18529408
IZ
946}
947
49704364
WL
948
949/* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
fc241834 952 */
f7fe979e
AL
953STATIC const char *
954S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
955{
956 I32 len = *patptr++ - '0';
7918f24d
NC
957
958 PERL_ARGS_ASSERT_GET_NUM;
959
49704364
WL
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
964 }
965 *lenptr = len;
966 return patptr;
967}
968
969/* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
971 */
972STATIC bool
f337b084 973S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 974{
f7fe979e 975 const char* patptr = symptr->patptr;
0bcc34c2 976 const char* const patend = symptr->patend;
49704364 977
7918f24d
NC
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
979
49704364
WL
980 symptr->flags &= ~FLAG_SLASH;
981
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
984 patptr++;
985 else if (*patptr == '#') {
986 patptr++;
987 while (patptr < patend && *patptr != '\n')
988 patptr++;
989 if (patptr < patend)
990 patptr++;
991 } else {
fc241834 992 /* We should have found a template code */
49704364 993 I32 code = *patptr++ & 0xFF;
66c611c5 994 U32 inherited_modifiers = 0;
49704364
WL
995
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 1000 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
1001 }
1002 continue;
1003 }
fc241834 1004
49704364 1005 /* for '(', skip to ')' */
fc241834 1006 if (code == '(') {
49704364
WL
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 1009 _action( symptr ) );
49704364
WL
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 1014 _action( symptr ) );
49704364
WL
1015 }
1016
66c611c5
MHM
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1021 }
1022
1109a392
MHM
1023 /* look for modifiers */
1024 while (patptr < patend) {
b7787f18
AL
1025 const char *allowed;
1026 I32 modifier;
1109a392
MHM
1027 switch (*patptr) {
1028 case '!':
1029 modifier = TYPE_IS_SHRIEKING;
7212898e 1030 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1031 break;
7212898e 1032#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1033 case '>':
1034 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1036 break;
1037 case '<':
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1040 break;
f337b084 1041#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1042 default:
b7787f18
AL
1043 allowed = "";
1044 modifier = 0;
1109a392
MHM
1045 break;
1046 }
66c611c5 1047
1109a392
MHM
1048 if (modifier == 0)
1049 break;
66c611c5 1050
1109a392
MHM
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1053 allowed, _action( symptr ) );
66c611c5
MHM
1054
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1061 *patptr, _action( symptr ) );
66c611c5 1062
a2a5de95
NC
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1109a392 1068 }
66c611c5 1069
1109a392
MHM
1070 code |= modifier;
1071 patptr++;
49704364
WL
1072 }
1073
66c611c5
MHM
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1076
fc241834 1077 /* look for count and/or / */
49704364
WL
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1082
1083 } else if (*patptr == '*') {
1084 patptr++;
1085 symptr->howlen = e_star;
1086
1087 } else if (*patptr == '[') {
f7fe979e 1088 const char* lenptr = ++patptr;
49704364
WL
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1096 _action( symptr ) );
49704364
WL
1097 } else {
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1102 *symptr = savsym;
1103 }
1104 } else {
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 /* try to find / */
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1112 patptr++;
1113 else if (*patptr == '#') {
1114 patptr++;
1115 while (patptr < patend && *patptr != '\n')
1116 patptr++;
1117 if (patptr < patend)
1118 patptr++;
1119 } else {
66c611c5 1120 if (*patptr == '/') {
49704364
WL
1121 symptr->flags |= FLAG_SLASH;
1122 patptr++;
66c611c5
MHM
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1126 _action( symptr ) );
49704364
WL
1127 }
1128 break;
1129 }
18529408 1130 }
49704364
WL
1131 } else {
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1134 symptr->length = 1;
1135 }
1136
1137 symptr->code = code;
fc241834 1138 symptr->patptr = patptr;
49704364 1139 return TRUE;
18529408 1140 }
49704364 1141 }
fc241834 1142 symptr->patptr = patptr;
49704364 1143 return FALSE;
18529408
IZ
1144}
1145
18529408 1146/*
fc241834 1147 There is no way to cleanly handle the case where we should process the
08ca2aa3 1148 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1153 themselves if they need to do a lot of unpacks like this on it
1154*/
fc241834 1155STATIC bool
08ca2aa3
TH
1156need_utf8(const char *pat, const char *patend)
1157{
1158 bool first = TRUE;
7918f24d
NC
1159
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1161
08ca2aa3
TH
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1164 pat++;
f7fe979e 1165 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1170 pat++;
1171 }
1172 return FALSE;
1173}
1174
1175STATIC char
1176first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1178
08ca2aa3
TH
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1181 pat++;
f7fe979e 1182 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1183 if (!pat) return 0;
1184 pat++;
1185 }
1186 return 0;
1187}
1188
1189/*
7accc089
JH
1190=for apidoc unpackstring
1191
608d3aed
WL
1192The engine implementing unpack() Perl function. C<unpackstring> puts the
1193extracted list items on the stack and returns the number of elements.
1194Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1195
1196=cut */
1197
1198I32
f7fe979e 1199Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1200{
f7fe979e 1201 tempsym_t sym;
08ca2aa3 1202
7918f24d
NC
1203 PERL_ARGS_ASSERT_UNPACKSTRING;
1204
f337b084 1205 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1206 else if (need_utf8(pat, patend)) {
1207 /* We probably should try to avoid this in case a scalar context call
1208 wouldn't get to the "U0" */
1209 STRLEN len = strend - s;
230e1fce 1210 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1211 SAVEFREEPV(s);
1212 strend = s + len;
f337b084 1213 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1214 }
1215
f337b084
TH
1216 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1217 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1218
f7fe979e 1219 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1220
1221 return unpack_rec(&sym, s, s, strend, NULL );
1222}
1223
4136a0f7 1224STATIC I32
f7fe979e 1225S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1226{
27da23d5 1227 dVAR; dSP;
3297d27d 1228 SV *sv = NULL;
f7fe979e 1229 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1230 howlen_t howlen;
a6ec74c1 1231 I32 checksum = 0;
92d41999 1232 UV cuv = 0;
a6ec74c1 1233 NV cdouble = 0.0;
f337b084 1234 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1235 bool beyond = FALSE;
21c16052 1236 bool explicit_length;
9e27e96a 1237 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1238 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1239
1240 PERL_ARGS_ASSERT_UNPACK_REC;
1241
28be1210 1242 symptr->strbeg = s - strbeg;
49704364 1243
49704364 1244 while (next_symbol(symptr)) {
a7a3cfaa 1245 packprops_t props;
9e27e96a 1246 I32 len;
f337b084 1247 I32 datumtype = symptr->code;
206947d2 1248 /* do first one only unless in list context
08ca2aa3 1249 / is implemented by unpacking the count, then popping it from the
206947d2 1250 stack, so must check that we're not in the middle of a / */
49704364 1251 if ( unpack_only_one
206947d2 1252 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1253 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1254 break;
49704364 1255
f337b084 1256 switch (howlen = symptr->howlen) {
fc241834
RGS
1257 case e_star:
1258 len = strend - strbeg; /* long enough */
49704364 1259 break;
f337b084
TH
1260 default:
1261 /* e_no_len and e_number */
1262 len = symptr->length;
1263 break;
49704364 1264 }
18529408 1265
21c16052 1266 explicit_length = TRUE;
a6ec74c1 1267 redo_switch:
49704364 1268 beyond = s >= strend;
a7a3cfaa
TH
1269
1270 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1271 if (props) {
1272 /* props nonzero means we can process this letter. */
9e27e96a
AL
1273 const long size = props & PACK_SIZE_MASK;
1274 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1275 if (len > howmany)
1276 len = howmany;
1277
1278 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1279 if (len && unpack_only_one) len = 1;
1280 EXTEND(SP, len);
1281 EXTEND_MORTAL(len);
78d46eaa
NC
1282 }
1283 }
a7a3cfaa 1284
1109a392 1285 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1286 default:
1109a392 1287 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1288
a6ec74c1 1289 case '%':
49704364 1290 if (howlen == e_no_len)
18529408 1291 len = 16; /* len is not specified */
a6ec74c1 1292 checksum = len;
92d41999 1293 cuv = 0;
a6ec74c1 1294 cdouble = 0;
18529408 1295 continue;
a6ec74c1 1296 break;
18529408
IZ
1297 case '(':
1298 {
49704364 1299 tempsym_t savsym = *symptr;
9e27e96a 1300 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1301 symptr->flags |= group_modifiers;
49704364 1302 symptr->patend = savsym.grpend;
28be1210 1303 symptr->previous = &savsym;
49704364 1304 symptr->level++;
18529408 1305 PUTBACK;
c6f750d1 1306 if (len && unpack_only_one) len = 1;
18529408 1307 while (len--) {
49704364 1308 symptr->patptr = savsym.grpbeg;
f337b084
TH
1309 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1310 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1311 unpack_rec(symptr, s, strbeg, strend, &s);
1312 if (s == strend && savsym.howlen == e_star)
49704364 1313 break; /* No way to continue */
18529408
IZ
1314 }
1315 SPAGAIN;
28be1210 1316 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1317 *symptr = savsym;
18529408
IZ
1318 break;
1319 }
28be1210
TH
1320#ifdef PERL_PACK_CAN_SHRIEKSIGN
1321 case '.' | TYPE_IS_SHRIEKING:
1322#endif
1323 case '.': {
9e27e96a 1324 const char *from;
28be1210
TH
1325 SV *sv;
1326#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1327 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1328#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1329 const bool u8 = utf8;
28be1210
TH
1330#endif
1331 if (howlen == e_star) from = strbeg;
1332 else if (len <= 0) from = s;
1333 else {
1334 tempsym_t *group = symptr;
1335
1336 while (--len && group) group = group->previous;
1337 from = group ? strbeg + group->strbeg : strbeg;
1338 }
1339 sv = from <= s ?
00646304
CB
1340 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1341 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1342 mXPUSHs(sv);
28be1210
TH
1343 break;
1344 }
1345#ifdef PERL_PACK_CAN_SHRIEKSIGN
1346 case '@' | TYPE_IS_SHRIEKING:
1347#endif
a6ec74c1 1348 case '@':
28be1210
TH
1349 s = strbeg + symptr->strbeg;
1350#ifdef PERL_PACK_CAN_SHRIEKSIGN
1351 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1352#else /* PERL_PACK_CAN_SHRIEKSIGN */
1353 if (utf8)
1354#endif
1355 {
08ca2aa3
TH
1356 while (len > 0) {
1357 if (s >= strend)
1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
1359 s += UTF8SKIP(s);
1360 len--;
1361 }
1362 if (s > strend)
1363 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1364 } else {
28be1210 1365 if (strend-s < len)
fc241834 1366 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1367 s += len;
08ca2aa3 1368 }
a6ec74c1 1369 break;
62f95557
IZ
1370 case 'X' | TYPE_IS_SHRIEKING:
1371 if (!len) /* Avoid division by 0 */
1372 len = 1;
08ca2aa3 1373 if (utf8) {
f7fe979e 1374 const char *hop, *last;
f337b084
TH
1375 I32 l = len;
1376 hop = last = strbeg;
1377 while (hop < s) {
1378 hop += UTF8SKIP(hop);
1379 if (--l == 0) {
08ca2aa3 1380 last = hop;
f337b084
TH
1381 l = len;
1382 }
fc241834 1383 }
f337b084
TH
1384 if (last > s)
1385 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1386 s = last;
1387 break;
f337b084
TH
1388 }
1389 len = (s - strbeg) % len;
62f95557 1390 /* FALL THROUGH */
a6ec74c1 1391 case 'X':
08ca2aa3
TH
1392 if (utf8) {
1393 while (len > 0) {
1394 if (s <= strbeg)
1395 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1396 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1397 if (s <= strbeg)
1398 Perl_croak(aTHX_ "'X' outside of string in unpack");
1399 }
1400 len--;
1401 }
1402 } else {
fc241834
RGS
1403 if (len > s - strbeg)
1404 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1405 s -= len;
08ca2aa3 1406 }
a6ec74c1 1407 break;
9e27e96a
AL
1408 case 'x' | TYPE_IS_SHRIEKING: {
1409 I32 ai32;
62f95557
IZ
1410 if (!len) /* Avoid division by 0 */
1411 len = 1;
230e1fce
NC
1412 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1413 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1414 if (ai32 == 0) break;
1415 len -= ai32;
9e27e96a 1416 }
62f95557 1417 /* FALL THROUGH */
a6ec74c1 1418 case 'x':
08ca2aa3
TH
1419 if (utf8) {
1420 while (len>0) {
1421 if (s >= strend)
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1423 s += UTF8SKIP(s);
1424 len--;
1425 }
1426 } else {
fc241834
RGS
1427 if (len > strend - s)
1428 Perl_croak(aTHX_ "'x' outside of string in unpack");
1429 s += len;
f337b084 1430 }
a6ec74c1
JH
1431 break;
1432 case '/':
49704364
WL
1433 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1434 break;
a6ec74c1
JH
1435 case 'A':
1436 case 'Z':
1437 case 'a':
08ca2aa3
TH
1438 if (checksum) {
1439 /* Preliminary length estimate is assumed done in 'W' */
1440 if (len > strend - s) len = strend - s;
1441 goto W_checksum;
1442 }
1443 if (utf8) {
1444 I32 l;
f7fe979e 1445 const char *hop;
08ca2aa3
TH
1446 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1447 if (hop >= strend) {
1448 if (hop > strend)
1449 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1450 break;
fc241834 1451 }
a6ec74c1 1452 }
08ca2aa3
TH
1453 if (hop > strend)
1454 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1455 len = hop - s;
1456 } else if (len > strend - s)
1457 len = strend - s;
1458
1459 if (datumtype == 'Z') {
1460 /* 'Z' strips stuff after first null */
f7fe979e 1461 const char *ptr, *end;
f337b084
TH
1462 end = s + len;
1463 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1464 sv = newSVpvn(s, ptr-s);
1465 if (howlen == e_star) /* exact for 'Z*' */
1466 len = ptr-s + (ptr != strend ? 1 : 0);
1467 } else if (datumtype == 'A') {
1468 /* 'A' strips both nulls and spaces */
f7fe979e 1469 const char *ptr;
18bdf90a
TH
1470 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1473 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1474 if (ptr >= s) ptr += UTF8SKIP(ptr);
1475 else ptr++;
28be1210 1476 if (ptr > s+len)
18bdf90a
TH
1477 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1478 } else {
1479 for (ptr = s+len-1; ptr >= s; ptr--)
1480 if (*ptr != 0 && !isSPACE(*ptr)) break;
1481 ptr++;
1482 }
08ca2aa3
TH
1483 sv = newSVpvn(s, ptr-s);
1484 } else sv = newSVpvn(s, len);
1485
1486 if (utf8) {
1487 SvUTF8_on(sv);
1488 /* Undo any upgrade done due to need_utf8() */
f337b084 1489 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1490 sv_utf8_downgrade(sv, 0);
a6ec74c1 1491 }
6e449a3a 1492 mXPUSHs(sv);
08ca2aa3 1493 s += len;
a6ec74c1
JH
1494 break;
1495 case 'B':
08ca2aa3
TH
1496 case 'b': {
1497 char *str;
49704364 1498 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1499 len = (strend - s) * 8;
1500 if (checksum) {
f337b084 1501 if (utf8)
08ca2aa3 1502 while (len >= 8 && s < strend) {
f337b084 1503 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1504 len -= 8;
1505 }
f337b084 1506 else
fc241834 1507 while (len >= 8) {
08ca2aa3 1508 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1509 len -= 8;
1510 }
08ca2aa3
TH
1511 if (len && s < strend) {
1512 U8 bits;
f337b084
TH
1513 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1514 if (datumtype == 'b')
a6ec74c1 1515 while (len-- > 0) {
92d41999 1516 if (bits & 1) cuv++;
a6ec74c1
JH
1517 bits >>= 1;
1518 }
f337b084 1519 else
a6ec74c1 1520 while (len-- > 0) {
08ca2aa3 1521 if (bits & 0x80) cuv++;
a6ec74c1
JH
1522 bits <<= 1;
1523 }
fc241834 1524 }
a6ec74c1
JH
1525 break;
1526 }
08ca2aa3 1527
561b68a9 1528 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1529 SvPOK_on(sv);
1530 str = SvPVX(sv);
1531 if (datumtype == 'b') {
f337b084 1532 U8 bits = 0;
f7fe979e 1533 const I32 ai32 = len;
08ca2aa3
TH
1534 for (len = 0; len < ai32; len++) {
1535 if (len & 7) bits >>= 1;
1536 else if (utf8) {
1537 if (s >= strend) break;
f337b084 1538 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1539 } else bits = *(U8 *) s++;
1540 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1541 }
08ca2aa3 1542 } else {
f337b084 1543 U8 bits = 0;
f7fe979e 1544 const I32 ai32 = len;
08ca2aa3
TH
1545 for (len = 0; len < ai32; len++) {
1546 if (len & 7) bits <<= 1;
1547 else if (utf8) {
1548 if (s >= strend) break;
f337b084 1549 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1550 } else bits = *(U8 *) s++;
1551 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1552 }
1553 }
1554 *str = '\0';
aa07b2f6 1555 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1556 XPUSHs(sv);
a6ec74c1 1557 break;
08ca2aa3 1558 }
a6ec74c1 1559 case 'H':
08ca2aa3 1560 case 'h': {
3297d27d 1561 char *str = NULL;
fc241834 1562 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1563 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1564 len = (strend - s) * 2;
858fe5e1
TC
1565 if (!checksum) {
1566 sv = sv_2mortal(newSV(len ? len : 1));
1567 SvPOK_on(sv);
1568 str = SvPVX(sv);
1569 }
a6ec74c1 1570 if (datumtype == 'h') {
f337b084 1571 U8 bits = 0;
9e27e96a 1572 I32 ai32 = len;
fc241834
RGS
1573 for (len = 0; len < ai32; len++) {
1574 if (len & 1) bits >>= 4;
1575 else if (utf8) {
1576 if (s >= strend) break;
f337b084 1577 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1578 } else bits = * (U8 *) s++;
858fe5e1
TC
1579 if (!checksum)
1580 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1581 }
08ca2aa3 1582 } else {
f337b084 1583 U8 bits = 0;
f7fe979e 1584 const I32 ai32 = len;
08ca2aa3
TH
1585 for (len = 0; len < ai32; len++) {
1586 if (len & 1) bits <<= 4;
1587 else if (utf8) {
1588 if (s >= strend) break;
f337b084 1589 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1590 } else bits = *(U8 *) s++;
858fe5e1
TC
1591 if (!checksum)
1592 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1593 }
1594 }
858fe5e1
TC
1595 if (!checksum) {
1596 *str = '\0';
1597 SvCUR_set(sv, str - SvPVX_const(sv));
1598 XPUSHs(sv);
1599 }
a6ec74c1 1600 break;
08ca2aa3 1601 }
1651fc44
ML
1602 case 'C':
1603 if (len == 0) {
1604 if (explicit_length)
1605 /* Switch to "character" mode */
1606 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1607 break;
1608 }
1609 /* FALL THROUGH */
a6ec74c1 1610 case 'c':
1651fc44
ML
1611 while (len-- > 0 && s < strend) {
1612 int aint;
1613 if (utf8)
1614 {
1615 STRLEN retlen;
1616 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1617 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1618 if (retlen == (STRLEN) -1 || retlen == 0)
1619 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1620 s += retlen;
1621 }
1622 else
1623 aint = *(U8 *)(s)++;
1624 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1625 aint -= 256;
08ca2aa3 1626 if (!checksum)
6e449a3a 1627 mPUSHi(aint);
73cb7263
NC
1628 else if (checksum > bits_in_uv)
1629 cdouble += (NV)aint;
1630 else
1631 cuv += aint;
a6ec74c1
JH
1632 }
1633 break;
08ca2aa3
TH
1634 case 'W':
1635 W_checksum:
1651fc44 1636 if (utf8) {
08ca2aa3 1637 while (len-- > 0 && s < strend) {
08ca2aa3 1638 STRLEN retlen;
f7fe979e 1639 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1640 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1641 if (retlen == (STRLEN) -1 || retlen == 0)
1642 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1643 s += retlen;
1644 if (!checksum)
6e449a3a 1645 mPUSHu(val);
08ca2aa3
TH
1646 else if (checksum > bits_in_uv)
1647 cdouble += (NV) val;
d6d3e8bd 1648 else
08ca2aa3 1649 cuv += val;
fc241834 1650 }
08ca2aa3 1651 } else if (!checksum)
a6ec74c1 1652 while (len-- > 0) {
f7fe979e 1653 const U8 ch = *(U8 *) s++;
6e449a3a 1654 mPUSHu(ch);
a6ec74c1 1655 }
08ca2aa3
TH
1656 else if (checksum > bits_in_uv)
1657 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1658 else
1659 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1660 break;
1661 case 'U':
35bcd338 1662 if (len == 0) {
c5333953 1663 if (explicit_length && howlen != e_star) {
08ca2aa3 1664 /* Switch to "bytes in UTF-8" mode */
f337b084 1665 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1666 else
1667 /* Should be impossible due to the need_utf8() test */
1668 Perl_croak(aTHX_ "U0 mode on a byte string");
1669 }
35bcd338
JH
1670 break;
1671 }
08ca2aa3 1672 if (len > strend - s) len = strend - s;
fc241834 1673 if (!checksum) {
08ca2aa3
TH
1674 if (len && unpack_only_one) len = 1;
1675 EXTEND(SP, len);
1676 EXTEND_MORTAL(len);
fc241834 1677 }
08ca2aa3
TH
1678 while (len-- > 0 && s < strend) {
1679 STRLEN retlen;
1680 UV auv;
1681 if (utf8) {
1682 U8 result[UTF8_MAXLEN];
f7fe979e 1683 const char *ptr = s;
08ca2aa3 1684 STRLEN len;
08ca2aa3
TH
1685 /* Bug: warns about bad utf8 even if we are short on bytes
1686 and will break out of the loop */
230e1fce
NC
1687 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1688 'U'))
08ca2aa3
TH
1689 break;
1690 len = UTF8SKIP(result);
fc241834 1691 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1692 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1693 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1694 s = ptr;
1695 } else {
1696 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1697 if (retlen == (STRLEN) -1 || retlen == 0)
1698 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1699 s += retlen;
1700 }
1701 if (!checksum)
6e449a3a 1702 mPUSHu(auv);
73cb7263 1703 else if (checksum > bits_in_uv)
08ca2aa3 1704 cdouble += (NV) auv;
73cb7263 1705 else
08ca2aa3 1706 cuv += auv;
a6ec74c1
JH
1707 }
1708 break;
49704364
WL
1709 case 's' | TYPE_IS_SHRIEKING:
1710#if SHORTSIZE != SIZE16
73cb7263 1711 while (len-- > 0) {
08ca2aa3 1712 short ashort;
f337b084
TH
1713 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1714 DO_BO_UNPACK(ashort, s);
08ca2aa3 1715 if (!checksum)
6e449a3a 1716 mPUSHi(ashort);
73cb7263
NC
1717 else if (checksum > bits_in_uv)
1718 cdouble += (NV)ashort;
1719 else
1720 cuv += ashort;
49704364
WL
1721 }
1722 break;
1723#else
1724 /* Fallthrough! */
a6ec74c1 1725#endif
49704364 1726 case 's':
73cb7263 1727 while (len-- > 0) {
08ca2aa3
TH
1728 I16 ai16;
1729
1730#if U16SIZE > SIZE16
1731 ai16 = 0;
1732#endif
f337b084 1733 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1734 DO_BO_UNPACK(ai16, 16);
1109a392 1735#if U16SIZE > SIZE16
73cb7263
NC
1736 if (ai16 > 32767)
1737 ai16 -= 65536;
a6ec74c1 1738#endif
08ca2aa3 1739 if (!checksum)
6e449a3a 1740 mPUSHi(ai16);
73cb7263
NC
1741 else if (checksum > bits_in_uv)
1742 cdouble += (NV)ai16;
1743 else
1744 cuv += ai16;
a6ec74c1
JH
1745 }
1746 break;
49704364
WL
1747 case 'S' | TYPE_IS_SHRIEKING:
1748#if SHORTSIZE != SIZE16
73cb7263 1749 while (len-- > 0) {
08ca2aa3 1750 unsigned short aushort;
f337b084
TH
1751 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1752 DO_BO_UNPACK(aushort, s);
08ca2aa3 1753 if (!checksum)
6e449a3a 1754 mPUSHu(aushort);
73cb7263
NC
1755 else if (checksum > bits_in_uv)
1756 cdouble += (NV)aushort;
1757 else
1758 cuv += aushort;
49704364
WL
1759 }
1760 break;
1761#else
486ec47a 1762 /* Fallthrough! */
49704364 1763#endif
a6ec74c1
JH
1764 case 'v':
1765 case 'n':
1766 case 'S':
73cb7263 1767 while (len-- > 0) {
08ca2aa3
TH
1768 U16 au16;
1769#if U16SIZE > SIZE16
1770 au16 = 0;
1771#endif
f337b084 1772 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1773 DO_BO_UNPACK(au16, 16);
a6ec74c1 1774#ifdef HAS_NTOHS
73cb7263
NC
1775 if (datumtype == 'n')
1776 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1777#endif
1778#ifdef HAS_VTOHS
73cb7263
NC
1779 if (datumtype == 'v')
1780 au16 = vtohs(au16);
a6ec74c1 1781#endif
08ca2aa3 1782 if (!checksum)
6e449a3a 1783 mPUSHu(au16);
73cb7263 1784 else if (checksum > bits_in_uv)
f337b084 1785 cdouble += (NV) au16;
73cb7263
NC
1786 else
1787 cuv += au16;
a6ec74c1
JH
1788 }
1789 break;
7212898e 1790#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1791 case 'v' | TYPE_IS_SHRIEKING:
1792 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1793 while (len-- > 0) {
08ca2aa3
TH
1794 I16 ai16;
1795# if U16SIZE > SIZE16
1796 ai16 = 0;
1797# endif
f337b084 1798 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1799# ifdef HAS_NTOHS
73cb7263 1800 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1801 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1802# endif /* HAS_NTOHS */
1803# ifdef HAS_VTOHS
73cb7263 1804 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1805 ai16 = (I16) vtohs((U16) ai16);
1806# endif /* HAS_VTOHS */
1807 if (!checksum)
6e449a3a 1808 mPUSHi(ai16);
73cb7263 1809 else if (checksum > bits_in_uv)
08ca2aa3 1810 cdouble += (NV) ai16;
73cb7263
NC
1811 else
1812 cuv += ai16;
068bd2e7
MHM
1813 }
1814 break;
08ca2aa3 1815#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1816 case 'i':
49704364 1817 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1818 while (len-- > 0) {
08ca2aa3 1819 int aint;
f337b084
TH
1820 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1821 DO_BO_UNPACK(aint, i);
08ca2aa3 1822 if (!checksum)
6e449a3a 1823 mPUSHi(aint);
73cb7263
NC
1824 else if (checksum > bits_in_uv)
1825 cdouble += (NV)aint;
1826 else
1827 cuv += aint;
a6ec74c1
JH
1828 }
1829 break;
1830 case 'I':
49704364 1831 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1832 while (len-- > 0) {
08ca2aa3 1833 unsigned int auint;
f337b084
TH
1834 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1835 DO_BO_UNPACK(auint, i);
08ca2aa3 1836 if (!checksum)
6e449a3a 1837 mPUSHu(auint);
73cb7263
NC
1838 else if (checksum > bits_in_uv)
1839 cdouble += (NV)auint;
1840 else
1841 cuv += auint;
a6ec74c1
JH
1842 }
1843 break;
92d41999 1844 case 'j':
73cb7263 1845 while (len-- > 0) {
08ca2aa3 1846 IV aiv;
f337b084 1847 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1848#if IVSIZE == INTSIZE
f337b084 1849 DO_BO_UNPACK(aiv, i);
1109a392 1850#elif IVSIZE == LONGSIZE
f337b084 1851 DO_BO_UNPACK(aiv, l);
1109a392 1852#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1853 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1854#else
1855 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1856#endif
08ca2aa3 1857 if (!checksum)
6e449a3a 1858 mPUSHi(aiv);
73cb7263
NC
1859 else if (checksum > bits_in_uv)
1860 cdouble += (NV)aiv;
1861 else
1862 cuv += aiv;
92d41999
JH
1863 }
1864 break;
1865 case 'J':
73cb7263 1866 while (len-- > 0) {
08ca2aa3 1867 UV auv;
f337b084 1868 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1869#if IVSIZE == INTSIZE
f337b084 1870 DO_BO_UNPACK(auv, i);
08ca2aa3 1871#elif IVSIZE == LONGSIZE
f337b084 1872 DO_BO_UNPACK(auv, l);
08ca2aa3 1873#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1874 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1875#else
1876 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1877#endif
08ca2aa3 1878 if (!checksum)
6e449a3a 1879 mPUSHu(auv);
73cb7263
NC
1880 else if (checksum > bits_in_uv)
1881 cdouble += (NV)auv;
1882 else
1883 cuv += auv;
92d41999
JH
1884 }
1885 break;
49704364
WL
1886 case 'l' | TYPE_IS_SHRIEKING:
1887#if LONGSIZE != SIZE32
73cb7263 1888 while (len-- > 0) {
08ca2aa3 1889 long along;
f337b084
TH
1890 SHIFT_VAR(utf8, s, strend, along, datumtype);
1891 DO_BO_UNPACK(along, l);
08ca2aa3 1892 if (!checksum)
6e449a3a 1893 mPUSHi(along);
73cb7263
NC
1894 else if (checksum > bits_in_uv)
1895 cdouble += (NV)along;
1896 else
1897 cuv += along;
49704364
WL
1898 }
1899 break;
1900#else
1901 /* Fallthrough! */
a6ec74c1 1902#endif
49704364 1903 case 'l':
73cb7263 1904 while (len-- > 0) {
08ca2aa3
TH
1905 I32 ai32;
1906#if U32SIZE > SIZE32
1907 ai32 = 0;
1908#endif
f337b084 1909 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1910 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1911#if U32SIZE > SIZE32
08ca2aa3 1912 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1913#endif
08ca2aa3 1914 if (!checksum)
6e449a3a 1915 mPUSHi(ai32);
73cb7263
NC
1916 else if (checksum > bits_in_uv)
1917 cdouble += (NV)ai32;
1918 else
1919 cuv += ai32;
a6ec74c1
JH
1920 }
1921 break;
49704364
WL
1922 case 'L' | TYPE_IS_SHRIEKING:
1923#if LONGSIZE != SIZE32
73cb7263 1924 while (len-- > 0) {
08ca2aa3 1925 unsigned long aulong;
f337b084
TH
1926 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1927 DO_BO_UNPACK(aulong, l);
08ca2aa3 1928 if (!checksum)
6e449a3a 1929 mPUSHu(aulong);
73cb7263
NC
1930 else if (checksum > bits_in_uv)
1931 cdouble += (NV)aulong;
1932 else
1933 cuv += aulong;
49704364
WL
1934 }
1935 break;
1936#else
1937 /* Fall through! */
1938#endif
a6ec74c1
JH
1939 case 'V':
1940 case 'N':
1941 case 'L':
73cb7263 1942 while (len-- > 0) {
08ca2aa3
TH
1943 U32 au32;
1944#if U32SIZE > SIZE32
1945 au32 = 0;
1946#endif
f337b084 1947 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1948 DO_BO_UNPACK(au32, 32);
a6ec74c1 1949#ifdef HAS_NTOHL
73cb7263
NC
1950 if (datumtype == 'N')
1951 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1952#endif
1953#ifdef HAS_VTOHL
73cb7263
NC
1954 if (datumtype == 'V')
1955 au32 = vtohl(au32);
a6ec74c1 1956#endif
08ca2aa3 1957 if (!checksum)
6e449a3a 1958 mPUSHu(au32);
fc241834
RGS
1959 else if (checksum > bits_in_uv)
1960 cdouble += (NV)au32;
1961 else
1962 cuv += au32;
a6ec74c1
JH
1963 }
1964 break;
7212898e 1965#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1966 case 'V' | TYPE_IS_SHRIEKING:
1967 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1968 while (len-- > 0) {
08ca2aa3
TH
1969 I32 ai32;
1970# if U32SIZE > SIZE32
1971 ai32 = 0;
1972# endif
f337b084 1973 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1974# ifdef HAS_NTOHL
73cb7263
NC
1975 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1976 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1977# endif
1978# ifdef HAS_VTOHL
73cb7263
NC
1979 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1980 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1981# endif
1982 if (!checksum)
6e449a3a 1983 mPUSHi(ai32);
73cb7263
NC
1984 else if (checksum > bits_in_uv)
1985 cdouble += (NV)ai32;
1986 else
1987 cuv += ai32;
068bd2e7
MHM
1988 }
1989 break;
08ca2aa3 1990#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1991 case 'p':
a6ec74c1 1992 while (len-- > 0) {
f7fe979e 1993 const char *aptr;
f337b084 1994 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1995 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1996 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1997 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1998 }
1999 break;
2000 case 'w':
a6ec74c1
JH
2001 {
2002 UV auv = 0;
2003 U32 bytes = 0;
fc241834 2004
08ca2aa3
TH
2005 while (len > 0 && s < strend) {
2006 U8 ch;
f337b084 2007 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 2008 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2009 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2010 if (ch < 0x80) {
a6ec74c1 2011 bytes = 0;
6e449a3a 2012 mPUSHu(auv);
a6ec74c1
JH
2013 len--;
2014 auv = 0;
08ca2aa3 2015 continue;
a6ec74c1 2016 }
08ca2aa3 2017 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2018 const char *t;
a6ec74c1 2019
f5992bc4 2020 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2021 while (s < strend) {
f337b084 2022 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2023 sv = mul128(sv, (U8)(ch & 0x7f));
2024 if (!(ch & 0x80)) {
a6ec74c1
JH
2025 bytes = 0;
2026 break;
2027 }
2028 }
10516c54 2029 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2030 while (*t == '0')
2031 t++;
2032 sv_chop(sv, t);
6e449a3a 2033 mPUSHs(sv);
a6ec74c1
JH
2034 len--;
2035 auv = 0;
2036 }
2037 }
2038 if ((s >= strend) && bytes)
49704364 2039 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2040 }
2041 break;
2042 case 'P':
49704364
WL
2043 if (symptr->howlen == e_star)
2044 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2045 EXTEND(SP, 1);
2d3e0934 2046 if (s + sizeof(char*) <= strend) {
08ca2aa3 2047 char *aptr;
f337b084 2048 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2049 DO_BO_UNPACK_PC(aptr);
fc241834 2050 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2051 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2052 }
a6ec74c1
JH
2053 break;
2054#ifdef HAS_QUAD
2055 case 'q':
73cb7263 2056 while (len-- > 0) {
08ca2aa3 2057 Quad_t aquad;
f337b084
TH
2058 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2059 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2060 if (!checksum)
6e449a3a
MHM
2061 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2062 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2063 else if (checksum > bits_in_uv)
2064 cdouble += (NV)aquad;
2065 else
2066 cuv += aquad;
2067 }
a6ec74c1
JH
2068 break;
2069 case 'Q':
73cb7263 2070 while (len-- > 0) {
08ca2aa3 2071 Uquad_t auquad;
f337b084
TH
2072 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2073 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2074 if (!checksum)
6e449a3a
MHM
2075 mPUSHs(auquad <= UV_MAX ?
2076 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2077 else if (checksum > bits_in_uv)
2078 cdouble += (NV)auquad;
2079 else
2080 cuv += auquad;
a6ec74c1
JH
2081 }
2082 break;
08ca2aa3 2083#endif /* HAS_QUAD */
a6ec74c1
JH
2084 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2085 case 'f':
73cb7263 2086 while (len-- > 0) {
08ca2aa3 2087 float afloat;
f337b084 2088 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2089 DO_BO_UNPACK_N(afloat, float);
2090 if (!checksum)
6e449a3a 2091 mPUSHn(afloat);
08ca2aa3 2092 else
73cb7263 2093 cdouble += afloat;
fc241834 2094 }
a6ec74c1
JH
2095 break;
2096 case 'd':
73cb7263 2097 while (len-- > 0) {
08ca2aa3 2098 double adouble;
f337b084 2099 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2100 DO_BO_UNPACK_N(adouble, double);
2101 if (!checksum)
6e449a3a 2102 mPUSHn(adouble);
08ca2aa3 2103 else
73cb7263 2104 cdouble += adouble;
fc241834 2105 }
a6ec74c1 2106 break;
92d41999 2107 case 'F':
73cb7263 2108 while (len-- > 0) {
275663fa
TC
2109 NV_bytes anv;
2110 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2111 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 2112 if (!checksum)
275663fa 2113 mPUSHn(anv.nv);
08ca2aa3 2114 else
275663fa 2115 cdouble += anv.nv;
fc241834 2116 }
92d41999
JH
2117 break;
2118#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2119 case 'D':
73cb7263 2120 while (len-- > 0) {
275663fa
TC
2121 ld_bytes aldouble;
2122 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2123 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 2124 if (!checksum)
275663fa 2125 mPUSHn(aldouble.ld);
08ca2aa3 2126 else
275663fa 2127 cdouble += aldouble.ld;
92d41999
JH
2128 }
2129 break;
2130#endif
a6ec74c1 2131 case 'u':
858fe5e1 2132 if (!checksum) {
f7fe979e 2133 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2134 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2135 if (l) SvPOK_on(sv);
2136 }
2137 if (utf8) {
2138 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2139 I32 a, b, c, d;
db187877 2140 char hunk[3];
08ca2aa3 2141
08ca2aa3
TH
2142 while (len > 0) {
2143 next_uni_uu(aTHX_ &s, strend, &a);
2144 next_uni_uu(aTHX_ &s, strend, &b);
2145 next_uni_uu(aTHX_ &s, strend, &c);
2146 next_uni_uu(aTHX_ &s, strend, &d);
2147 hunk[0] = (char)((a << 2) | (b >> 4));
2148 hunk[1] = (char)((b << 4) | (c >> 2));
2149 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2150 if (!checksum)
2151 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
2152 len -= 3;
2153 }
2154 if (s < strend) {
f7fe979e
AL
2155 if (*s == '\n') {
2156 s++;
2157 }
08ca2aa3
TH
2158 else {
2159 /* possible checksum byte */
f7fe979e
AL
2160 const char *skip = s+UTF8SKIP(s);
2161 if (skip < strend && *skip == '\n')
2162 s = skip+1;
08ca2aa3
TH
2163 }
2164 }
2165 }
2166 } else {
fc241834
RGS
2167 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2168 I32 a, b, c, d;
db187877 2169 char hunk[3];
a6ec74c1 2170
fc241834
RGS
2171 len = PL_uudmap[*(U8*)s++] & 077;
2172 while (len > 0) {
2173 if (s < strend && ISUUCHAR(*s))
2174 a = PL_uudmap[*(U8*)s++] & 077;
2175 else
2176 a = 0;
2177 if (s < strend && ISUUCHAR(*s))
2178 b = PL_uudmap[*(U8*)s++] & 077;
2179 else
2180 b = 0;
2181 if (s < strend && ISUUCHAR(*s))
2182 c = PL_uudmap[*(U8*)s++] & 077;
2183 else
2184 c = 0;
2185 if (s < strend && ISUUCHAR(*s))
2186 d = PL_uudmap[*(U8*)s++] & 077;
2187 else
2188 d = 0;
2189 hunk[0] = (char)((a << 2) | (b >> 4));
2190 hunk[1] = (char)((b << 4) | (c >> 2));
2191 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2192 if (!checksum)
2193 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
2194 len -= 3;
2195 }
2196 if (*s == '\n')
2197 s++;
2198 else /* possible checksum byte */
2199 if (s + 1 < strend && s[1] == '\n')
2200 s += 2;
a6ec74c1 2201 }
08ca2aa3 2202 }
858fe5e1
TC
2203 if (!checksum)
2204 XPUSHs(sv);
a6ec74c1
JH
2205 break;
2206 }
49704364 2207
a6ec74c1 2208 if (checksum) {
1109a392 2209 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2210 (checksum > bits_in_uv &&
08ca2aa3
TH
2211 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2212 NV trouble, anv;
a6ec74c1 2213
08ca2aa3 2214 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2215 while (checksum >= 16) {
2216 checksum -= 16;
08ca2aa3 2217 anv *= 65536.0;
a6ec74c1 2218 }
a6ec74c1 2219 while (cdouble < 0.0)
08ca2aa3
TH
2220 cdouble += anv;
2221 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2222 sv = newSVnv(cdouble);
a6ec74c1
JH
2223 }
2224 else {
fa8ec7c1
NC
2225 if (checksum < bits_in_uv) {
2226 UV mask = ((UV)1 << checksum) - 1;
92d41999 2227 cuv &= mask;
a6ec74c1 2228 }
c4c5f44a 2229 sv = newSVuv(cuv);
a6ec74c1 2230 }
6e449a3a 2231 mXPUSHs(sv);
a6ec74c1
JH
2232 checksum = 0;
2233 }
fc241834 2234
49704364
WL
2235 if (symptr->flags & FLAG_SLASH){
2236 if (SP - PL_stack_base - start_sp_offset <= 0)
2237 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2238 if( next_symbol(symptr) ){
2239 if( symptr->howlen == e_number )
2240 Perl_croak(aTHX_ "Count after length/code in unpack" );
2241 if( beyond ){
2242 /* ...end of char buffer then no decent length available */
2243 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2244 } else {
2245 /* take top of stack (hope it's numeric) */
2246 len = POPi;
2247 if( len < 0 )
2248 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2249 }
2250 } else {
2251 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2252 }
2253 datumtype = symptr->code;
21c16052 2254 explicit_length = FALSE;
49704364
WL
2255 goto redo_switch;
2256 }
a6ec74c1 2257 }
49704364 2258
18529408
IZ
2259 if (new_s)
2260 *new_s = s;
2261 PUTBACK;
2262 return SP - PL_stack_base - start_sp_offset;
2263}
2264
2265PP(pp_unpack)
2266{
97aff369 2267 dVAR;
18529408 2268 dSP;
bab9c0ac 2269 dPOPPOPssrl;
18529408
IZ
2270 I32 gimme = GIMME_V;
2271 STRLEN llen;
2272 STRLEN rlen;
5c144d81
NC
2273 const char *pat = SvPV_const(left, llen);
2274 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2275 const char *strend = s + rlen;
2276 const char *patend = pat + llen;
08ca2aa3 2277 I32 cnt;
18529408
IZ
2278
2279 PUTBACK;
7accc089 2280 cnt = unpackstring(pat, patend, s, strend,
49704364 2281 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2282 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2283
18529408
IZ
2284 SPAGAIN;
2285 if ( !cnt && gimme == G_SCALAR )
2286 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2287 RETURN;
2288}
2289
f337b084 2290STATIC U8 *
f7fe979e 2291doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2292{
f337b084 2293 *h++ = PL_uuemap[len];
a6ec74c1 2294 while (len > 2) {
f337b084
TH
2295 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2296 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2297 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2298 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2299 s += 3;
2300 len -= 3;
2301 }
2302 if (len > 0) {
f7fe979e 2303 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2304 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2305 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2306 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2307 *h++ = PL_uuemap[0];
a6ec74c1 2308 }
f337b084
TH
2309 *h++ = '\n';
2310 return h;
a6ec74c1
JH
2311}
2312
2313STATIC SV *
f7fe979e 2314S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2315{
8b6e33c7
AL
2316 SV *result = newSVpvn(s, l);
2317 char *const result_c = SvPV_nolen(result); /* convenience */
2318 char *out = result_c;
2319 bool skip = 1;
2320 bool ignore = 0;
a6ec74c1 2321
7918f24d
NC
2322 PERL_ARGS_ASSERT_IS_AN_INT;
2323
a6ec74c1
JH
2324 while (*s) {
2325 switch (*s) {
2326 case ' ':
2327 break;
2328 case '+':
2329 if (!skip) {
2330 SvREFCNT_dec(result);
2331 return (NULL);
2332 }
2333 break;
2334 case '0':
2335 case '1':
2336 case '2':
2337 case '3':
2338 case '4':
2339 case '5':
2340 case '6':
2341 case '7':
2342 case '8':
2343 case '9':
2344 skip = 0;
2345 if (!ignore) {
2346 *(out++) = *s;
2347 }
2348 break;
2349 case '.':
2350 ignore = 1;
2351 break;
2352 default:
2353 SvREFCNT_dec(result);
2354 return (NULL);
2355 }
2356 s++;
2357 }
2358 *(out++) = '\0';
2359 SvCUR_set(result, out - result_c);
2360 return (result);
2361}
2362
2363/* pnum must be '\0' terminated */
2364STATIC int
2365S_div128(pTHX_ SV *pnum, bool *done)
2366{
8b6e33c7
AL
2367 STRLEN len;
2368 char * const s = SvPV(pnum, len);
2369 char *t = s;
2370 int m = 0;
2371
7918f24d
NC
2372 PERL_ARGS_ASSERT_DIV128;
2373
8b6e33c7
AL
2374 *done = 1;
2375 while (*t) {
2376 const int i = m * 10 + (*t - '0');
2377 const int r = (i >> 7); /* r < 10 */
2378 m = i & 0x7F;
2379 if (r) {
2380 *done = 0;
2381 }
2382 *(t++) = '0' + r;
a6ec74c1 2383 }
8b6e33c7
AL
2384 *(t++) = '\0';
2385 SvCUR_set(pnum, (STRLEN) (t - s));
2386 return (m);
a6ec74c1
JH
2387}
2388
18529408 2389/*
7accc089
JH
2390=for apidoc packlist
2391
2392The engine implementing pack() Perl function.
2393
bfce84ec
AL
2394=cut
2395*/
7accc089
JH
2396
2397void
f7fe979e 2398Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2399{
97aff369 2400 dVAR;
aadb217d
JH
2401 tempsym_t sym;
2402
7918f24d
NC
2403 PERL_ARGS_ASSERT_PACKLIST;
2404
f7fe979e 2405 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2406
f337b084
TH
2407 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2408 Also make sure any UTF8 flag is loaded */
56eb0262 2409 SvPV_force_nolen(cat);
bfce84ec
AL
2410 if (DO_UTF8(cat))
2411 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2412
49704364
WL
2413 (void)pack_rec( cat, &sym, beglist, endlist );
2414}
2415
f337b084
TH
2416/* like sv_utf8_upgrade, but also repoint the group start markers */
2417STATIC void
2418marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2419 STRLEN len;
2420 tempsym_t *group;
f7fe979e
AL
2421 const char *from_ptr, *from_start, *from_end, **marks, **m;
2422 char *to_start, *to_ptr;
f337b084
TH
2423
2424 if (SvUTF8(sv)) return;
2425
aa07b2f6 2426 from_start = SvPVX_const(sv);
f337b084
TH
2427 from_end = from_start + SvCUR(sv);
2428 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2429 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2430 if (from_ptr == from_end) {
2431 /* Simple case: no character needs to be changed */
2432 SvUTF8_on(sv);
2433 return;
2434 }
2435
3473cf63 2436 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2437 Newx(to_start, len, char);
f337b084
TH
2438 Copy(from_start, to_start, from_ptr-from_start, char);
2439 to_ptr = to_start + (from_ptr-from_start);
2440
a02a5408 2441 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2442 for (group=sym_ptr; group; group = group->previous)
2443 marks[group->level] = from_start + group->strbeg;
2444 marks[sym_ptr->level+1] = from_end+1;
2445 for (m = marks; *m < from_ptr; m++)
2446 *m = to_start + (*m-from_start);
2447
2448 for (;from_ptr < from_end; from_ptr++) {
2449 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2450 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2451 }
2452 *to_ptr = 0;
2453
2454 while (*m == from_ptr) *m++ = to_ptr;
2455 if (m != marks + sym_ptr->level+1) {
2456 Safefree(marks);
2457 Safefree(to_start);
5f5991a0 2458 Perl_croak(aTHX_ "panic: marks beyond string end");
f337b084
TH
2459 }
2460 for (group=sym_ptr; group; group = group->previous)
2461 group->strbeg = marks[group->level] - to_start;
2462 Safefree(marks);
2463
2464 if (SvOOK(sv)) {
2465 if (SvIVX(sv)) {
b162af07 2466 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2467 from_start -= SvIVX(sv);
2468 SvIV_set(sv, 0);
2469 }
2470 SvFLAGS(sv) &= ~SVf_OOK;
2471 }
2472 if (SvLEN(sv) != 0)
2473 Safefree(from_start);
f880fe2f 2474 SvPV_set(sv, to_start);
b162af07
SP
2475 SvCUR_set(sv, to_ptr - to_start);
2476 SvLEN_set(sv, len);
f337b084
TH
2477 SvUTF8_on(sv);
2478}
2479
2480/* Exponential string grower. Makes string extension effectively O(n)
2481 needed says how many extra bytes we need (not counting the final '\0')
2482 Only grows the string if there is an actual lack of space
2483*/
2484STATIC char *
0bd48802 2485S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2486 const STRLEN cur = SvCUR(sv);
2487 const STRLEN len = SvLEN(sv);
f337b084 2488 STRLEN extend;
7918f24d
NC
2489
2490 PERL_ARGS_ASSERT_SV_EXP_GROW;
2491
f337b084
TH
2492 if (len - cur > needed) return SvPVX(sv);
2493 extend = needed > len ? needed : len;
2494 return SvGROW(sv, len+extend+1);
2495}
49704364
WL
2496
2497STATIC
2498SV **
f337b084 2499S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2500{
97aff369 2501 dVAR;
49704364 2502 tempsym_t lookahead;
f337b084
TH
2503 I32 items = endlist - beglist;
2504 bool found = next_symbol(symptr);
2505 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2506 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2507
7918f24d
NC
2508 PERL_ARGS_ASSERT_PACK_REC;
2509
f337b084
TH
2510 if (symptr->level == 0 && found && symptr->code == 'U') {
2511 marked_upgrade(aTHX_ cat, symptr);
2512 symptr->flags |= FLAG_DO_UTF8;
2513 utf8 = 0;
49704364 2514 }
f337b084 2515 symptr->strbeg = SvCUR(cat);
49704364
WL
2516
2517 while (found) {
f337b084
TH
2518 SV *fromstr;
2519 STRLEN fromlen;
2520 I32 len;
a0714e2c 2521 SV *lengthcode = NULL;
49704364 2522 I32 datumtype = symptr->code;
f337b084
TH
2523 howlen_t howlen = symptr->howlen;
2524 char *start = SvPVX(cat);
2525 char *cur = start + SvCUR(cat);
49704364 2526
f337b084
TH
2527#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2528
2529 switch (howlen) {
fc241834 2530 case e_star:
f337b084
TH
2531 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2532 0 : items;
2533 break;
2534 default:
2535 /* e_no_len and e_number */
2536 len = symptr->length;
49704364
WL
2537 break;
2538 }
2539
f337b084 2540 if (len) {
a7a3cfaa 2541 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2542
a7a3cfaa
TH
2543 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2544 /* We can process this letter. */
2545 STRLEN size = props & PACK_SIZE_MASK;
2546 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2547 }
f337b084
TH
2548 }
2549
49704364
WL
2550 /* Look ahead for next symbol. Do we have code/code? */
2551 lookahead = *symptr;
2552 found = next_symbol(&lookahead);
246f24af
TH
2553 if (symptr->flags & FLAG_SLASH) {
2554 IV count;
f337b084 2555 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2556 if (strchr("aAZ", lookahead.code)) {
2557 if (lookahead.howlen == e_number) count = lookahead.length;
2558 else {
ce399ba6
NC
2559 if (items > 0) {
2560 if (SvGAMAGIC(*beglist)) {
2561 /* Avoid reading the active data more than once
2562 by copying it to a temporary. */
2563 STRLEN len;
2564 const char *const pv = SvPV_const(*beglist, len);
740cce10 2565 SV *const temp
59cd0e26
NC
2566 = newSVpvn_flags(pv, len,
2567 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2568 *beglist = temp;
2569 }
246f24af
TH
2570 count = DO_UTF8(*beglist) ?
2571 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2572 }
246f24af
TH
2573 else count = 0;
2574 if (lookahead.code == 'Z') count++;
2575 }
2576 } else {
2577 if (lookahead.howlen == e_number && lookahead.length < items)
2578 count = lookahead.length;
2579 else count = items;
2580 }
2581 lookahead.howlen = e_number;
2582 lookahead.length = count;
2583 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2584 }
49704364 2585
fc241834
RGS
2586 /* Code inside the switch must take care to properly update
2587 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2588 doesn't simply leave using break */
1109a392 2589 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2590 default:
f337b084
TH
2591 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2592 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2593 case '%':
49704364 2594 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2595 {
2596 char *from;
2597#ifdef PERL_PACK_CAN_SHRIEKSIGN
2598 case '.' | TYPE_IS_SHRIEKING:
2599#endif
2600 case '.':
2601 if (howlen == e_star) from = start;
2602 else if (len == 0) from = cur;
2603 else {
2604 tempsym_t *group = symptr;
2605
2606 while (--len && group) group = group->previous;
2607 from = group ? start + group->strbeg : start;
2608 }
2609 fromstr = NEXTFROM;
2610 len = SvIV(fromstr);
2611 goto resize;
2612#ifdef PERL_PACK_CAN_SHRIEKSIGN
2613 case '@' | TYPE_IS_SHRIEKING:
2614#endif
a6ec74c1 2615 case '@':
28be1210
TH
2616 from = start + symptr->strbeg;
2617 resize:
2618#ifdef PERL_PACK_CAN_SHRIEKSIGN
2619 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2620#else /* PERL_PACK_CAN_SHRIEKSIGN */
2621 if (utf8)
2622#endif
2623 if (len >= 0) {
2624 while (len && from < cur) {
2625 from += UTF8SKIP(from);
2626 len--;
2627 }
2628 if (from > cur)
2629 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2630 if (len) {
2631 /* Here we know from == cur */
2632 grow:
2633 GROWING(0, cat, start, cur, len);
2634 Zero(cur, len, char);
2635 cur += len;
2636 } else if (from < cur) {
2637 len = cur - from;
2638 goto shrink;
2639 } else goto no_change;
2640 } else {
2641 cur = from;
2642 len = -len;
2643 goto utf8_shrink;
f337b084 2644 }
28be1210
TH
2645 else {
2646 len -= cur - from;
f337b084 2647 if (len > 0) goto grow;
28be1210 2648 if (len == 0) goto no_change;
fc241834 2649 len = -len;
28be1210 2650 goto shrink;
f337b084 2651 }
a6ec74c1 2652 break;
28be1210 2653 }
fc241834 2654 case '(': {
49704364 2655 tempsym_t savsym = *symptr;
66c611c5
MHM
2656 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2657 symptr->flags |= group_modifiers;
49704364
WL
2658 symptr->patend = savsym.grpend;
2659 symptr->level++;
f337b084 2660 symptr->previous = &lookahead;
18529408 2661 while (len--) {
f337b084
TH
2662 U32 was_utf8;
2663 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2664 else symptr->flags &= ~FLAG_PARSE_UTF8;
2665 was_utf8 = SvUTF8(cat);
49704364 2666 symptr->patptr = savsym.grpbeg;
f337b084
TH
2667 beglist = pack_rec(cat, symptr, beglist, endlist);
2668 if (SvUTF8(cat) != was_utf8)
2669 /* This had better be an upgrade while in utf8==0 mode */
2670 utf8 = 1;
2671
49704364 2672 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2673 break; /* No way to continue */
2674 }
ee790063 2675 items = endlist - beglist;
f337b084
TH
2676 lookahead.flags = symptr->flags & ~group_modifiers;
2677 goto no_change;
18529408 2678 }
62f95557
IZ
2679 case 'X' | TYPE_IS_SHRIEKING:
2680 if (!len) /* Avoid division by 0 */
2681 len = 1;
f337b084
TH
2682 if (utf8) {
2683 char *hop, *last;
2684 I32 l = len;
2685 hop = last = start;
2686 while (hop < cur) {
2687 hop += UTF8SKIP(hop);
2688 if (--l == 0) {
2689 last = hop;
2690 l = len;
2691 }
2692 }
2693 if (last > cur)
2694 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2695 cur = last;
2696 break;
2697 }
2698 len = (cur-start) % len;
62f95557 2699 /* FALL THROUGH */
a6ec74c1 2700 case 'X':
f337b084
TH
2701 if (utf8) {
2702 if (len < 1) goto no_change;
28be1210 2703 utf8_shrink:
f337b084
TH
2704 while (len > 0) {
2705 if (cur <= start)
28be1210
TH
2706 Perl_croak(aTHX_ "'%c' outside of string in pack",
2707 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2708 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2709 if (cur <= start)
28be1210
TH
2710 Perl_croak(aTHX_ "'%c' outside of string in pack",
2711 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2712 }
2713 len--;
2714 }
2715 } else {
fc241834 2716 shrink:
f337b084 2717 if (cur - start < len)
28be1210
TH
2718 Perl_croak(aTHX_ "'%c' outside of string in pack",
2719 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2720 cur -= len;
2721 }
2722 if (cur < start+symptr->strbeg) {
2723 /* Make sure group starts don't point into the void */
2724 tempsym_t *group;
9e27e96a 2725 const STRLEN length = cur-start;
f337b084
TH
2726 for (group = symptr;
2727 group && length < group->strbeg;
2728 group = group->previous) group->strbeg = length;
2729 lookahead.strbeg = length;
2730 }
a6ec74c1 2731 break;
fc241834
RGS
2732 case 'x' | TYPE_IS_SHRIEKING: {
2733 I32 ai32;
62f95557
IZ
2734 if (!len) /* Avoid division by 0 */
2735 len = 1;
230e1fce 2736 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2737 else ai32 = (cur - start) % len;
2738 if (ai32 == 0) goto no_change;
2739 len -= ai32;
2740 }
2741 /* FALL THROUGH */
a6ec74c1 2742 case 'x':
f337b084 2743 goto grow;
a6ec74c1
JH
2744 case 'A':
2745 case 'Z':
f337b084 2746 case 'a': {
f7fe979e 2747 const char *aptr;
f337b084 2748
a6ec74c1 2749 fromstr = NEXTFROM;
e62f0680 2750 aptr = SvPV_const(fromstr, fromlen);
f337b084 2751 if (DO_UTF8(fromstr)) {
f7fe979e 2752 const char *end, *s;
f337b084
TH
2753
2754 if (!utf8 && !SvUTF8(cat)) {
2755 marked_upgrade(aTHX_ cat, symptr);
2756 lookahead.flags |= FLAG_DO_UTF8;
2757 lookahead.strbeg = symptr->strbeg;
2758 utf8 = 1;
2759 start = SvPVX(cat);
2760 cur = start + SvCUR(cat);
2761 }
fc241834 2762 if (howlen == e_star) {
f337b084
TH
2763 if (utf8) goto string_copy;
2764 len = fromlen+1;
2765 }
2766 s = aptr;
2767 end = aptr + fromlen;
2768 fromlen = datumtype == 'Z' ? len-1 : len;
2769 while ((I32) fromlen > 0 && s < end) {
2770 s += UTF8SKIP(s);
2771 fromlen--;
2772 }
2773 if (s > end)
2774 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2775 if (utf8) {
fc241834 2776 len = fromlen;
f337b084
TH
2777 if (datumtype == 'Z') len++;
2778 fromlen = s-aptr;
2779 len += fromlen;
fc241834 2780
f337b084 2781 goto string_copy;
fc241834 2782 }
f337b084
TH
2783 fromlen = len - fromlen;
2784 if (datumtype == 'Z') fromlen--;
2785 if (howlen == e_star) {
2786 len = fromlen;
2787 if (datumtype == 'Z') len++;
fc241834 2788 }
f337b084 2789 GROWING(0, cat, start, cur, len);
fc241834 2790 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2791 datumtype | TYPE_IS_PACK))
5f5991a0 2792 Perl_croak(aTHX_ "panic: predicted utf8 length not available");
f337b084 2793 cur += fromlen;
a6ec74c1 2794 len -= fromlen;
f337b084
TH
2795 } else if (utf8) {
2796 if (howlen == e_star) {
2797 len = fromlen;
2798 if (datumtype == 'Z') len++;
a6ec74c1 2799 }
f337b084
TH
2800 if (len <= (I32) fromlen) {
2801 fromlen = len;
2802 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2803 }
fc241834 2804 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2805 upgrade, so:
2806 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2807 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2808 len -= fromlen;
2809 while (fromlen > 0) {
230e1fce 2810 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2811 aptr++;
2812 fromlen--;
fc241834 2813 }
f337b084
TH
2814 } else {
2815 string_copy:
2816 if (howlen == e_star) {
2817 len = fromlen;
2818 if (datumtype == 'Z') len++;
2819 }
2820 if (len <= (I32) fromlen) {
2821 fromlen = len;
2822 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2823 }
f337b084
TH
2824 GROWING(0, cat, start, cur, len);
2825 Copy(aptr, cur, fromlen, char);
2826 cur += fromlen;
2827 len -= fromlen;
a6ec74c1 2828 }
f337b084
TH
2829 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2830 cur += len;
3c4fb04a 2831 SvTAINT(cat);
a6ec74c1 2832 break;
f337b084 2833 }
a6ec74c1 2834 case 'B':
f337b084 2835 case 'b': {
b83604b4 2836 const char *str, *end;
f337b084
TH
2837 I32 l, field_len;
2838 U8 bits;
2839 bool utf8_source;
2840 U32 utf8_flags;
a6ec74c1 2841
fc241834 2842 fromstr = NEXTFROM;
b83604b4 2843 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2844 end = str + fromlen;
2845 if (DO_UTF8(fromstr)) {
2846 utf8_source = TRUE;
041457d9 2847 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2848 } else {
2849 utf8_source = FALSE;
2850 utf8_flags = 0; /* Unused, but keep compilers happy */
2851 }
2852 if (howlen == e_star) len = fromlen;
2853 field_len = (len+7)/8;
2854 GROWING(utf8, cat, start, cur, field_len);
2855 if (len > (I32)fromlen) len = fromlen;
2856 bits = 0;
2857 l = 0;
2858 if (datumtype == 'B')
2859 while (l++ < len) {
2860 if (utf8_source) {
95b63a38 2861 UV val = 0;
f337b084
TH
2862 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2863 bits |= val & 1;
2864 } else bits |= *str++ & 1;
2865 if (l & 7) bits <<= 1;
fc241834 2866 else {
f337b084
TH
2867 PUSH_BYTE(utf8, cur, bits);
2868 bits = 0;
a6ec74c1
JH
2869 }
2870 }
f337b084
TH
2871 else
2872 /* datumtype == 'b' */
2873 while (l++ < len) {
2874 if (utf8_source) {
95b63a38 2875 UV val = 0;
f337b084
TH
2876 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2877 if (val & 1) bits |= 0x80;
2878 } else if (*str++ & 1)
2879 bits |= 0x80;
2880 if (l & 7) bits >>= 1;
fc241834 2881 else {
f337b084
TH
2882 PUSH_BYTE(utf8, cur, bits);
2883 bits = 0;
a6ec74c1
JH
2884 }
2885 }
f337b084
TH
2886 l--;
2887 if (l & 7) {
fc241834 2888 if (datumtype == 'B')
f337b084 2889 bits <<= 7 - (l & 7);
fc241834 2890 else
f337b084
TH
2891 bits >>= 7 - (l & 7);
2892 PUSH_BYTE(utf8, cur, bits);
2893 l += 7;
a6ec74c1 2894 }
f337b084
TH
2895 /* Determine how many chars are left in the requested field */
2896 l /= 8;
2897 if (howlen == e_star) field_len = 0;
2898 else field_len -= l;
2899 Zero(cur, field_len, char);
2900 cur += field_len;
a6ec74c1 2901 break;
f337b084 2902 }
a6ec74c1 2903 case 'H':
f337b084 2904 case 'h': {
10516c54 2905 const char *str, *end;
f337b084
TH
2906 I32 l, field_len;
2907 U8 bits;
2908 bool utf8_source;
2909 U32 utf8_flags;
a6ec74c1 2910
fc241834 2911 fromstr = NEXTFROM;
10516c54 2912 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2913 end = str + fromlen;
2914 if (DO_UTF8(fromstr)) {
2915 utf8_source = TRUE;
041457d9 2916 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2917 } else {
2918 utf8_source = FALSE;
2919 utf8_flags = 0; /* Unused, but keep compilers happy */
2920 }
2921 if (howlen == e_star) len = fromlen;
2922 field_len = (len+1)/2;
2923 GROWING(utf8, cat, start, cur, field_len);
2924 if (!utf8 && len > (I32)fromlen) len = fromlen;
2925 bits = 0;
2926 l = 0;
2927 if (datumtype == 'H')
2928 while (l++ < len) {
2929 if (utf8_source) {
95b63a38 2930 UV val = 0;
f337b084
TH
2931 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2932 if (val < 256 && isALPHA(val))
2933 bits |= (val + 9) & 0xf;
a6ec74c1 2934 else
f337b084
TH
2935 bits |= val & 0xf;
2936 } else if (isALPHA(*str))
2937 bits |= (*str++ + 9) & 0xf;
2938 else
2939 bits |= *str++ & 0xf;
2940 if (l & 1) bits <<= 4;
fc241834 2941 else {
f337b084
TH
2942 PUSH_BYTE(utf8, cur, bits);
2943 bits = 0;
a6ec74c1
JH
2944 }
2945 }
f337b084
TH
2946 else
2947 while (l++ < len) {
2948 if (utf8_source) {
95b63a38 2949 UV val = 0;
f337b084
TH
2950 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2951 if (val < 256 && isALPHA(val))
2952 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2953 else
f337b084
TH
2954 bits |= (val & 0xf) << 4;
2955 } else if (isALPHA(*str))
2956 bits |= ((*str++ + 9) & 0xf) << 4;
2957 else
2958 bits |= (*str++ & 0xf) << 4;
2959 if (l & 1) bits >>= 4;
fc241834 2960 else {
f337b084
TH
2961 PUSH_BYTE(utf8, cur, bits);
2962 bits = 0;
a6ec74c1 2963 }
fc241834 2964 }
f337b084
TH
2965 l--;
2966 if (l & 1) {
2967 PUSH_BYTE(utf8, cur, bits);
2968 l++;
2969 }
2970 /* Determine how many chars are left in the requested field */
2971 l /= 2;
2972 if (howlen == e_star) field_len = 0;
2973 else field_len -= l;
2974 Zero(cur, field_len, char);
2975 cur += field_len;
2976 break;
fc241834
RGS
2977 }
2978 case 'c':
f337b084
TH
2979 while (len-- > 0) {
2980 IV aiv;
2981 fromstr = NEXTFROM;
2982 aiv = SvIV(fromstr);
a2a5de95
NC
2983 if ((-128 > aiv || aiv > 127))
2984 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2985 "Character in 'c' format wrapped in pack");
585ec06d 2986 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2987 }
2988 break;
2989 case 'C':
f337b084
TH
2990 if (len == 0) {
2991 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2992 break;
2993 }
a6ec74c1 2994 while (len-- > 0) {
f337b084 2995 IV aiv;
a6ec74c1 2996 fromstr = NEXTFROM;
f337b084 2997 aiv = SvIV(fromstr);
a2a5de95
NC
2998 if ((0 > aiv || aiv > 0xff))
2999 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3000 "Character in 'C' format wrapped in pack");
1651fc44 3001 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 3002 }
fc241834
RGS
3003 break;
3004 case 'W': {
3005 char *end;
670f1322 3006 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
3007
3008 end = start+SvLEN(cat)-1;
3009 if (utf8) end -= UTF8_MAXLEN-1;
3010 while (len-- > 0) {
3011 UV auv;
3012 fromstr = NEXTFROM;
3013 auv = SvUV(fromstr);
3014 if (in_bytes) auv = auv % 0x100;
3015 if (utf8) {
3016 W_utf8:
3017 if (cur > end) {
3018 *cur = '\0';
b162af07 3019 SvCUR_set(cat, cur - start);
fc241834
RGS
3020
3021 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3022 end = start+SvLEN(cat)-UTF8_MAXLEN;
3023 }
230e1fce
NC
3024 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3025 NATIVE_TO_UNI(auv),
041457d9 3026 warn_utf8 ?
230e1fce 3027 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3028 } else {
3029 if (auv >= 0x100) {
3030 if (!SvUTF8(cat)) {
3031 *cur = '\0';
b162af07 3032 SvCUR_set(cat, cur - start);
fc241834
RGS
3033 marked_upgrade(aTHX_ cat, symptr);
3034 lookahead.flags |= FLAG_DO_UTF8;
3035 lookahead.strbeg = symptr->strbeg;
3036 utf8 = 1;
3037 start = SvPVX(cat);
3038 cur = start + SvCUR(cat);
3039 end = start+SvLEN(cat)-UTF8_MAXLEN;
3040 goto W_utf8;
3041 }
a2a5de95
NC
3042 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3043 "Character in 'W' format wrapped in pack");
fc241834
RGS
3044 auv &= 0xff;
3045 }
3046 if (cur >= end) {
3047 *cur = '\0';
b162af07 3048 SvCUR_set(cat, cur - start);
fc241834
RGS
3049 GROWING(0, cat, start, cur, len+1);
3050 end = start+SvLEN(cat)-1;
3051 }
fe2774ed 3052 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3053 }
3054 }
3055 break;
fc241834
RGS
3056 }
3057 case 'U': {
3058 char *end;
3059
3060 if (len == 0) {
3061 if (!(symptr->flags & FLAG_DO_UTF8)) {
3062 marked_upgrade(aTHX_ cat, symptr);
3063 lookahead.flags |= FLAG_DO_UTF8;
3064 lookahead.strbeg = symptr->strbeg;
3065 }
3066 utf8 = 0;
3067 goto no_change;
3068 }
3069
3070 end = start+SvLEN(cat);
3071 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3072 while (len-- > 0) {
fc241834 3073 UV auv;
a6ec74c1 3074 fromstr = NEXTFROM;
fc241834
RGS
3075 auv = SvUV(fromstr);
3076 if (utf8) {
230e1fce 3077 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3078 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3079 warn_utf8 ?
fc241834
RGS
3080 0 : UNICODE_ALLOW_ANY);
3081 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3082 *cur = '\0';
b162af07 3083 SvCUR_set(cat, cur - start);
fc241834
RGS
3084 GROWING(0, cat, start, cur,
3085 len+(endb-buffer)*UTF8_EXPAND);
3086 end = start+SvLEN(cat);
3087 }
64844641 3088 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3089 } else {
3090 if (cur >= end) {
3091 *cur = '\0';
b162af07 3092 SvCUR_set(cat, cur - start);
fc241834
RGS
3093 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3094 end = start+SvLEN(cat)-UTF8_MAXLEN;
3095 }
230e1fce 3096 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3097 warn_utf8 ?
230e1fce 3098 0 : UNICODE_ALLOW_ANY);
fc241834 3099 }
a6ec74c1 3100 }
a6ec74c1 3101 break;
fc241834 3102 }
a6ec74c1
JH
3103 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3104 case 'f':
a6ec74c1 3105 while (len-- > 0) {
f337b084
TH
3106 float afloat;
3107 NV anv;
a6ec74c1 3108 fromstr = NEXTFROM;
f337b084 3109 anv = SvNV(fromstr);
5cdb9e01 3110#ifdef __VOS__
f337b084 3111 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3112 during conversion from double to float into infinity, so we
3113 do it by hand. This code should either be generalized for
3114 any OS that needs it, or removed if and when VOS implements
3115 posix-976 (suggestion to support mapping to infinity).
3116 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3117{
3118extern const float _float_constants[];
f337b084 3119 if (anv > FLT_MAX)
fc241834 3120 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3121 else if (anv < -FLT_MAX)
fc241834 3122 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3123 else afloat = (float) anv;
3722f0dc 3124}
f337b084 3125#else /* __VOS__ */
baf3cf9c 3126# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3127 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3128 * on Alpha; fake it if we don't have them.
3129 */
f337b084 3130 if (anv > FLT_MAX)
fc241834 3131 afloat = FLT_MAX;
f337b084 3132 else if (anv < -FLT_MAX)
fc241834 3133 afloat = -FLT_MAX;
f337b084 3134 else afloat = (float)anv;
baf3cf9c 3135# else
f337b084 3136 afloat = (float)anv;
baf3cf9c 3137# endif
f337b084 3138#endif /* __VOS__ */
1109a392 3139 DO_BO_PACK_N(afloat, float);
f337b084 3140 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3141 }
3142 break;
3143 case 'd':
a6ec74c1 3144 while (len-- > 0) {
f337b084
TH
3145 double adouble;
3146 NV anv;
a6ec74c1 3147 fromstr = NEXTFROM;
f337b084 3148 anv = SvNV(fromstr);
5cdb9e01 3149#ifdef __VOS__
f337b084 3150 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3151 during conversion from long double to double into infinity,
3152 so we do it by hand. This code should either be generalized
3153 for any OS that needs it, or removed if and when VOS
3154 implements posix-976 (suggestion to support mapping to
3155 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3156{
3157extern const double _double_constants[];
f337b084 3158 if (anv > DBL_MAX)
fc241834 3159 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3160 else if (anv < -DBL_MAX)
fc241834 3161 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3162 else adouble = (double) anv;
3722f0dc 3163}
f337b084 3164#else /* __VOS__ */
baf3cf9c 3165# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3166 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3167 * on Alpha; fake it if we don't have them.
3168 */
f337b084 3169 if (anv > DBL_MAX)
fc241834 3170 adouble = DBL_MAX;
f337b084 3171 else if (anv < -DBL_MAX)
fc241834 3172 adouble = -DBL_MAX;
f337b084 3173 else adouble = (double)anv;
baf3cf9c 3174# else
f337b084 3175 adouble = (double)anv;
baf3cf9c 3176# endif
f337b084 3177#endif /* __VOS__ */
1109a392 3178 DO_BO_PACK_N(adouble, double);
f337b084 3179 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3180 }
3181 break;
fc241834 3182 case 'F': {
275663fa 3183 NV_bytes anv;
1109a392 3184 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3185 while (len-- > 0) {
3186 fromstr = NEXTFROM;
cd07c537
DM
3187#ifdef __GNUC__
3188 /* to work round a gcc/x86 bug; don't use SvNV */
3189 anv.nv = sv_2nv(fromstr);
3190#else
275663fa 3191 anv.nv = SvNV(fromstr);
cd07c537 3192#endif
1109a392 3193 DO_BO_PACK_N(anv, NV);
275663fa 3194 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
3195 }
3196 break;
fc241834 3197 }
92d41999 3198#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 3199 case 'D': {
275663fa 3200 ld_bytes aldouble;
1109a392
MHM
3201 /* long doubles can have unused bits, which may be nonzero */
3202 Zero(&aldouble, 1, long double);
92d41999
JH
3203 while (len-- > 0) {
3204 fromstr = NEXTFROM;
cd07c537
DM
3205# ifdef __GNUC__
3206 /* to work round a gcc/x86 bug; don't use SvNV */
3207 aldouble.ld = (long double)sv_2nv(fromstr);
3208# else
275663fa 3209 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 3210# endif
1109a392 3211 DO_BO_PACK_N(aldouble, long double);
275663fa 3212 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
3213 }
3214 break;
fc241834 3215 }
92d41999 3216#endif
7212898e 3217#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3218 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3219#endif
a6ec74c1
JH
3220 case 'n':
3221 while (len-- > 0) {
f337b084 3222 I16 ai16;
a6ec74c1 3223 fromstr = NEXTFROM;
ef108786 3224 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3225#ifdef HAS_HTONS
ef108786 3226 ai16 = PerlSock_htons(ai16);
a6ec74c1 3227#endif
f337b084 3228 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3229 }
3230 break;
7212898e 3231#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3232 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3233#endif
a6ec74c1
JH
3234 case 'v':
3235 while (len-- > 0) {
f337b084 3236 I16 ai16;
a6ec74c1 3237 fromstr = NEXTFROM;
ef108786 3238 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3239#ifdef HAS_HTOVS
ef108786 3240 ai16 = htovs(ai16);
a6ec74c1 3241#endif
f337b084 3242 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3243 }
3244 break;
49704364 3245 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3246#if SHORTSIZE != SIZE16
fc241834 3247 while (len-- > 0) {
f337b084 3248 unsigned short aushort;
fc241834
RGS
3249 fromstr = NEXTFROM;
3250 aushort = SvUV(fromstr);
3251 DO_BO_PACK(aushort, s);
f337b084 3252 PUSH_VAR(utf8, cur, aushort);
fc241834 3253 }
49704364
WL
3254 break;
3255#else
3256 /* Fall through! */
a6ec74c1 3257#endif
49704364 3258 case 'S':
fc241834 3259 while (len-- > 0) {
f337b084 3260 U16 au16;
fc241834
RGS
3261 fromstr = NEXTFROM;
3262 au16 = (U16)SvUV(fromstr);
3263 DO_BO_PACK(au16, 16);
f337b084 3264 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3265 }
3266 break;
49704364 3267 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3268#if SHORTSIZE != SIZE16
fc241834 3269 while (len-- > 0) {
f337b084 3270 short ashort;
fc241834
RGS
3271 fromstr = NEXTFROM;
3272 ashort = SvIV(fromstr);
3273 DO_BO_PACK(ashort, s);
f337b084 3274 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3275 }
49704364
WL
3276 break;
3277#else
3278 /* Fall through! */
a6ec74c1 3279#endif
49704364
WL
3280 case 's':
3281 while (len-- > 0) {
f337b084 3282 I16 ai16;
49704364 3283 fromstr = NEXTFROM;
ef108786
MHM
3284 ai16 = (I16)SvIV(fromstr);
3285 DO_BO_PACK(ai16, 16);
f337b084 3286 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3287 }
3288 break;
3289 case 'I':
49704364 3290 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3291 while (len-- > 0) {
f337b084 3292 unsigned int auint;
a6ec74c1
JH
3293 fromstr = NEXTFROM;
3294 auint = SvUV(fromstr);
1109a392 3295 DO_BO_PACK(auint, i);
f337b084 3296 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3297 }
3298 break;
92d41999
JH
3299 case 'j':
3300 while (len-- > 0) {
f337b084 3301 IV aiv;
92d41999
JH
3302 fromstr = NEXTFROM;
3303 aiv = SvIV(fromstr);
1109a392
MHM
3304#if IVSIZE == INTSIZE
3305 DO_BO_PACK(aiv, i);
3306#elif IVSIZE == LONGSIZE
3307 DO_BO_PACK(aiv, l);
3308#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3309 DO_BO_PACK(aiv, 64);
f337b084
TH
3310#else
3311 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3312#endif
f337b084 3313 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3314 }
3315 break;
3316 case 'J':
3317 while (len-- > 0) {
f337b084 3318 UV auv;
92d41999
JH
3319 fromstr = NEXTFROM;
3320 auv = SvUV(fromstr);
1109a392
MHM
3321#if UVSIZE == INTSIZE
3322 DO_BO_PACK(auv, i);
3323#elif UVSIZE == LONGSIZE
3324 DO_BO_PACK(auv, l);
3325#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3326 DO_BO_PACK(auv, 64);
f337b084
TH
3327#else
3328 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3329#endif
f337b084 3330 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3331 }
3332 break;
a6ec74c1
JH
3333 case 'w':
3334 while (len-- > 0) {
f337b084 3335 NV anv;
a6ec74c1 3336 fromstr = NEXTFROM;
15e9f109 3337 anv = SvNV(fromstr);
a6ec74c1 3338
f337b084
TH
3339 if (anv < 0) {
3340 *cur = '\0';
b162af07 3341 SvCUR_set(cat, cur - start);
49704364 3342 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3343 }
a6ec74c1 3344
196b62db
NC
3345 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3346 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3347 any negative IVs will have already been got by the croak()
3348 above. IOK is untrue for fractions, so we test them
3349 against UV_MAX_P1. */
f337b084
TH
3350 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3351 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3352 char *in = buf + sizeof(buf);
196b62db 3353 UV auv = SvUV(fromstr);
a6ec74c1
JH
3354
3355 do {
eb160463 3356 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3357 auv >>= 7;
3358 } while (auv);
3359 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3360 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3361 in, (buf + sizeof(buf)) - in);
3362 } else if (SvPOKp(fromstr))
3363 goto w_string;
a6ec74c1 3364 else if (SvNOKp(fromstr)) {
0258719b 3365 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 3366 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
3367 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3368 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3369 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3370 Some C compilers are strict about integral constant
3371 expressions so we conservatively divide by a slightly
3372 smaller integer instead of multiplying by the exact
3373 floating-point value.
0258719b
NC
3374 */
3375#ifdef NV_MAX_10_EXP
f337b084 3376 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3377 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3378#else
f337b084 3379 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3380 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3381#endif
a6ec74c1
JH
3382 char *in = buf + sizeof(buf);
3383
8b6e33c7 3384 anv = Perl_floor(anv);
a6ec74c1 3385 do {
8b6e33c7 3386 const NV next = Perl_floor(anv / 128);
a6ec74c1 3387 if (in <= buf) /* this cannot happen ;-) */
49704364 3388 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3389 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3390 anv = next;
3391 } while (anv > 0);
a6ec74c1 3392 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3393 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3394 in, (buf + sizeof(buf)) - in);
3395 } else {
8b6e33c7
AL
3396 const char *from;
3397 char *result, *in;
735b914b
JH
3398 SV *norm;
3399 STRLEN len;
3400 bool done;
3401
f337b084 3402 w_string:
735b914b 3403 /* Copy string and check for compliance */
349d4f2f 3404 from = SvPV_const(fromstr, len);
735b914b 3405 if ((norm = is_an_int(from, len)) == NULL)
49704364 3406 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3407
a02a5408 3408 Newx(result, len, char);
735b914b
JH
3409 in = result + len;
3410 done = FALSE;
f337b084 3411 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3412 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3413 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3414 in, (result + len) - in);
735b914b
JH
3415 Safefree(result);
3416 SvREFCNT_dec(norm); /* free norm */
fc241834 3417 }
a6ec74c1
JH
3418 }
3419 break;
3420 case 'i':
49704364 3421 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3422 while (len-- > 0) {
f337b084 3423 int aint;
a6ec74c1
JH
3424 fromstr = NEXTFROM;
3425 aint = SvIV(fromstr);
1109a392 3426 DO_BO_PACK(aint, i);
f337b084 3427 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3428 }
3429 break;
7212898e 3430#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3431 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3432#endif
a6ec74c1
JH
3433 case 'N':
3434 while (len-- > 0) {
f337b084 3435 U32 au32;
a6ec74c1 3436 fromstr = NEXTFROM;
ef108786 3437 au32 = SvUV(fromstr);
a6ec74c1 3438#ifdef HAS_HTONL
ef108786 3439 au32 = PerlSock_htonl(au32);
a6ec74c1 3440#endif
f337b084 3441 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3442 }
3443 break;
7212898e 3444#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3445 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3446#endif
a6ec74c1
JH
3447 case 'V':
3448 while (len-- > 0) {
f337b084 3449 U32 au32;
a6ec74c1 3450 fromstr = NEXTFROM;
ef108786 3451 au32 = SvUV(fromstr);
a6ec74c1 3452#ifdef HAS_HTOVL
ef108786 3453 au32 = htovl(au32);
a6ec74c1 3454#endif
f337b084 3455 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3456 }
3457 break;
49704364 3458 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3459#if LONGSIZE != SIZE32
fc241834 3460 while (len-- > 0) {
f337b084 3461 unsigned long aulong;
fc241834
RGS
3462 fromstr = NEXTFROM;
3463 aulong = SvUV(fromstr);
3464 DO_BO_PACK(aulong, l);
f337b084 3465 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3466 }
49704364
WL
3467 break;
3468#else
3469 /* Fall though! */
a6ec74c1 3470#endif
49704364 3471 case 'L':
fc241834 3472 while (len-- > 0) {
f337b084 3473 U32 au32;
fc241834
RGS
3474 fromstr = NEXTFROM;
3475 au32 = SvUV(fromstr);
3476 DO_BO_PACK(au32, 32);
f337b084 3477 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3478 }
3479 break;
49704364 3480 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3481#if LONGSIZE != SIZE32
fc241834 3482 while (len-- > 0) {
f337b084 3483 long along;
fc241834
RGS
3484 fromstr = NEXTFROM;
3485 along = SvIV(fromstr);
3486 DO_BO_PACK(along, l);
f337b084 3487 PUSH_VAR(utf8, cur, along);
a6ec74c1 3488 }
49704364
WL
3489 break;
3490#else
3491 /* Fall though! */
a6ec74c1 3492#endif
49704364
WL
3493 case 'l':
3494 while (len-- > 0) {
f337b084 3495 I32 ai32;
49704364 3496 fromstr = NEXTFROM;
ef108786
MHM
3497 ai32 = SvIV(fromstr);
3498 DO_BO_PACK(ai32, 32);
f337b084 3499 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3500 }
3501 break;
3502#ifdef HAS_QUAD
3503 case 'Q':
3504 while (len-- > 0) {
f337b084 3505 Uquad_t auquad;
a6ec74c1 3506 fromstr = NEXTFROM;
f337b084 3507 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3508 DO_BO_PACK(auquad, 64);
f337b084 3509 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3510 }
3511 break;
3512 case 'q':
3513 while (len-- > 0) {
f337b084 3514 Quad_t aquad;
a6ec74c1
JH
3515 fromstr = NEXTFROM;
3516 aquad = (Quad_t)SvIV(fromstr);
1109a392 3517 DO_BO_PACK(aquad, 64);
f337b084 3518 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3519 }
3520 break;
f337b084 3521#endif /* HAS_QUAD */
a6ec74c1
JH
3522 case 'P':
3523 len = 1; /* assume SV is correct length */
f337b084 3524 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3525 /* Fall through! */
a6ec74c1
JH
3526 case 'p':
3527 while (len-- > 0) {
83003860 3528 const char *aptr;
f337b084 3529
a6ec74c1 3530 fromstr = NEXTFROM;
28a4f200
TH
3531 SvGETMAGIC(fromstr);
3532 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3533 else {
a6ec74c1
JH
3534 /* XXX better yet, could spirit away the string to
3535 * a safe spot and hang on to it until the result
3536 * of pack() (and all copies of the result) are
3537 * gone.
3538 */
041457d9 3539 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3540 !SvREADONLY(fromstr)))) {
3541 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3542 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3543 }
3544 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3545 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3546 else
2596d9fe 3547 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3548 }
07409e01 3549 DO_BO_PACK_PC(aptr);
f337b084 3550 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3551 }
3552 break;
fc241834 3553 case 'u': {
f7fe979e 3554 const char *aptr, *aend;
fc241834 3555 bool from_utf8;
f337b084 3556
a6ec74c1 3557 fromstr = NEXTFROM;
fc241834
RGS
3558 if (len <= 2) len = 45;
3559 else len = len / 3 * 3;
3560 if (len >= 64) {
a2a5de95
NC
3561 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3562 "Field too wide in 'u' format in pack");
fc241834
RGS
3563 len = 63;
3564 }
83003860 3565 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3566 from_utf8 = DO_UTF8(fromstr);
3567 if (from_utf8) {
3568 aend = aptr + fromlen;
3569 fromlen = sv_len_utf8(fromstr);
3570 } else aend = NULL; /* Unused, but keep compilers happy */
3571 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3572 while (fromlen > 0) {
fc241834 3573 U8 *end;
a6ec74c1 3574 I32 todo;
fc241834 3575 U8 hunk[1+63/3*4+1];
a6ec74c1 3576
eb160463 3577 if ((I32)fromlen > len)
a6ec74c1
JH
3578 todo = len;
3579 else
3580 todo = fromlen;
fc241834
RGS
3581 if (from_utf8) {
3582 char buffer[64];
3583 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3584 'u' | TYPE_IS_PACK)) {
3585 *cur = '\0';
b162af07 3586 SvCUR_set(cat, cur - start);
5f5991a0 3587 Perl_croak(aTHX_ "panic: string is shorter than advertised");
fc241834
RGS
3588 }
3589 end = doencodes(hunk, buffer, todo);
3590 } else {
3591 end = doencodes(hunk, aptr, todo);
3592 aptr += todo;
3593 }
3594 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3595 fromlen -= todo;
3596 }
a6ec74c1
JH
3597 break;
3598 }
f337b084
TH
3599 }
3600 *cur = '\0';
b162af07 3601 SvCUR_set(cat, cur - start);
f337b084 3602 no_change:
49704364 3603 *symptr = lookahead;
a6ec74c1 3604 }
49704364 3605 return beglist;
18529408
IZ
3606}
3607#undef NEXTFROM
3608
3609
3610PP(pp_pack)
3611{
97aff369 3612 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3613 register SV *cat = TARG;
3614 STRLEN fromlen;
349d4f2f
NC
3615 SV *pat_sv = *++MARK;
3616 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3617 register const char *patend = pat + fromlen;
18529408
IZ
3618
3619 MARK++;
76f68e9b 3620 sv_setpvs(cat, "");
f337b084 3621 SvUTF8_off(cat);
18529408 3622
7accc089 3623 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3624
a6ec74c1
JH
3625 SvSETMAGIC(cat);
3626 SP = ORIGMARK;
3627 PUSHs(cat);
3628 RETURN;
3629}
a6ec74c1 3630
73cb7263
NC
3631/*
3632 * Local variables:
3633 * c-indentation-style: bsd
3634 * c-basic-offset: 4
3635 * indent-tabs-mode: t
3636 * End:
3637 *
37442d52
RGS
3638 * ex: set ts=8 sts=4 sw=4 noet:
3639 */