This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
lib/unicore/mktables needs to get File::Spec from ext/Cwd/lib
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
7212898e 73#if PERL_VERSION >= 9
f337b084
TH
74# define PERL_PACK_CAN_BYTEORDER
75# define PERL_PACK_CAN_SHRIEKSIGN
76#endif
77
78#ifndef CHAR_BIT
79# define CHAR_BIT 8
7212898e 80#endif
3473cf63
RGS
81/* Maximum number of bytes to which a byte can grow due to upgrade */
82#define UTF8_EXPAND 2
7212898e 83
a6ec74c1 84/*
a6ec74c1
JH
85 * Offset for integer pack/unpack.
86 *
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
89 */
90
91/*
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
97 */
98/*
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
101 */
102#define SIZE16 2
103#define SIZE32 4
104
105/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
106 --jhi Feb 1999 */
107
1109a392
MHM
108#if U16SIZE > SIZE16 || U32SIZE > SIZE32
109# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
110# define OFF16(p) ((char*)(p))
111# define OFF32(p) ((char*)(p))
a6ec74c1 112# else
1109a392 113# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
114# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116# else
08ca2aa3 117 ++++ bad cray byte order
a6ec74c1
JH
118# endif
119# endif
a6ec74c1 120#else
08ca2aa3
TH
121# define OFF16(p) ((char *) (p))
122# define OFF32(p) ((char *) (p))
a6ec74c1
JH
123#endif
124
f337b084
TH
125/* Only to be used inside a loop (see the break) */
126#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (utf8) { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 } else { \
130 Copy(s, OFF16(p), SIZE16, char); \
131 (s) += SIZE16; \
132 } \
133} STMT_END
134
135/* Only to be used inside a loop (see the break) */
136#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (utf8) { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 } else { \
140 Copy(s, OFF32(p), SIZE32, char); \
141 (s) += SIZE32; \
142 } \
143} STMT_END
144
145#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
147
148/* Only to be used inside a loop (see the break) */
f337b084 149#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
150STMT_START { \
151 if (utf8) { \
f337b084
TH
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
154 } else { \
155 Copy(s, (char *) &var, sizeof(var), char); \
156 s += sizeof(var); \
157 } \
08ca2aa3
TH
158} STMT_END
159
f337b084 160#define PUSH_VAR(utf8, aptr, var) \
230e1fce 161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 162
49704364
WL
163/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164#define MAX_SUB_TEMPLATE_LEVEL 100
165
66c611c5 166/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
167#define FLAG_WAS_UTF8 0x40
168#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 169#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 170#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
171#define FLAG_SLASH 0x04
172#define FLAG_COMMA 0x02
173#define FLAG_PACK 0x01
174
a6ec74c1
JH
175STATIC SV *
176S_mul128(pTHX_ SV *sv, U8 m)
177{
178 STRLEN len;
179 char *s = SvPV(sv, len);
180 char *t;
a6ec74c1 181
7918f24d
NC
182 PERL_ARGS_ASSERT_MUL128;
183
a6ec74c1 184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 185 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
186
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
189 sv = tmpNew;
190 s = SvPV(sv, len);
191 }
192 t = s + len - 1;
193 while (!*t) /* trailing '\0'? */
194 t--;
195 while (t > s) {
f7fe979e 196 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
197 *(t--) = '0' + (char)(i % 10);
198 m = (char)(i / 10);
a6ec74c1
JH
199 }
200 return (sv);
201}
202
203/* Explosives and implosives. */
204
205#if 'I' == 73 && 'J' == 74
206/* On an ASCII/ISO kind of system */
207#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
208#else
209/*
210 Some other sort of character set - use memchr() so we don't match
211 the null byte.
212 */
213#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214#endif
215
66c611c5 216/* type modifiers */
62f95557 217#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
218#define TYPE_IS_BIG_ENDIAN 0x200
219#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 220#define TYPE_IS_PACK 0x800
1109a392 221#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 222#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
223#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
224
7212898e 225#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 226# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 227#else
28be1210 228# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
229#endif
230
231#ifndef PERL_PACK_CAN_BYTEORDER
232/* Put "can't" first because it is shorter */
233# define TYPE_ENDIANNESS(t) 0
234# define TYPE_NO_ENDIANNESS(t) (t)
235
236# define ENDIANNESS_ALLOWED_TYPES ""
237
238# define DO_BO_UNPACK(var, type)
239# define DO_BO_PACK(var, type)
07409e01
NC
240# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
242# define DO_BO_UNPACK_N(var, type)
243# define DO_BO_PACK_N(var, type)
244# define DO_BO_UNPACK_P(var)
245# define DO_BO_PACK_P(var)
45f723e1
NC
246# define DO_BO_UNPACK_PC(var)
247# define DO_BO_PACK_PC(var)
66c611c5 248
f337b084 249#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
250
251# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
253
254# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
255
256# define DO_BO_UNPACK(var, type) \
1109a392 257 STMT_START { \
66c611c5 258 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
261 default: break; \
262 } \
263 } STMT_END
264
7212898e 265# define DO_BO_PACK(var, type) \
1109a392 266 STMT_START { \
66c611c5 267 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
270 default: break; \
271 } \
272 } STMT_END
273
07409e01 274# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 275 STMT_START { \
66c611c5 276 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 277 case TYPE_IS_BIG_ENDIAN: \
07409e01 278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
282 break; \
283 default: \
284 break; \
285 } \
286 } STMT_END
287
07409e01 288# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 289 STMT_START { \
66c611c5 290 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 291 case TYPE_IS_BIG_ENDIAN: \
07409e01 292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
296 break; \
297 default: \
298 break; \
299 } \
300 } STMT_END
301
7212898e 302# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
303 STMT_START { \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
312 break; \
313 default: \
314 break; \
315 } \
316 } STMT_END
317
7212898e 318# if PTRSIZE == INTSIZE
07409e01
NC
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 323# elif PTRSIZE == LONGSIZE
07409e01
NC
324# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
325# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
326# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
327# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
328# elif PTRSIZE == IVSIZE
329# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
330# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
331# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
332# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
333# else
334# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
335# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
336# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
337# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 338# endif
1109a392 339
7212898e 340# if defined(my_htolen) && defined(my_letohn) && \
1109a392 341 defined(my_htoben) && defined(my_betohn)
7212898e 342# define DO_BO_UNPACK_N(var, type) \
1109a392 343 STMT_START { \
66c611c5 344 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
347 default: break; \
348 } \
349 } STMT_END
350
7212898e 351# define DO_BO_PACK_N(var, type) \
1109a392 352 STMT_START { \
66c611c5 353 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
356 default: break; \
357 } \
358 } STMT_END
7212898e
NC
359# else
360# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
361# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
362# endif
363
f337b084 364#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 365
78d46eaa 366#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 367#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
368#define PACK_SIZE_MASK 0x3F
369
78d46eaa
NC
370/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
372
373#if TYPE_IS_SHRIEKING != 0x100
374 ++++shriek offset should be 256
375#endif
376
377typedef U8 packprops_t;
78d46eaa
NC
378#if 'J'-'I' == 1
379/* ASCII */
f43a9a31 380STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
381 /* normal */
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0,
1651fc44 387 /* C */ sizeof(unsigned char),
80a13697 388#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 389 /* D */ LONG_DOUBLESIZE,
80a13697 390#else
a7a3cfaa 391 0,
80a13697 392#endif
a7a3cfaa
TH
393 0,
394 /* F */ NVSIZE,
395 0, 0,
396 /* I */ sizeof(unsigned int),
397 /* J */ UVSIZE,
398 0,
399 /* L */ SIZE32,
400 0,
401 /* N */ SIZE32,
402 0, 0,
80a13697 403#if defined(HAS_QUAD)
a7a3cfaa 404 /* Q */ sizeof(Uquad_t),
80a13697 405#else
a7a3cfaa 406 0,
80a13697 407#endif
a7a3cfaa
TH
408 0,
409 /* S */ SIZE16,
410 0,
411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
412 /* V */ SIZE32,
413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* c */ sizeof(char),
416 /* d */ sizeof(double),
417 0,
418 /* f */ sizeof(float),
419 0, 0,
420 /* i */ sizeof(int),
421 /* j */ IVSIZE,
422 0,
423 /* l */ SIZE32,
424 0,
425 /* n */ SIZE16,
426 0,
427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 428#if defined(HAS_QUAD)
a7a3cfaa 429 /* q */ sizeof(Quad_t),
80a13697 430#else
a7a3cfaa 431 0,
80a13697 432#endif
a7a3cfaa
TH
433 0,
434 /* s */ SIZE16,
435 0, 0,
436 /* v */ SIZE16,
437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0,
447 /* shrieking */
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* I */ sizeof(unsigned int),
454 0, 0,
455 /* L */ sizeof(unsigned long),
456 0,
7212898e 457#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 458 /* N */ SIZE32,
7212898e 459#else
a7a3cfaa 460 0,
7212898e 461#endif
a7a3cfaa
TH
462 0, 0, 0, 0,
463 /* S */ sizeof(unsigned short),
464 0, 0,
7212898e 465#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 466 /* V */ SIZE32,
7212898e 467#else
a7a3cfaa 468 0,
7212898e 469#endif
a7a3cfaa
TH
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0,
472 /* i */ sizeof(int),
473 0, 0,
474 /* l */ sizeof(long),
475 0,
7212898e 476#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 477 /* n */ SIZE16,
7212898e 478#else
a7a3cfaa 479 0,
7212898e 480#endif
a7a3cfaa
TH
481 0, 0, 0, 0,
482 /* s */ sizeof(short),
483 0, 0,
7212898e 484#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 485 /* v */ SIZE16,
7212898e 486#else
a7a3cfaa 487 0,
7212898e 488#endif
a7a3cfaa
TH
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
498};
499#else
500/* EBCDIC (or bust) */
f43a9a31 501STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
502 /* normal */
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 0, 0, 0,
512 /* c */ sizeof(char),
513 /* d */ sizeof(double),
514 0,
515 /* f */ sizeof(float),
516 0, 0,
517 /* i */ sizeof(int),
518 0, 0, 0, 0, 0, 0, 0,
519 /* j */ IVSIZE,
520 0,
521 /* l */ SIZE32,
522 0,
523 /* n */ SIZE16,
524 0,
525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 526#if defined(HAS_QUAD)
a7a3cfaa 527 /* q */ sizeof(Quad_t),
80a13697 528#else
a7a3cfaa 529 0,
80a13697 530#endif
a7a3cfaa
TH
531 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* s */ SIZE16,
533 0, 0,
534 /* v */ SIZE16,
535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 538 /* C */ sizeof(unsigned char),
80a13697 539#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 540 /* D */ LONG_DOUBLESIZE,
80a13697 541#else
a7a3cfaa 542 0,
80a13697 543#endif
a7a3cfaa
TH
544 0,
545 /* F */ NVSIZE,
546 0, 0,
547 /* I */ sizeof(unsigned int),
548 0, 0, 0, 0, 0, 0, 0,
549 /* J */ UVSIZE,
550 0,
551 /* L */ SIZE32,
552 0,
553 /* N */ SIZE32,
554 0, 0,
80a13697 555#if defined(HAS_QUAD)
a7a3cfaa 556 /* Q */ sizeof(Uquad_t),
80a13697 557#else
a7a3cfaa 558 0,
80a13697 559#endif
a7a3cfaa
TH
560 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 /* S */ SIZE16,
562 0,
563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
564 /* V */ SIZE32,
565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* shrieking */
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 /* i */ sizeof(int),
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* l */ sizeof(long),
581 0,
7212898e 582#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 583 /* n */ SIZE16,
7212898e 584#else
a7a3cfaa 585 0,
7212898e 586#endif
a7a3cfaa
TH
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* s */ sizeof(short),
589 0, 0,
7212898e 590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 591 /* v */ SIZE16,
7212898e 592#else
a7a3cfaa 593 0,
7212898e 594#endif
a7a3cfaa
TH
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0,
598 /* I */ sizeof(unsigned int),
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* L */ sizeof(unsigned long),
601 0,
7212898e 602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 603 /* N */ SIZE32,
7212898e 604#else
a7a3cfaa 605 0,
7212898e 606#endif
a7a3cfaa
TH
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 /* S */ sizeof(unsigned short),
609 0, 0,
7212898e 610#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 611 /* V */ SIZE32,
7212898e 612#else
a7a3cfaa 613 0,
7212898e 614#endif
a7a3cfaa
TH
615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
617};
618#endif
619
08ca2aa3 620STATIC U8
f7fe979e 621uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 622{
08ca2aa3 623 STRLEN retlen;
0bcc34c2 624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
626 /* We try to process malformed UTF-8 as much as possible (preferrably with
627 warnings), but these two mean we make no progress in the string and
628 might enter an infinite loop */
629 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 632 if (val >= 0x100) {
f337b084 633 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
634 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
635 "Character in '%c' format wrapped in unpack",
f337b084 636 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
637 val &= 0xff;
638 }
639 *s += retlen;
fe2774ed 640 return (U8)val;
08ca2aa3
TH
641}
642
f337b084
TH
643#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
644 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
645 *(U8 *)(s)++)
646
647STATIC bool
f7fe979e 648uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
649{
650 UV val;
651 STRLEN retlen;
f7fe979e 652 const char *from = *s;
08ca2aa3 653 int bad = 0;
f7fe979e 654 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
655 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
656 for (;buf_len > 0; buf_len--) {
657 if (from >= end) return FALSE;
f337b084 658 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
659 if (retlen == (STRLEN) -1 || retlen == 0) {
660 from += UTF8SKIP(from);
661 bad |= 1;
662 } else from += retlen;
663 if (val >= 0x100) {
664 bad |= 2;
665 val &= 0xff;
666 }
fe2774ed 667 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
668 }
669 /* We have enough characters for the buffer. Did we have problems ? */
670 if (bad) {
671 if (bad & 1) {
672 /* Rewalk the string fragment while warning */
f7fe979e 673 const char *ptr;
9e27e96a 674 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
675 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
676 if (ptr >= end) break;
f337b084 677 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
678 }
679 if (from > end) from = end;
680 }
681 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 682 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
683 WARN_PACK : WARN_UNPACK),
684 "Character(s) in '%c' format wrapped in %s",
fc241834 685 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 686 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
687 }
688 *s = from;
689 return TRUE;
690}
691
692STATIC bool
f7fe979e 693next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 694{
97aff369 695 dVAR;
08ca2aa3 696 STRLEN retlen;
0bcc34c2 697 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
698 if (val >= 0x100 || !ISUUCHAR(val) ||
699 retlen == (STRLEN) -1 || retlen == 0) {
700 *out = 0;
701 return FALSE;
702 }
703 *out = PL_uudmap[val] & 077;
f337b084 704 *s += retlen;
08ca2aa3
TH
705 return TRUE;
706}
78d46eaa 707
64844641 708STATIC char *
14333449 709S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 710 const U8 * const end = start + len;
64844641 711
7918f24d
NC
712 PERL_ARGS_ASSERT_BYTES_TO_UNI;
713
f337b084 714 while (start < end) {
48fa4626
TS
715 const UV uv = NATIVE_TO_ASCII(*start);
716 if (UNI_IS_INVARIANT(uv))
717 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
718 else {
719 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
720 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
721 }
722 start++;
723 }
64844641 724 return dest;
f337b084
TH
725}
726
230e1fce
NC
727#define PUSH_BYTES(utf8, cur, buf, len) \
728STMT_START { \
64844641
AL
729 if (utf8) \
730 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
731 else { \
732 Copy(buf, cur, len, char); \
733 (cur) += (len); \
734 } \
f337b084
TH
735} STMT_END
736
737#define GROWING(utf8, cat, start, cur, in_len) \
738STMT_START { \
739 STRLEN glen = (in_len); \
3473cf63 740 if (utf8) glen *= UTF8_EXPAND; \
f337b084 741 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 742 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
743 (cur) = (start) + SvCUR(cat); \
744 } \
745} STMT_END
746
747#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
748STMT_START { \
f7fe979e 749 const STRLEN glen = (in_len); \
f337b084 750 STRLEN gl = glen; \
3473cf63 751 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
752 if ((cur) + gl >= (start) + SvLEN(cat)) { \
753 *cur = '\0'; \
b162af07 754 SvCUR_set((cat), (cur) - (start)); \
0bd48802 755 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
756 (cur) = (start) + SvCUR(cat); \
757 } \
758 PUSH_BYTES(utf8, cur, buf, glen); \
759} STMT_END
760
761#define PUSH_BYTE(utf8, s, byte) \
762STMT_START { \
763 if (utf8) { \
f7fe979e 764 const U8 au8 = (byte); \
64844641 765 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
766 } else *(U8 *)(s)++ = (byte); \
767} STMT_END
768
769/* Only to be used inside a loop (see the break) */
770#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
771STMT_START { \
772 STRLEN retlen; \
773 if (str >= end) break; \
774 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
775 if (retlen == (STRLEN) -1 || retlen == 0) { \
776 *cur = '\0'; \
777 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
778 } \
779 str += retlen; \
780} STMT_END
781
f7fe979e
AL
782static const char *_action( const tempsym_t* symptr )
783{
10edeb5d 784 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
785}
786
206947d2 787/* Returns the sizeof() struct described by pat */
028d1f6d 788STATIC I32
f337b084 789S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 790{
f337b084 791 I32 total = 0;
206947d2 792
7918f24d
NC
793 PERL_ARGS_ASSERT_MEASURE_STRUCT;
794
49704364 795 while (next_symbol(symptr)) {
f337b084 796 I32 len;
f7fe979e 797 int size;
f337b084
TH
798
799 switch (symptr->howlen) {
fc241834 800 case e_star:
49704364 801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 802 _action( symptr ) );
49704364 803 break;
f337b084
TH
804 default:
805 /* e_no_len and e_number */
806 len = symptr->length;
807 break;
49704364
WL
808 }
809
a7a3cfaa 810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 811 if (!size) {
f7fe979e 812 int star;
80a13697
NC
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
815 default:
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 818 _action( symptr ) );
28be1210
TH
819#ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
822#endif
80a13697 823 case '@':
28be1210 824 case '.':
80a13697
NC
825 case '/':
826 case 'U': /* XXXX Is it correct? */
827 case 'w':
828 case 'u':
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 830 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 831 _action( symptr ) );
80a13697
NC
832 case '%':
833 size = 0;
834 break;
835 case '(':
fc241834
RGS
836 {
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
845 *symptr = savsym;
846 break;
847 }
80a13697
NC
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
850 */
851 if (!len) /* Avoid division by 0 */
852 len = 1;
853 len = total % len; /* Assumed: the start is aligned. */
854 /* FALL THROUGH */
855 case 'X':
856 size = -1;
857 if (total < len)
f7fe979e 858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
859 break;
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
862 len = 1;
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
865 len = len - star;
866 else
867 len = 0;
868 /* FALL THROUGH */
869 case 'x':
870 case 'A':
871 case 'Z':
872 case 'a':
80a13697
NC
873 size = 1;
874 break;
875 case 'B':
876 case 'b':
877 len = (len + 7)/8;
878 size = 1;
879 break;
880 case 'H':
881 case 'h':
882 len = (len + 1)/2;
883 size = 1;
884 break;
78d46eaa 885
80a13697
NC
886 case 'P':
887 len = 1;
888 size = sizeof(char*);
78d46eaa
NC
889 break;
890 }
206947d2
IZ
891 }
892 total += len * size;
893 }
894 return total;
895}
896
49704364
WL
897
898/* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
900 */
f7fe979e
AL
901STATIC const char *
902S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 903{
7918f24d
NC
904 PERL_ARGS_ASSERT_GROUP_END;
905
49704364 906 while (patptr < patend) {
f7fe979e 907 const char c = *patptr++;
49704364
WL
908
909 if (isSPACE(c))
910 continue;
911 else if (c == ender)
912 return patptr-1;
913 else if (c == '#') {
914 while (patptr < patend && *patptr != '\n')
915 patptr++;
916 continue;
917 } else if (c == '(')
918 patptr = group_end(patptr, patend, ')') + 1;
919 else if (c == '[')
920 patptr = group_end(patptr, patend, ']') + 1;
18529408 921 }
49704364
WL
922 Perl_croak(aTHX_ "No group ending character '%c' found in template",
923 ender);
924 return 0;
18529408
IZ
925}
926
49704364
WL
927
928/* Convert unsigned decimal number to binary.
929 * Expects a pointer to the first digit and address of length variable
930 * Advances char pointer to 1st non-digit char and returns number
fc241834 931 */
f7fe979e
AL
932STATIC const char *
933S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
934{
935 I32 len = *patptr++ - '0';
7918f24d
NC
936
937 PERL_ARGS_ASSERT_GET_NUM;
938
49704364
WL
939 while (isDIGIT(*patptr)) {
940 if (len >= 0x7FFFFFFF/10)
941 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
942 len = (len * 10) + (*patptr++ - '0');
943 }
944 *lenptr = len;
945 return patptr;
946}
947
948/* The marvellous template parsing routine: Using state stored in *symptr,
949 * locates next template code and count
950 */
951STATIC bool
f337b084 952S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 953{
f7fe979e 954 const char* patptr = symptr->patptr;
0bcc34c2 955 const char* const patend = symptr->patend;
49704364 956
7918f24d
NC
957 PERL_ARGS_ASSERT_NEXT_SYMBOL;
958
49704364
WL
959 symptr->flags &= ~FLAG_SLASH;
960
961 while (patptr < patend) {
962 if (isSPACE(*patptr))
963 patptr++;
964 else if (*patptr == '#') {
965 patptr++;
966 while (patptr < patend && *patptr != '\n')
967 patptr++;
968 if (patptr < patend)
969 patptr++;
970 } else {
fc241834 971 /* We should have found a template code */
49704364 972 I32 code = *patptr++ & 0xFF;
66c611c5 973 U32 inherited_modifiers = 0;
49704364
WL
974
975 if (code == ','){ /* grandfather in commas but with a warning */
976 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
977 symptr->flags |= FLAG_COMMA;
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 979 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
980 }
981 continue;
982 }
fc241834 983
49704364 984 /* for '(', skip to ')' */
fc241834 985 if (code == '(') {
49704364
WL
986 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
987 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 988 _action( symptr ) );
49704364
WL
989 symptr->grpbeg = patptr;
990 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
991 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
992 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 993 _action( symptr ) );
49704364
WL
994 }
995
66c611c5
MHM
996 /* look for group modifiers to inherit */
997 if (TYPE_ENDIANNESS(symptr->flags)) {
998 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
999 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1000 }
1001
1109a392
MHM
1002 /* look for modifiers */
1003 while (patptr < patend) {
b7787f18
AL
1004 const char *allowed;
1005 I32 modifier;
1109a392
MHM
1006 switch (*patptr) {
1007 case '!':
1008 modifier = TYPE_IS_SHRIEKING;
7212898e 1009 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1010 break;
7212898e 1011#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1012 case '>':
1013 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1014 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1015 break;
1016 case '<':
1017 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1018 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1019 break;
f337b084 1020#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1021 default:
b7787f18
AL
1022 allowed = "";
1023 modifier = 0;
1109a392
MHM
1024 break;
1025 }
66c611c5 1026
1109a392
MHM
1027 if (modifier == 0)
1028 break;
66c611c5 1029
1109a392
MHM
1030 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1031 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1032 allowed, _action( symptr ) );
66c611c5
MHM
1033
1034 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1035 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1036 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1037 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1038 TYPE_ENDIANNESS_MASK)
1039 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1040 *patptr, _action( symptr ) );
66c611c5 1041
041457d9 1042 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1043 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1044 "Duplicate modifier '%c' after '%c' in %s",
1045 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1046 _action( symptr ) );
1109a392 1047 }
66c611c5 1048
1109a392
MHM
1049 code |= modifier;
1050 patptr++;
49704364
WL
1051 }
1052
66c611c5
MHM
1053 /* inherit modifiers */
1054 code |= inherited_modifiers;
1055
fc241834 1056 /* look for count and/or / */
49704364
WL
1057 if (patptr < patend) {
1058 if (isDIGIT(*patptr)) {
1059 patptr = get_num( patptr, &symptr->length );
1060 symptr->howlen = e_number;
1061
1062 } else if (*patptr == '*') {
1063 patptr++;
1064 symptr->howlen = e_star;
1065
1066 } else if (*patptr == '[') {
f7fe979e 1067 const char* lenptr = ++patptr;
49704364
WL
1068 symptr->howlen = e_number;
1069 patptr = group_end( patptr, patend, ']' ) + 1;
1070 /* what kind of [] is it? */
1071 if (isDIGIT(*lenptr)) {
1072 lenptr = get_num( lenptr, &symptr->length );
1073 if( *lenptr != ']' )
1074 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1075 _action( symptr ) );
49704364
WL
1076 } else {
1077 tempsym_t savsym = *symptr;
1078 symptr->patend = patptr-1;
1079 symptr->patptr = lenptr;
1080 savsym.length = measure_struct(symptr);
1081 *symptr = savsym;
1082 }
1083 } else {
1084 symptr->howlen = e_no_len;
1085 symptr->length = 1;
1086 }
1087
1088 /* try to find / */
1089 while (patptr < patend) {
1090 if (isSPACE(*patptr))
1091 patptr++;
1092 else if (*patptr == '#') {
1093 patptr++;
1094 while (patptr < patend && *patptr != '\n')
1095 patptr++;
1096 if (patptr < patend)
1097 patptr++;
1098 } else {
66c611c5 1099 if (*patptr == '/') {
49704364
WL
1100 symptr->flags |= FLAG_SLASH;
1101 patptr++;
66c611c5
MHM
1102 if (patptr < patend &&
1103 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1104 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1105 _action( symptr ) );
49704364
WL
1106 }
1107 break;
1108 }
18529408 1109 }
49704364
WL
1110 } else {
1111 /* at end - no count, no / */
1112 symptr->howlen = e_no_len;
1113 symptr->length = 1;
1114 }
1115
1116 symptr->code = code;
fc241834 1117 symptr->patptr = patptr;
49704364 1118 return TRUE;
18529408 1119 }
49704364 1120 }
fc241834 1121 symptr->patptr = patptr;
49704364 1122 return FALSE;
18529408
IZ
1123}
1124
18529408 1125/*
fc241834 1126 There is no way to cleanly handle the case where we should process the
08ca2aa3 1127 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1128 (e.g. estimates like strend-s as an upper bound for the number of
1129 characters left wouldn't work). So if we foresee the need of this
1130 (pattern starts with U or contains U0), we want to work on the encoded
1131 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1132 themselves if they need to do a lot of unpacks like this on it
1133*/
fc241834 1134STATIC bool
08ca2aa3
TH
1135need_utf8(const char *pat, const char *patend)
1136{
1137 bool first = TRUE;
7918f24d
NC
1138
1139 PERL_ARGS_ASSERT_NEED_UTF8;
1140
08ca2aa3
TH
1141 while (pat < patend) {
1142 if (pat[0] == '#') {
1143 pat++;
f7fe979e 1144 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1145 if (!pat) return FALSE;
1146 } else if (pat[0] == 'U') {
1147 if (first || pat[1] == '0') return TRUE;
1148 } else first = FALSE;
1149 pat++;
1150 }
1151 return FALSE;
1152}
1153
1154STATIC char
1155first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1156 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1157
08ca2aa3
TH
1158 while (pat < patend) {
1159 if (pat[0] != '#') return pat[0];
1160 pat++;
f7fe979e 1161 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1162 if (!pat) return 0;
1163 pat++;
1164 }
1165 return 0;
1166}
1167
1168/*
7accc089
JH
1169=for apidoc unpackstring
1170
608d3aed
WL
1171The engine implementing unpack() Perl function. C<unpackstring> puts the
1172extracted list items on the stack and returns the number of elements.
1173Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1174
1175=cut */
1176
1177I32
f7fe979e 1178Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1179{
f7fe979e 1180 tempsym_t sym;
08ca2aa3 1181
7918f24d
NC
1182 PERL_ARGS_ASSERT_UNPACKSTRING;
1183
f337b084 1184 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1185 else if (need_utf8(pat, patend)) {
1186 /* We probably should try to avoid this in case a scalar context call
1187 wouldn't get to the "U0" */
1188 STRLEN len = strend - s;
230e1fce 1189 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1190 SAVEFREEPV(s);
1191 strend = s + len;
f337b084 1192 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1193 }
1194
f337b084
TH
1195 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1196 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1197
f7fe979e 1198 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1199
1200 return unpack_rec(&sym, s, s, strend, NULL );
1201}
1202
4136a0f7 1203STATIC I32
f7fe979e 1204S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1205{
27da23d5 1206 dVAR; dSP;
18529408 1207 SV *sv;
f7fe979e 1208 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1209 howlen_t howlen;
a6ec74c1 1210 I32 checksum = 0;
92d41999 1211 UV cuv = 0;
a6ec74c1 1212 NV cdouble = 0.0;
f337b084 1213 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1214 bool beyond = FALSE;
21c16052 1215 bool explicit_length;
9e27e96a 1216 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1217 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1218
1219 PERL_ARGS_ASSERT_UNPACK_REC;
1220
28be1210 1221 symptr->strbeg = s - strbeg;
49704364 1222
49704364 1223 while (next_symbol(symptr)) {
a7a3cfaa 1224 packprops_t props;
9e27e96a 1225 I32 len;
f337b084 1226 I32 datumtype = symptr->code;
206947d2 1227 /* do first one only unless in list context
08ca2aa3 1228 / is implemented by unpacking the count, then popping it from the
206947d2 1229 stack, so must check that we're not in the middle of a / */
49704364 1230 if ( unpack_only_one
206947d2 1231 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1232 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1233 break;
49704364 1234
f337b084 1235 switch (howlen = symptr->howlen) {
fc241834
RGS
1236 case e_star:
1237 len = strend - strbeg; /* long enough */
49704364 1238 break;
f337b084
TH
1239 default:
1240 /* e_no_len and e_number */
1241 len = symptr->length;
1242 break;
49704364 1243 }
18529408 1244
21c16052 1245 explicit_length = TRUE;
a6ec74c1 1246 redo_switch:
49704364 1247 beyond = s >= strend;
a7a3cfaa
TH
1248
1249 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1250 if (props) {
1251 /* props nonzero means we can process this letter. */
9e27e96a
AL
1252 const long size = props & PACK_SIZE_MASK;
1253 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1254 if (len > howmany)
1255 len = howmany;
1256
1257 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1258 if (len && unpack_only_one) len = 1;
1259 EXTEND(SP, len);
1260 EXTEND_MORTAL(len);
78d46eaa
NC
1261 }
1262 }
a7a3cfaa 1263
1109a392 1264 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1265 default:
1109a392 1266 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1267
a6ec74c1 1268 case '%':
49704364 1269 if (howlen == e_no_len)
18529408 1270 len = 16; /* len is not specified */
a6ec74c1 1271 checksum = len;
92d41999 1272 cuv = 0;
a6ec74c1 1273 cdouble = 0;
18529408 1274 continue;
a6ec74c1 1275 break;
18529408
IZ
1276 case '(':
1277 {
49704364 1278 tempsym_t savsym = *symptr;
9e27e96a 1279 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1280 symptr->flags |= group_modifiers;
49704364 1281 symptr->patend = savsym.grpend;
28be1210 1282 symptr->previous = &savsym;
49704364 1283 symptr->level++;
18529408 1284 PUTBACK;
c6f750d1 1285 if (len && unpack_only_one) len = 1;
18529408 1286 while (len--) {
49704364 1287 symptr->patptr = savsym.grpbeg;
f337b084
TH
1288 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1289 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1290 unpack_rec(symptr, s, strbeg, strend, &s);
1291 if (s == strend && savsym.howlen == e_star)
49704364 1292 break; /* No way to continue */
18529408
IZ
1293 }
1294 SPAGAIN;
28be1210 1295 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1296 *symptr = savsym;
18529408
IZ
1297 break;
1298 }
28be1210
TH
1299#ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 case '.' | TYPE_IS_SHRIEKING:
1301#endif
1302 case '.': {
9e27e96a 1303 const char *from;
28be1210
TH
1304 SV *sv;
1305#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1306 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1307#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1308 const bool u8 = utf8;
28be1210
TH
1309#endif
1310 if (howlen == e_star) from = strbeg;
1311 else if (len <= 0) from = s;
1312 else {
1313 tempsym_t *group = symptr;
1314
1315 while (--len && group) group = group->previous;
1316 from = group ? strbeg + group->strbeg : strbeg;
1317 }
1318 sv = from <= s ?
00646304
CB
1319 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1320 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1321 mXPUSHs(sv);
28be1210
TH
1322 break;
1323 }
1324#ifdef PERL_PACK_CAN_SHRIEKSIGN
1325 case '@' | TYPE_IS_SHRIEKING:
1326#endif
a6ec74c1 1327 case '@':
28be1210
TH
1328 s = strbeg + symptr->strbeg;
1329#ifdef PERL_PACK_CAN_SHRIEKSIGN
1330 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1331#else /* PERL_PACK_CAN_SHRIEKSIGN */
1332 if (utf8)
1333#endif
1334 {
08ca2aa3
TH
1335 while (len > 0) {
1336 if (s >= strend)
1337 Perl_croak(aTHX_ "'@' outside of string in unpack");
1338 s += UTF8SKIP(s);
1339 len--;
1340 }
1341 if (s > strend)
1342 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1343 } else {
28be1210 1344 if (strend-s < len)
fc241834 1345 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1346 s += len;
08ca2aa3 1347 }
a6ec74c1 1348 break;
62f95557
IZ
1349 case 'X' | TYPE_IS_SHRIEKING:
1350 if (!len) /* Avoid division by 0 */
1351 len = 1;
08ca2aa3 1352 if (utf8) {
f7fe979e 1353 const char *hop, *last;
f337b084
TH
1354 I32 l = len;
1355 hop = last = strbeg;
1356 while (hop < s) {
1357 hop += UTF8SKIP(hop);
1358 if (--l == 0) {
08ca2aa3 1359 last = hop;
f337b084
TH
1360 l = len;
1361 }
fc241834 1362 }
f337b084
TH
1363 if (last > s)
1364 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1365 s = last;
1366 break;
f337b084
TH
1367 }
1368 len = (s - strbeg) % len;
62f95557 1369 /* FALL THROUGH */
a6ec74c1 1370 case 'X':
08ca2aa3
TH
1371 if (utf8) {
1372 while (len > 0) {
1373 if (s <= strbeg)
1374 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1375 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1376 if (s <= strbeg)
1377 Perl_croak(aTHX_ "'X' outside of string in unpack");
1378 }
1379 len--;
1380 }
1381 } else {
fc241834
RGS
1382 if (len > s - strbeg)
1383 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1384 s -= len;
08ca2aa3 1385 }
a6ec74c1 1386 break;
9e27e96a
AL
1387 case 'x' | TYPE_IS_SHRIEKING: {
1388 I32 ai32;
62f95557
IZ
1389 if (!len) /* Avoid division by 0 */
1390 len = 1;
230e1fce
NC
1391 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1392 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1393 if (ai32 == 0) break;
1394 len -= ai32;
9e27e96a 1395 }
62f95557 1396 /* FALL THROUGH */
a6ec74c1 1397 case 'x':
08ca2aa3
TH
1398 if (utf8) {
1399 while (len>0) {
1400 if (s >= strend)
1401 Perl_croak(aTHX_ "'x' outside of string in unpack");
1402 s += UTF8SKIP(s);
1403 len--;
1404 }
1405 } else {
fc241834
RGS
1406 if (len > strend - s)
1407 Perl_croak(aTHX_ "'x' outside of string in unpack");
1408 s += len;
f337b084 1409 }
a6ec74c1
JH
1410 break;
1411 case '/':
49704364
WL
1412 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1413 break;
a6ec74c1
JH
1414 case 'A':
1415 case 'Z':
1416 case 'a':
08ca2aa3
TH
1417 if (checksum) {
1418 /* Preliminary length estimate is assumed done in 'W' */
1419 if (len > strend - s) len = strend - s;
1420 goto W_checksum;
1421 }
1422 if (utf8) {
1423 I32 l;
f7fe979e 1424 const char *hop;
08ca2aa3
TH
1425 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1426 if (hop >= strend) {
1427 if (hop > strend)
1428 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1429 break;
fc241834 1430 }
a6ec74c1 1431 }
08ca2aa3
TH
1432 if (hop > strend)
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434 len = hop - s;
1435 } else if (len > strend - s)
1436 len = strend - s;
1437
1438 if (datumtype == 'Z') {
1439 /* 'Z' strips stuff after first null */
f7fe979e 1440 const char *ptr, *end;
f337b084
TH
1441 end = s + len;
1442 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1443 sv = newSVpvn(s, ptr-s);
1444 if (howlen == e_star) /* exact for 'Z*' */
1445 len = ptr-s + (ptr != strend ? 1 : 0);
1446 } else if (datumtype == 'A') {
1447 /* 'A' strips both nulls and spaces */
f7fe979e 1448 const char *ptr;
18bdf90a
TH
1449 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1450 for (ptr = s+len-1; ptr >= s; ptr--)
1451 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1452 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1453 if (ptr >= s) ptr += UTF8SKIP(ptr);
1454 else ptr++;
28be1210 1455 if (ptr > s+len)
18bdf90a
TH
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1457 } else {
1458 for (ptr = s+len-1; ptr >= s; ptr--)
1459 if (*ptr != 0 && !isSPACE(*ptr)) break;
1460 ptr++;
1461 }
08ca2aa3
TH
1462 sv = newSVpvn(s, ptr-s);
1463 } else sv = newSVpvn(s, len);
1464
1465 if (utf8) {
1466 SvUTF8_on(sv);
1467 /* Undo any upgrade done due to need_utf8() */
f337b084 1468 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1469 sv_utf8_downgrade(sv, 0);
a6ec74c1 1470 }
6e449a3a 1471 mXPUSHs(sv);
08ca2aa3 1472 s += len;
a6ec74c1
JH
1473 break;
1474 case 'B':
08ca2aa3
TH
1475 case 'b': {
1476 char *str;
49704364 1477 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1478 len = (strend - s) * 8;
1479 if (checksum) {
f337b084 1480 if (utf8)
08ca2aa3 1481 while (len >= 8 && s < strend) {
f337b084 1482 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1483 len -= 8;
1484 }
f337b084 1485 else
fc241834 1486 while (len >= 8) {
08ca2aa3 1487 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1488 len -= 8;
1489 }
08ca2aa3
TH
1490 if (len && s < strend) {
1491 U8 bits;
f337b084
TH
1492 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1493 if (datumtype == 'b')
a6ec74c1 1494 while (len-- > 0) {
92d41999 1495 if (bits & 1) cuv++;
a6ec74c1
JH
1496 bits >>= 1;
1497 }
f337b084 1498 else
a6ec74c1 1499 while (len-- > 0) {
08ca2aa3 1500 if (bits & 0x80) cuv++;
a6ec74c1
JH
1501 bits <<= 1;
1502 }
fc241834 1503 }
a6ec74c1
JH
1504 break;
1505 }
08ca2aa3 1506
561b68a9 1507 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1508 SvPOK_on(sv);
1509 str = SvPVX(sv);
1510 if (datumtype == 'b') {
f337b084 1511 U8 bits = 0;
f7fe979e 1512 const I32 ai32 = len;
08ca2aa3
TH
1513 for (len = 0; len < ai32; len++) {
1514 if (len & 7) bits >>= 1;
1515 else if (utf8) {
1516 if (s >= strend) break;
f337b084 1517 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1518 } else bits = *(U8 *) s++;
1519 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1520 }
08ca2aa3 1521 } else {
f337b084 1522 U8 bits = 0;
f7fe979e 1523 const I32 ai32 = len;
08ca2aa3
TH
1524 for (len = 0; len < ai32; len++) {
1525 if (len & 7) bits <<= 1;
1526 else if (utf8) {
1527 if (s >= strend) break;
f337b084 1528 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1529 } else bits = *(U8 *) s++;
1530 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1531 }
1532 }
1533 *str = '\0';
aa07b2f6 1534 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1535 XPUSHs(sv);
a6ec74c1 1536 break;
08ca2aa3 1537 }
a6ec74c1 1538 case 'H':
08ca2aa3
TH
1539 case 'h': {
1540 char *str;
fc241834 1541 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1542 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1543 len = (strend - s) * 2;
561b68a9 1544 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1545 SvPOK_on(sv);
1546 str = SvPVX(sv);
1547 if (datumtype == 'h') {
f337b084 1548 U8 bits = 0;
9e27e96a 1549 I32 ai32 = len;
fc241834
RGS
1550 for (len = 0; len < ai32; len++) {
1551 if (len & 1) bits >>= 4;
1552 else if (utf8) {
1553 if (s >= strend) break;
f337b084 1554 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1555 } else bits = * (U8 *) s++;
a6ec74c1
JH
1556 *str++ = PL_hexdigit[bits & 15];
1557 }
08ca2aa3 1558 } else {
f337b084 1559 U8 bits = 0;
f7fe979e 1560 const I32 ai32 = len;
08ca2aa3
TH
1561 for (len = 0; len < ai32; len++) {
1562 if (len & 1) bits <<= 4;
1563 else if (utf8) {
1564 if (s >= strend) break;
f337b084 1565 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1566 } else bits = *(U8 *) s++;
a6ec74c1
JH
1567 *str++ = PL_hexdigit[(bits >> 4) & 15];
1568 }
1569 }
1570 *str = '\0';
aa07b2f6 1571 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1572 XPUSHs(sv);
a6ec74c1 1573 break;
08ca2aa3 1574 }
1651fc44
ML
1575 case 'C':
1576 if (len == 0) {
1577 if (explicit_length)
1578 /* Switch to "character" mode */
1579 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1580 break;
1581 }
1582 /* FALL THROUGH */
a6ec74c1 1583 case 'c':
1651fc44
ML
1584 while (len-- > 0 && s < strend) {
1585 int aint;
1586 if (utf8)
1587 {
1588 STRLEN retlen;
1589 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1590 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1591 if (retlen == (STRLEN) -1 || retlen == 0)
1592 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1593 s += retlen;
1594 }
1595 else
1596 aint = *(U8 *)(s)++;
1597 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1598 aint -= 256;
08ca2aa3 1599 if (!checksum)
6e449a3a 1600 mPUSHi(aint);
73cb7263
NC
1601 else if (checksum > bits_in_uv)
1602 cdouble += (NV)aint;
1603 else
1604 cuv += aint;
a6ec74c1
JH
1605 }
1606 break;
08ca2aa3
TH
1607 case 'W':
1608 W_checksum:
1651fc44 1609 if (utf8) {
08ca2aa3 1610 while (len-- > 0 && s < strend) {
08ca2aa3 1611 STRLEN retlen;
f7fe979e 1612 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1613 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1614 if (retlen == (STRLEN) -1 || retlen == 0)
1615 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1616 s += retlen;
1617 if (!checksum)
6e449a3a 1618 mPUSHu(val);
08ca2aa3
TH
1619 else if (checksum > bits_in_uv)
1620 cdouble += (NV) val;
d6d3e8bd 1621 else
08ca2aa3 1622 cuv += val;
fc241834 1623 }
08ca2aa3 1624 } else if (!checksum)
a6ec74c1 1625 while (len-- > 0) {
f7fe979e 1626 const U8 ch = *(U8 *) s++;
6e449a3a 1627 mPUSHu(ch);
a6ec74c1 1628 }
08ca2aa3
TH
1629 else if (checksum > bits_in_uv)
1630 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1631 else
1632 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1633 break;
1634 case 'U':
35bcd338 1635 if (len == 0) {
08ca2aa3
TH
1636 if (explicit_length) {
1637 /* Switch to "bytes in UTF-8" mode */
f337b084 1638 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1639 else
1640 /* Should be impossible due to the need_utf8() test */
1641 Perl_croak(aTHX_ "U0 mode on a byte string");
1642 }
35bcd338
JH
1643 break;
1644 }
08ca2aa3 1645 if (len > strend - s) len = strend - s;
fc241834 1646 if (!checksum) {
08ca2aa3
TH
1647 if (len && unpack_only_one) len = 1;
1648 EXTEND(SP, len);
1649 EXTEND_MORTAL(len);
fc241834 1650 }
08ca2aa3
TH
1651 while (len-- > 0 && s < strend) {
1652 STRLEN retlen;
1653 UV auv;
1654 if (utf8) {
1655 U8 result[UTF8_MAXLEN];
f7fe979e 1656 const char *ptr = s;
08ca2aa3 1657 STRLEN len;
08ca2aa3
TH
1658 /* Bug: warns about bad utf8 even if we are short on bytes
1659 and will break out of the loop */
230e1fce
NC
1660 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1661 'U'))
08ca2aa3
TH
1662 break;
1663 len = UTF8SKIP(result);
fc241834 1664 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1665 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1666 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1667 s = ptr;
1668 } else {
1669 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1670 if (retlen == (STRLEN) -1 || retlen == 0)
1671 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1672 s += retlen;
1673 }
1674 if (!checksum)
6e449a3a 1675 mPUSHu(auv);
73cb7263 1676 else if (checksum > bits_in_uv)
08ca2aa3 1677 cdouble += (NV) auv;
73cb7263 1678 else
08ca2aa3 1679 cuv += auv;
a6ec74c1
JH
1680 }
1681 break;
49704364
WL
1682 case 's' | TYPE_IS_SHRIEKING:
1683#if SHORTSIZE != SIZE16
73cb7263 1684 while (len-- > 0) {
08ca2aa3 1685 short ashort;
f337b084
TH
1686 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1687 DO_BO_UNPACK(ashort, s);
08ca2aa3 1688 if (!checksum)
6e449a3a 1689 mPUSHi(ashort);
73cb7263
NC
1690 else if (checksum > bits_in_uv)
1691 cdouble += (NV)ashort;
1692 else
1693 cuv += ashort;
49704364
WL
1694 }
1695 break;
1696#else
1697 /* Fallthrough! */
a6ec74c1 1698#endif
49704364 1699 case 's':
73cb7263 1700 while (len-- > 0) {
08ca2aa3
TH
1701 I16 ai16;
1702
1703#if U16SIZE > SIZE16
1704 ai16 = 0;
1705#endif
f337b084 1706 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1707 DO_BO_UNPACK(ai16, 16);
1109a392 1708#if U16SIZE > SIZE16
73cb7263
NC
1709 if (ai16 > 32767)
1710 ai16 -= 65536;
a6ec74c1 1711#endif
08ca2aa3 1712 if (!checksum)
6e449a3a 1713 mPUSHi(ai16);
73cb7263
NC
1714 else if (checksum > bits_in_uv)
1715 cdouble += (NV)ai16;
1716 else
1717 cuv += ai16;
a6ec74c1
JH
1718 }
1719 break;
49704364
WL
1720 case 'S' | TYPE_IS_SHRIEKING:
1721#if SHORTSIZE != SIZE16
73cb7263 1722 while (len-- > 0) {
08ca2aa3 1723 unsigned short aushort;
f337b084
TH
1724 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1725 DO_BO_UNPACK(aushort, s);
08ca2aa3 1726 if (!checksum)
6e449a3a 1727 mPUSHu(aushort);
73cb7263
NC
1728 else if (checksum > bits_in_uv)
1729 cdouble += (NV)aushort;
1730 else
1731 cuv += aushort;
49704364
WL
1732 }
1733 break;
1734#else
1735 /* Fallhrough! */
1736#endif
a6ec74c1
JH
1737 case 'v':
1738 case 'n':
1739 case 'S':
73cb7263 1740 while (len-- > 0) {
08ca2aa3
TH
1741 U16 au16;
1742#if U16SIZE > SIZE16
1743 au16 = 0;
1744#endif
f337b084 1745 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1746 DO_BO_UNPACK(au16, 16);
a6ec74c1 1747#ifdef HAS_NTOHS
73cb7263
NC
1748 if (datumtype == 'n')
1749 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1750#endif
1751#ifdef HAS_VTOHS
73cb7263
NC
1752 if (datumtype == 'v')
1753 au16 = vtohs(au16);
a6ec74c1 1754#endif
08ca2aa3 1755 if (!checksum)
6e449a3a 1756 mPUSHu(au16);
73cb7263 1757 else if (checksum > bits_in_uv)
f337b084 1758 cdouble += (NV) au16;
73cb7263
NC
1759 else
1760 cuv += au16;
a6ec74c1
JH
1761 }
1762 break;
7212898e 1763#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1764 case 'v' | TYPE_IS_SHRIEKING:
1765 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1766 while (len-- > 0) {
08ca2aa3
TH
1767 I16 ai16;
1768# if U16SIZE > SIZE16
1769 ai16 = 0;
1770# endif
f337b084 1771 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1772# ifdef HAS_NTOHS
73cb7263 1773 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1774 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1775# endif /* HAS_NTOHS */
1776# ifdef HAS_VTOHS
73cb7263 1777 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1778 ai16 = (I16) vtohs((U16) ai16);
1779# endif /* HAS_VTOHS */
1780 if (!checksum)
6e449a3a 1781 mPUSHi(ai16);
73cb7263 1782 else if (checksum > bits_in_uv)
08ca2aa3 1783 cdouble += (NV) ai16;
73cb7263
NC
1784 else
1785 cuv += ai16;
068bd2e7
MHM
1786 }
1787 break;
08ca2aa3 1788#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1789 case 'i':
49704364 1790 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1791 while (len-- > 0) {
08ca2aa3 1792 int aint;
f337b084
TH
1793 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1794 DO_BO_UNPACK(aint, i);
08ca2aa3 1795 if (!checksum)
6e449a3a 1796 mPUSHi(aint);
73cb7263
NC
1797 else if (checksum > bits_in_uv)
1798 cdouble += (NV)aint;
1799 else
1800 cuv += aint;
a6ec74c1
JH
1801 }
1802 break;
1803 case 'I':
49704364 1804 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1805 while (len-- > 0) {
08ca2aa3 1806 unsigned int auint;
f337b084
TH
1807 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1808 DO_BO_UNPACK(auint, i);
08ca2aa3 1809 if (!checksum)
6e449a3a 1810 mPUSHu(auint);
73cb7263
NC
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV)auint;
1813 else
1814 cuv += auint;
a6ec74c1
JH
1815 }
1816 break;
92d41999 1817 case 'j':
73cb7263 1818 while (len-- > 0) {
08ca2aa3 1819 IV aiv;
f337b084 1820 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1821#if IVSIZE == INTSIZE
f337b084 1822 DO_BO_UNPACK(aiv, i);
1109a392 1823#elif IVSIZE == LONGSIZE
f337b084 1824 DO_BO_UNPACK(aiv, l);
1109a392 1825#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1826 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1827#else
1828 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1829#endif
08ca2aa3 1830 if (!checksum)
6e449a3a 1831 mPUSHi(aiv);
73cb7263
NC
1832 else if (checksum > bits_in_uv)
1833 cdouble += (NV)aiv;
1834 else
1835 cuv += aiv;
92d41999
JH
1836 }
1837 break;
1838 case 'J':
73cb7263 1839 while (len-- > 0) {
08ca2aa3 1840 UV auv;
f337b084 1841 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1842#if IVSIZE == INTSIZE
f337b084 1843 DO_BO_UNPACK(auv, i);
08ca2aa3 1844#elif IVSIZE == LONGSIZE
f337b084 1845 DO_BO_UNPACK(auv, l);
08ca2aa3 1846#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1847 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1848#else
1849 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1850#endif
08ca2aa3 1851 if (!checksum)
6e449a3a 1852 mPUSHu(auv);
73cb7263
NC
1853 else if (checksum > bits_in_uv)
1854 cdouble += (NV)auv;
1855 else
1856 cuv += auv;
92d41999
JH
1857 }
1858 break;
49704364
WL
1859 case 'l' | TYPE_IS_SHRIEKING:
1860#if LONGSIZE != SIZE32
73cb7263 1861 while (len-- > 0) {
08ca2aa3 1862 long along;
f337b084
TH
1863 SHIFT_VAR(utf8, s, strend, along, datumtype);
1864 DO_BO_UNPACK(along, l);
08ca2aa3 1865 if (!checksum)
6e449a3a 1866 mPUSHi(along);
73cb7263
NC
1867 else if (checksum > bits_in_uv)
1868 cdouble += (NV)along;
1869 else
1870 cuv += along;
49704364
WL
1871 }
1872 break;
1873#else
1874 /* Fallthrough! */
a6ec74c1 1875#endif
49704364 1876 case 'l':
73cb7263 1877 while (len-- > 0) {
08ca2aa3
TH
1878 I32 ai32;
1879#if U32SIZE > SIZE32
1880 ai32 = 0;
1881#endif
f337b084 1882 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1883 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1884#if U32SIZE > SIZE32
08ca2aa3 1885 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1886#endif
08ca2aa3 1887 if (!checksum)
6e449a3a 1888 mPUSHi(ai32);
73cb7263
NC
1889 else if (checksum > bits_in_uv)
1890 cdouble += (NV)ai32;
1891 else
1892 cuv += ai32;
a6ec74c1
JH
1893 }
1894 break;
49704364
WL
1895 case 'L' | TYPE_IS_SHRIEKING:
1896#if LONGSIZE != SIZE32
73cb7263 1897 while (len-- > 0) {
08ca2aa3 1898 unsigned long aulong;
f337b084
TH
1899 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1900 DO_BO_UNPACK(aulong, l);
08ca2aa3 1901 if (!checksum)
6e449a3a 1902 mPUSHu(aulong);
73cb7263
NC
1903 else if (checksum > bits_in_uv)
1904 cdouble += (NV)aulong;
1905 else
1906 cuv += aulong;
49704364
WL
1907 }
1908 break;
1909#else
1910 /* Fall through! */
1911#endif
a6ec74c1
JH
1912 case 'V':
1913 case 'N':
1914 case 'L':
73cb7263 1915 while (len-- > 0) {
08ca2aa3
TH
1916 U32 au32;
1917#if U32SIZE > SIZE32
1918 au32 = 0;
1919#endif
f337b084 1920 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1921 DO_BO_UNPACK(au32, 32);
a6ec74c1 1922#ifdef HAS_NTOHL
73cb7263
NC
1923 if (datumtype == 'N')
1924 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1925#endif
1926#ifdef HAS_VTOHL
73cb7263
NC
1927 if (datumtype == 'V')
1928 au32 = vtohl(au32);
a6ec74c1 1929#endif
08ca2aa3 1930 if (!checksum)
6e449a3a 1931 mPUSHu(au32);
fc241834
RGS
1932 else if (checksum > bits_in_uv)
1933 cdouble += (NV)au32;
1934 else
1935 cuv += au32;
a6ec74c1
JH
1936 }
1937 break;
7212898e 1938#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1939 case 'V' | TYPE_IS_SHRIEKING:
1940 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1941 while (len-- > 0) {
08ca2aa3
TH
1942 I32 ai32;
1943# if U32SIZE > SIZE32
1944 ai32 = 0;
1945# endif
f337b084 1946 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1947# ifdef HAS_NTOHL
73cb7263
NC
1948 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1949 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1950# endif
1951# ifdef HAS_VTOHL
73cb7263
NC
1952 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1953 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1954# endif
1955 if (!checksum)
6e449a3a 1956 mPUSHi(ai32);
73cb7263
NC
1957 else if (checksum > bits_in_uv)
1958 cdouble += (NV)ai32;
1959 else
1960 cuv += ai32;
068bd2e7
MHM
1961 }
1962 break;
08ca2aa3 1963#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1964 case 'p':
a6ec74c1 1965 while (len-- > 0) {
f7fe979e 1966 const char *aptr;
f337b084 1967 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1968 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1969 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1970 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1971 }
1972 break;
1973 case 'w':
a6ec74c1
JH
1974 {
1975 UV auv = 0;
1976 U32 bytes = 0;
fc241834 1977
08ca2aa3
TH
1978 while (len > 0 && s < strend) {
1979 U8 ch;
f337b084 1980 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1981 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1982 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1983 if (ch < 0x80) {
a6ec74c1 1984 bytes = 0;
6e449a3a 1985 mPUSHu(auv);
a6ec74c1
JH
1986 len--;
1987 auv = 0;
08ca2aa3 1988 continue;
a6ec74c1 1989 }
08ca2aa3 1990 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1991 const char *t;
a6ec74c1 1992
f5992bc4 1993 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1994 while (s < strend) {
f337b084 1995 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1996 sv = mul128(sv, (U8)(ch & 0x7f));
1997 if (!(ch & 0x80)) {
a6ec74c1
JH
1998 bytes = 0;
1999 break;
2000 }
2001 }
10516c54 2002 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2003 while (*t == '0')
2004 t++;
2005 sv_chop(sv, t);
6e449a3a 2006 mPUSHs(sv);
a6ec74c1
JH
2007 len--;
2008 auv = 0;
2009 }
2010 }
2011 if ((s >= strend) && bytes)
49704364 2012 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2013 }
2014 break;
2015 case 'P':
49704364
WL
2016 if (symptr->howlen == e_star)
2017 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2018 EXTEND(SP, 1);
2d3e0934 2019 if (s + sizeof(char*) <= strend) {
08ca2aa3 2020 char *aptr;
f337b084 2021 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2022 DO_BO_UNPACK_PC(aptr);
fc241834 2023 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2024 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2025 }
a6ec74c1
JH
2026 break;
2027#ifdef HAS_QUAD
2028 case 'q':
73cb7263 2029 while (len-- > 0) {
08ca2aa3 2030 Quad_t aquad;
f337b084
TH
2031 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2032 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2033 if (!checksum)
6e449a3a
MHM
2034 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2035 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2036 else if (checksum > bits_in_uv)
2037 cdouble += (NV)aquad;
2038 else
2039 cuv += aquad;
2040 }
a6ec74c1
JH
2041 break;
2042 case 'Q':
73cb7263 2043 while (len-- > 0) {
08ca2aa3 2044 Uquad_t auquad;
f337b084
TH
2045 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2046 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2047 if (!checksum)
6e449a3a
MHM
2048 mPUSHs(auquad <= UV_MAX ?
2049 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2050 else if (checksum > bits_in_uv)
2051 cdouble += (NV)auquad;
2052 else
2053 cuv += auquad;
a6ec74c1
JH
2054 }
2055 break;
08ca2aa3 2056#endif /* HAS_QUAD */
a6ec74c1
JH
2057 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2058 case 'f':
73cb7263 2059 while (len-- > 0) {
08ca2aa3 2060 float afloat;
f337b084 2061 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2062 DO_BO_UNPACK_N(afloat, float);
2063 if (!checksum)
6e449a3a 2064 mPUSHn(afloat);
08ca2aa3 2065 else
73cb7263 2066 cdouble += afloat;
fc241834 2067 }
a6ec74c1
JH
2068 break;
2069 case 'd':
73cb7263 2070 while (len-- > 0) {
08ca2aa3 2071 double adouble;
f337b084 2072 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2073 DO_BO_UNPACK_N(adouble, double);
2074 if (!checksum)
6e449a3a 2075 mPUSHn(adouble);
08ca2aa3 2076 else
73cb7263 2077 cdouble += adouble;
fc241834 2078 }
a6ec74c1 2079 break;
92d41999 2080 case 'F':
73cb7263 2081 while (len-- > 0) {
08ca2aa3 2082 NV anv;
f337b084 2083 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2084 DO_BO_UNPACK_N(anv, NV);
2085 if (!checksum)
6e449a3a 2086 mPUSHn(anv);
08ca2aa3 2087 else
73cb7263 2088 cdouble += anv;
fc241834 2089 }
92d41999
JH
2090 break;
2091#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2092 case 'D':
73cb7263 2093 while (len-- > 0) {
08ca2aa3 2094 long double aldouble;
f337b084 2095 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2096 DO_BO_UNPACK_N(aldouble, long double);
2097 if (!checksum)
6e449a3a 2098 mPUSHn(aldouble);
08ca2aa3
TH
2099 else
2100 cdouble += aldouble;
92d41999
JH
2101 }
2102 break;
2103#endif
a6ec74c1 2104 case 'u':
08ca2aa3 2105 {
f7fe979e 2106 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2107 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2108 if (l) SvPOK_on(sv);
2109 }
2110 if (utf8) {
2111 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2112 I32 a, b, c, d;
db187877 2113 char hunk[3];
08ca2aa3 2114
08ca2aa3
TH
2115 while (len > 0) {
2116 next_uni_uu(aTHX_ &s, strend, &a);
2117 next_uni_uu(aTHX_ &s, strend, &b);
2118 next_uni_uu(aTHX_ &s, strend, &c);
2119 next_uni_uu(aTHX_ &s, strend, &d);
2120 hunk[0] = (char)((a << 2) | (b >> 4));
2121 hunk[1] = (char)((b << 4) | (c >> 2));
2122 hunk[2] = (char)((c << 6) | d);
2123 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2124 len -= 3;
2125 }
2126 if (s < strend) {
f7fe979e
AL
2127 if (*s == '\n') {
2128 s++;
2129 }
08ca2aa3
TH
2130 else {
2131 /* possible checksum byte */
f7fe979e
AL
2132 const char *skip = s+UTF8SKIP(s);
2133 if (skip < strend && *skip == '\n')
2134 s = skip+1;
08ca2aa3
TH
2135 }
2136 }
2137 }
2138 } else {
fc241834
RGS
2139 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2140 I32 a, b, c, d;
db187877 2141 char hunk[3];
a6ec74c1 2142
fc241834
RGS
2143 len = PL_uudmap[*(U8*)s++] & 077;
2144 while (len > 0) {
2145 if (s < strend && ISUUCHAR(*s))
2146 a = PL_uudmap[*(U8*)s++] & 077;
2147 else
2148 a = 0;
2149 if (s < strend && ISUUCHAR(*s))
2150 b = PL_uudmap[*(U8*)s++] & 077;
2151 else
2152 b = 0;
2153 if (s < strend && ISUUCHAR(*s))
2154 c = PL_uudmap[*(U8*)s++] & 077;
2155 else
2156 c = 0;
2157 if (s < strend && ISUUCHAR(*s))
2158 d = PL_uudmap[*(U8*)s++] & 077;
2159 else
2160 d = 0;
2161 hunk[0] = (char)((a << 2) | (b >> 4));
2162 hunk[1] = (char)((b << 4) | (c >> 2));
2163 hunk[2] = (char)((c << 6) | d);
2164 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2165 len -= 3;
2166 }
2167 if (*s == '\n')
2168 s++;
2169 else /* possible checksum byte */
2170 if (s + 1 < strend && s[1] == '\n')
2171 s += 2;
a6ec74c1 2172 }
08ca2aa3
TH
2173 }
2174 XPUSHs(sv);
a6ec74c1
JH
2175 break;
2176 }
49704364 2177
a6ec74c1 2178 if (checksum) {
1109a392 2179 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2180 (checksum > bits_in_uv &&
08ca2aa3
TH
2181 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2182 NV trouble, anv;
a6ec74c1 2183
08ca2aa3 2184 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2185 while (checksum >= 16) {
2186 checksum -= 16;
08ca2aa3 2187 anv *= 65536.0;
a6ec74c1 2188 }
a6ec74c1 2189 while (cdouble < 0.0)
08ca2aa3
TH
2190 cdouble += anv;
2191 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2192 sv = newSVnv(cdouble);
a6ec74c1
JH
2193 }
2194 else {
fa8ec7c1
NC
2195 if (checksum < bits_in_uv) {
2196 UV mask = ((UV)1 << checksum) - 1;
92d41999 2197 cuv &= mask;
a6ec74c1 2198 }
c4c5f44a 2199 sv = newSVuv(cuv);
a6ec74c1 2200 }
6e449a3a 2201 mXPUSHs(sv);
a6ec74c1
JH
2202 checksum = 0;
2203 }
fc241834 2204
49704364
WL
2205 if (symptr->flags & FLAG_SLASH){
2206 if (SP - PL_stack_base - start_sp_offset <= 0)
2207 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2208 if( next_symbol(symptr) ){
2209 if( symptr->howlen == e_number )
2210 Perl_croak(aTHX_ "Count after length/code in unpack" );
2211 if( beyond ){
2212 /* ...end of char buffer then no decent length available */
2213 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2214 } else {
2215 /* take top of stack (hope it's numeric) */
2216 len = POPi;
2217 if( len < 0 )
2218 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2219 }
2220 } else {
2221 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2222 }
2223 datumtype = symptr->code;
21c16052 2224 explicit_length = FALSE;
49704364
WL
2225 goto redo_switch;
2226 }
a6ec74c1 2227 }
49704364 2228
18529408
IZ
2229 if (new_s)
2230 *new_s = s;
2231 PUTBACK;
2232 return SP - PL_stack_base - start_sp_offset;
2233}
2234
2235PP(pp_unpack)
2236{
97aff369 2237 dVAR;
18529408 2238 dSP;
bab9c0ac 2239 dPOPPOPssrl;
18529408
IZ
2240 I32 gimme = GIMME_V;
2241 STRLEN llen;
2242 STRLEN rlen;
5c144d81
NC
2243 const char *pat = SvPV_const(left, llen);
2244 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2245 const char *strend = s + rlen;
2246 const char *patend = pat + llen;
08ca2aa3 2247 I32 cnt;
18529408
IZ
2248
2249 PUTBACK;
7accc089 2250 cnt = unpackstring(pat, patend, s, strend,
49704364 2251 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2252 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2253
18529408
IZ
2254 SPAGAIN;
2255 if ( !cnt && gimme == G_SCALAR )
2256 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2257 RETURN;
2258}
2259
f337b084 2260STATIC U8 *
f7fe979e 2261doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2262{
f337b084 2263 *h++ = PL_uuemap[len];
a6ec74c1 2264 while (len > 2) {
f337b084
TH
2265 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2266 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2267 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2268 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2269 s += 3;
2270 len -= 3;
2271 }
2272 if (len > 0) {
f7fe979e 2273 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2274 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2275 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2276 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2277 *h++ = PL_uuemap[0];
a6ec74c1 2278 }
f337b084
TH
2279 *h++ = '\n';
2280 return h;
a6ec74c1
JH
2281}
2282
2283STATIC SV *
f7fe979e 2284S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2285{
8b6e33c7
AL
2286 SV *result = newSVpvn(s, l);
2287 char *const result_c = SvPV_nolen(result); /* convenience */
2288 char *out = result_c;
2289 bool skip = 1;
2290 bool ignore = 0;
a6ec74c1 2291
7918f24d
NC
2292 PERL_ARGS_ASSERT_IS_AN_INT;
2293
a6ec74c1
JH
2294 while (*s) {
2295 switch (*s) {
2296 case ' ':
2297 break;
2298 case '+':
2299 if (!skip) {
2300 SvREFCNT_dec(result);
2301 return (NULL);
2302 }
2303 break;
2304 case '0':
2305 case '1':
2306 case '2':
2307 case '3':
2308 case '4':
2309 case '5':
2310 case '6':
2311 case '7':
2312 case '8':
2313 case '9':
2314 skip = 0;
2315 if (!ignore) {
2316 *(out++) = *s;
2317 }
2318 break;
2319 case '.':
2320 ignore = 1;
2321 break;
2322 default:
2323 SvREFCNT_dec(result);
2324 return (NULL);
2325 }
2326 s++;
2327 }
2328 *(out++) = '\0';
2329 SvCUR_set(result, out - result_c);
2330 return (result);
2331}
2332
2333/* pnum must be '\0' terminated */
2334STATIC int
2335S_div128(pTHX_ SV *pnum, bool *done)
2336{
8b6e33c7
AL
2337 STRLEN len;
2338 char * const s = SvPV(pnum, len);
2339 char *t = s;
2340 int m = 0;
2341
7918f24d
NC
2342 PERL_ARGS_ASSERT_DIV128;
2343
8b6e33c7
AL
2344 *done = 1;
2345 while (*t) {
2346 const int i = m * 10 + (*t - '0');
2347 const int r = (i >> 7); /* r < 10 */
2348 m = i & 0x7F;
2349 if (r) {
2350 *done = 0;
2351 }
2352 *(t++) = '0' + r;
a6ec74c1 2353 }
8b6e33c7
AL
2354 *(t++) = '\0';
2355 SvCUR_set(pnum, (STRLEN) (t - s));
2356 return (m);
a6ec74c1
JH
2357}
2358
18529408 2359/*
7accc089
JH
2360=for apidoc packlist
2361
2362The engine implementing pack() Perl function.
2363
bfce84ec
AL
2364=cut
2365*/
7accc089
JH
2366
2367void
f7fe979e 2368Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2369{
97aff369 2370 dVAR;
aadb217d
JH
2371 tempsym_t sym;
2372
7918f24d
NC
2373 PERL_ARGS_ASSERT_PACKLIST;
2374
f7fe979e 2375 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2376
f337b084
TH
2377 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2378 Also make sure any UTF8 flag is loaded */
56eb0262 2379 SvPV_force_nolen(cat);
bfce84ec
AL
2380 if (DO_UTF8(cat))
2381 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2382
49704364
WL
2383 (void)pack_rec( cat, &sym, beglist, endlist );
2384}
2385
f337b084
TH
2386/* like sv_utf8_upgrade, but also repoint the group start markers */
2387STATIC void
2388marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2389 STRLEN len;
2390 tempsym_t *group;
f7fe979e
AL
2391 const char *from_ptr, *from_start, *from_end, **marks, **m;
2392 char *to_start, *to_ptr;
f337b084
TH
2393
2394 if (SvUTF8(sv)) return;
2395
aa07b2f6 2396 from_start = SvPVX_const(sv);
f337b084
TH
2397 from_end = from_start + SvCUR(sv);
2398 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2399 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2400 if (from_ptr == from_end) {
2401 /* Simple case: no character needs to be changed */
2402 SvUTF8_on(sv);
2403 return;
2404 }
2405
3473cf63 2406 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2407 Newx(to_start, len, char);
f337b084
TH
2408 Copy(from_start, to_start, from_ptr-from_start, char);
2409 to_ptr = to_start + (from_ptr-from_start);
2410
a02a5408 2411 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2412 for (group=sym_ptr; group; group = group->previous)
2413 marks[group->level] = from_start + group->strbeg;
2414 marks[sym_ptr->level+1] = from_end+1;
2415 for (m = marks; *m < from_ptr; m++)
2416 *m = to_start + (*m-from_start);
2417
2418 for (;from_ptr < from_end; from_ptr++) {
2419 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2420 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2421 }
2422 *to_ptr = 0;
2423
2424 while (*m == from_ptr) *m++ = to_ptr;
2425 if (m != marks + sym_ptr->level+1) {
2426 Safefree(marks);
2427 Safefree(to_start);
2428 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2429 }
2430 for (group=sym_ptr; group; group = group->previous)
2431 group->strbeg = marks[group->level] - to_start;
2432 Safefree(marks);
2433
2434 if (SvOOK(sv)) {
2435 if (SvIVX(sv)) {
b162af07 2436 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2437 from_start -= SvIVX(sv);
2438 SvIV_set(sv, 0);
2439 }
2440 SvFLAGS(sv) &= ~SVf_OOK;
2441 }
2442 if (SvLEN(sv) != 0)
2443 Safefree(from_start);
f880fe2f 2444 SvPV_set(sv, to_start);
b162af07
SP
2445 SvCUR_set(sv, to_ptr - to_start);
2446 SvLEN_set(sv, len);
f337b084
TH
2447 SvUTF8_on(sv);
2448}
2449
2450/* Exponential string grower. Makes string extension effectively O(n)
2451 needed says how many extra bytes we need (not counting the final '\0')
2452 Only grows the string if there is an actual lack of space
2453*/
2454STATIC char *
0bd48802 2455S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2456 const STRLEN cur = SvCUR(sv);
2457 const STRLEN len = SvLEN(sv);
f337b084 2458 STRLEN extend;
7918f24d
NC
2459
2460 PERL_ARGS_ASSERT_SV_EXP_GROW;
2461
f337b084
TH
2462 if (len - cur > needed) return SvPVX(sv);
2463 extend = needed > len ? needed : len;
2464 return SvGROW(sv, len+extend+1);
2465}
49704364
WL
2466
2467STATIC
2468SV **
f337b084 2469S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2470{
97aff369 2471 dVAR;
49704364 2472 tempsym_t lookahead;
f337b084
TH
2473 I32 items = endlist - beglist;
2474 bool found = next_symbol(symptr);
2475 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2476 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2477
7918f24d
NC
2478 PERL_ARGS_ASSERT_PACK_REC;
2479
f337b084
TH
2480 if (symptr->level == 0 && found && symptr->code == 'U') {
2481 marked_upgrade(aTHX_ cat, symptr);
2482 symptr->flags |= FLAG_DO_UTF8;
2483 utf8 = 0;
49704364 2484 }
f337b084 2485 symptr->strbeg = SvCUR(cat);
49704364
WL
2486
2487 while (found) {
f337b084
TH
2488 SV *fromstr;
2489 STRLEN fromlen;
2490 I32 len;
a0714e2c 2491 SV *lengthcode = NULL;
49704364 2492 I32 datumtype = symptr->code;
f337b084
TH
2493 howlen_t howlen = symptr->howlen;
2494 char *start = SvPVX(cat);
2495 char *cur = start + SvCUR(cat);
49704364 2496
f337b084
TH
2497#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2498
2499 switch (howlen) {
fc241834 2500 case e_star:
f337b084
TH
2501 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2502 0 : items;
2503 break;
2504 default:
2505 /* e_no_len and e_number */
2506 len = symptr->length;
49704364
WL
2507 break;
2508 }
2509
f337b084 2510 if (len) {
a7a3cfaa 2511 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2512
a7a3cfaa
TH
2513 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2514 /* We can process this letter. */
2515 STRLEN size = props & PACK_SIZE_MASK;
2516 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2517 }
f337b084
TH
2518 }
2519
49704364
WL
2520 /* Look ahead for next symbol. Do we have code/code? */
2521 lookahead = *symptr;
2522 found = next_symbol(&lookahead);
246f24af
TH
2523 if (symptr->flags & FLAG_SLASH) {
2524 IV count;
f337b084 2525 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2526 if (strchr("aAZ", lookahead.code)) {
2527 if (lookahead.howlen == e_number) count = lookahead.length;
2528 else {
ce399ba6
NC
2529 if (items > 0) {
2530 if (SvGAMAGIC(*beglist)) {
2531 /* Avoid reading the active data more than once
2532 by copying it to a temporary. */
2533 STRLEN len;
2534 const char *const pv = SvPV_const(*beglist, len);
740cce10 2535 SV *const temp
59cd0e26
NC
2536 = newSVpvn_flags(pv, len,
2537 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2538 *beglist = temp;
2539 }
246f24af
TH
2540 count = DO_UTF8(*beglist) ?
2541 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2542 }
246f24af
TH
2543 else count = 0;
2544 if (lookahead.code == 'Z') count++;
2545 }
2546 } else {
2547 if (lookahead.howlen == e_number && lookahead.length < items)
2548 count = lookahead.length;
2549 else count = items;
2550 }
2551 lookahead.howlen = e_number;
2552 lookahead.length = count;
2553 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2554 }
49704364 2555
fc241834
RGS
2556 /* Code inside the switch must take care to properly update
2557 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2558 doesn't simply leave using break */
1109a392 2559 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2560 default:
f337b084
TH
2561 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2562 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2563 case '%':
49704364 2564 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2565 {
2566 char *from;
2567#ifdef PERL_PACK_CAN_SHRIEKSIGN
2568 case '.' | TYPE_IS_SHRIEKING:
2569#endif
2570 case '.':
2571 if (howlen == e_star) from = start;
2572 else if (len == 0) from = cur;
2573 else {
2574 tempsym_t *group = symptr;
2575
2576 while (--len && group) group = group->previous;
2577 from = group ? start + group->strbeg : start;
2578 }
2579 fromstr = NEXTFROM;
2580 len = SvIV(fromstr);
2581 goto resize;
2582#ifdef PERL_PACK_CAN_SHRIEKSIGN
2583 case '@' | TYPE_IS_SHRIEKING:
2584#endif
a6ec74c1 2585 case '@':
28be1210
TH
2586 from = start + symptr->strbeg;
2587 resize:
2588#ifdef PERL_PACK_CAN_SHRIEKSIGN
2589 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2590#else /* PERL_PACK_CAN_SHRIEKSIGN */
2591 if (utf8)
2592#endif
2593 if (len >= 0) {
2594 while (len && from < cur) {
2595 from += UTF8SKIP(from);
2596 len--;
2597 }
2598 if (from > cur)
2599 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2600 if (len) {
2601 /* Here we know from == cur */
2602 grow:
2603 GROWING(0, cat, start, cur, len);
2604 Zero(cur, len, char);
2605 cur += len;
2606 } else if (from < cur) {
2607 len = cur - from;
2608 goto shrink;
2609 } else goto no_change;
2610 } else {
2611 cur = from;
2612 len = -len;
2613 goto utf8_shrink;
f337b084 2614 }
28be1210
TH
2615 else {
2616 len -= cur - from;
f337b084 2617 if (len > 0) goto grow;
28be1210 2618 if (len == 0) goto no_change;
fc241834 2619 len = -len;
28be1210 2620 goto shrink;
f337b084 2621 }
a6ec74c1 2622 break;
28be1210 2623 }
fc241834 2624 case '(': {
49704364 2625 tempsym_t savsym = *symptr;
66c611c5
MHM
2626 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2627 symptr->flags |= group_modifiers;
49704364
WL
2628 symptr->patend = savsym.grpend;
2629 symptr->level++;
f337b084 2630 symptr->previous = &lookahead;
18529408 2631 while (len--) {
f337b084
TH
2632 U32 was_utf8;
2633 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2634 else symptr->flags &= ~FLAG_PARSE_UTF8;
2635 was_utf8 = SvUTF8(cat);
49704364 2636 symptr->patptr = savsym.grpbeg;
f337b084
TH
2637 beglist = pack_rec(cat, symptr, beglist, endlist);
2638 if (SvUTF8(cat) != was_utf8)
2639 /* This had better be an upgrade while in utf8==0 mode */
2640 utf8 = 1;
2641
49704364 2642 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2643 break; /* No way to continue */
2644 }
ee790063 2645 items = endlist - beglist;
f337b084
TH
2646 lookahead.flags = symptr->flags & ~group_modifiers;
2647 goto no_change;
18529408 2648 }
62f95557
IZ
2649 case 'X' | TYPE_IS_SHRIEKING:
2650 if (!len) /* Avoid division by 0 */
2651 len = 1;
f337b084
TH
2652 if (utf8) {
2653 char *hop, *last;
2654 I32 l = len;
2655 hop = last = start;
2656 while (hop < cur) {
2657 hop += UTF8SKIP(hop);
2658 if (--l == 0) {
2659 last = hop;
2660 l = len;
2661 }
2662 }
2663 if (last > cur)
2664 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2665 cur = last;
2666 break;
2667 }
2668 len = (cur-start) % len;
62f95557 2669 /* FALL THROUGH */
a6ec74c1 2670 case 'X':
f337b084
TH
2671 if (utf8) {
2672 if (len < 1) goto no_change;
28be1210 2673 utf8_shrink:
f337b084
TH
2674 while (len > 0) {
2675 if (cur <= start)
28be1210
TH
2676 Perl_croak(aTHX_ "'%c' outside of string in pack",
2677 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2678 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2679 if (cur <= start)
28be1210
TH
2680 Perl_croak(aTHX_ "'%c' outside of string in pack",
2681 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2682 }
2683 len--;
2684 }
2685 } else {
fc241834 2686 shrink:
f337b084 2687 if (cur - start < len)
28be1210
TH
2688 Perl_croak(aTHX_ "'%c' outside of string in pack",
2689 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2690 cur -= len;
2691 }
2692 if (cur < start+symptr->strbeg) {
2693 /* Make sure group starts don't point into the void */
2694 tempsym_t *group;
9e27e96a 2695 const STRLEN length = cur-start;
f337b084
TH
2696 for (group = symptr;
2697 group && length < group->strbeg;
2698 group = group->previous) group->strbeg = length;
2699 lookahead.strbeg = length;
2700 }
a6ec74c1 2701 break;
fc241834
RGS
2702 case 'x' | TYPE_IS_SHRIEKING: {
2703 I32 ai32;
62f95557
IZ
2704 if (!len) /* Avoid division by 0 */
2705 len = 1;
230e1fce 2706 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2707 else ai32 = (cur - start) % len;
2708 if (ai32 == 0) goto no_change;
2709 len -= ai32;
2710 }
2711 /* FALL THROUGH */
a6ec74c1 2712 case 'x':
f337b084 2713 goto grow;
a6ec74c1
JH
2714 case 'A':
2715 case 'Z':
f337b084 2716 case 'a': {
f7fe979e 2717 const char *aptr;
f337b084 2718
a6ec74c1 2719 fromstr = NEXTFROM;
e62f0680 2720 aptr = SvPV_const(fromstr, fromlen);
f337b084 2721 if (DO_UTF8(fromstr)) {
f7fe979e 2722 const char *end, *s;
f337b084
TH
2723
2724 if (!utf8 && !SvUTF8(cat)) {
2725 marked_upgrade(aTHX_ cat, symptr);
2726 lookahead.flags |= FLAG_DO_UTF8;
2727 lookahead.strbeg = symptr->strbeg;
2728 utf8 = 1;
2729 start = SvPVX(cat);
2730 cur = start + SvCUR(cat);
2731 }
fc241834 2732 if (howlen == e_star) {
f337b084
TH
2733 if (utf8) goto string_copy;
2734 len = fromlen+1;
2735 }
2736 s = aptr;
2737 end = aptr + fromlen;
2738 fromlen = datumtype == 'Z' ? len-1 : len;
2739 while ((I32) fromlen > 0 && s < end) {
2740 s += UTF8SKIP(s);
2741 fromlen--;
2742 }
2743 if (s > end)
2744 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2745 if (utf8) {
fc241834 2746 len = fromlen;
f337b084
TH
2747 if (datumtype == 'Z') len++;
2748 fromlen = s-aptr;
2749 len += fromlen;
fc241834 2750
f337b084 2751 goto string_copy;
fc241834 2752 }
f337b084
TH
2753 fromlen = len - fromlen;
2754 if (datumtype == 'Z') fromlen--;
2755 if (howlen == e_star) {
2756 len = fromlen;
2757 if (datumtype == 'Z') len++;
fc241834 2758 }
f337b084 2759 GROWING(0, cat, start, cur, len);
fc241834 2760 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2761 datumtype | TYPE_IS_PACK))
2762 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2763 cur += fromlen;
a6ec74c1 2764 len -= fromlen;
f337b084
TH
2765 } else if (utf8) {
2766 if (howlen == e_star) {
2767 len = fromlen;
2768 if (datumtype == 'Z') len++;
a6ec74c1 2769 }
f337b084
TH
2770 if (len <= (I32) fromlen) {
2771 fromlen = len;
2772 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2773 }
fc241834 2774 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2775 upgrade, so:
2776 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2777 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2778 len -= fromlen;
2779 while (fromlen > 0) {
230e1fce 2780 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2781 aptr++;
2782 fromlen--;
fc241834 2783 }
f337b084
TH
2784 } else {
2785 string_copy:
2786 if (howlen == e_star) {
2787 len = fromlen;
2788 if (datumtype == 'Z') len++;
2789 }
2790 if (len <= (I32) fromlen) {
2791 fromlen = len;
2792 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2793 }
f337b084
TH
2794 GROWING(0, cat, start, cur, len);
2795 Copy(aptr, cur, fromlen, char);
2796 cur += fromlen;
2797 len -= fromlen;
a6ec74c1 2798 }
f337b084
TH
2799 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2800 cur += len;
3c4fb04a 2801 SvTAINT(cat);
a6ec74c1 2802 break;
f337b084 2803 }
a6ec74c1 2804 case 'B':
f337b084 2805 case 'b': {
b83604b4 2806 const char *str, *end;
f337b084
TH
2807 I32 l, field_len;
2808 U8 bits;
2809 bool utf8_source;
2810 U32 utf8_flags;
a6ec74c1 2811
fc241834 2812 fromstr = NEXTFROM;
b83604b4 2813 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2814 end = str + fromlen;
2815 if (DO_UTF8(fromstr)) {
2816 utf8_source = TRUE;
041457d9 2817 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2818 } else {
2819 utf8_source = FALSE;
2820 utf8_flags = 0; /* Unused, but keep compilers happy */
2821 }
2822 if (howlen == e_star) len = fromlen;
2823 field_len = (len+7)/8;
2824 GROWING(utf8, cat, start, cur, field_len);
2825 if (len > (I32)fromlen) len = fromlen;
2826 bits = 0;
2827 l = 0;
2828 if (datumtype == 'B')
2829 while (l++ < len) {
2830 if (utf8_source) {
95b63a38 2831 UV val = 0;
f337b084
TH
2832 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2833 bits |= val & 1;
2834 } else bits |= *str++ & 1;
2835 if (l & 7) bits <<= 1;
fc241834 2836 else {
f337b084
TH
2837 PUSH_BYTE(utf8, cur, bits);
2838 bits = 0;
a6ec74c1
JH
2839 }
2840 }
f337b084
TH
2841 else
2842 /* datumtype == 'b' */
2843 while (l++ < len) {
2844 if (utf8_source) {
95b63a38 2845 UV val = 0;
f337b084
TH
2846 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2847 if (val & 1) bits |= 0x80;
2848 } else if (*str++ & 1)
2849 bits |= 0x80;
2850 if (l & 7) bits >>= 1;
fc241834 2851 else {
f337b084
TH
2852 PUSH_BYTE(utf8, cur, bits);
2853 bits = 0;
a6ec74c1
JH
2854 }
2855 }
f337b084
TH
2856 l--;
2857 if (l & 7) {
fc241834 2858 if (datumtype == 'B')
f337b084 2859 bits <<= 7 - (l & 7);
fc241834 2860 else
f337b084
TH
2861 bits >>= 7 - (l & 7);
2862 PUSH_BYTE(utf8, cur, bits);
2863 l += 7;
a6ec74c1 2864 }
f337b084
TH
2865 /* Determine how many chars are left in the requested field */
2866 l /= 8;
2867 if (howlen == e_star) field_len = 0;
2868 else field_len -= l;
2869 Zero(cur, field_len, char);
2870 cur += field_len;
a6ec74c1 2871 break;
f337b084 2872 }
a6ec74c1 2873 case 'H':
f337b084 2874 case 'h': {
10516c54 2875 const char *str, *end;
f337b084
TH
2876 I32 l, field_len;
2877 U8 bits;
2878 bool utf8_source;
2879 U32 utf8_flags;
a6ec74c1 2880
fc241834 2881 fromstr = NEXTFROM;
10516c54 2882 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2883 end = str + fromlen;
2884 if (DO_UTF8(fromstr)) {
2885 utf8_source = TRUE;
041457d9 2886 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2887 } else {
2888 utf8_source = FALSE;
2889 utf8_flags = 0; /* Unused, but keep compilers happy */
2890 }
2891 if (howlen == e_star) len = fromlen;
2892 field_len = (len+1)/2;
2893 GROWING(utf8, cat, start, cur, field_len);
2894 if (!utf8 && len > (I32)fromlen) len = fromlen;
2895 bits = 0;
2896 l = 0;
2897 if (datumtype == 'H')
2898 while (l++ < len) {
2899 if (utf8_source) {
95b63a38 2900 UV val = 0;
f337b084
TH
2901 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2902 if (val < 256 && isALPHA(val))
2903 bits |= (val + 9) & 0xf;
a6ec74c1 2904 else
f337b084
TH
2905 bits |= val & 0xf;
2906 } else if (isALPHA(*str))
2907 bits |= (*str++ + 9) & 0xf;
2908 else
2909 bits |= *str++ & 0xf;
2910 if (l & 1) bits <<= 4;
fc241834 2911 else {
f337b084
TH
2912 PUSH_BYTE(utf8, cur, bits);
2913 bits = 0;
a6ec74c1
JH
2914 }
2915 }
f337b084
TH
2916 else
2917 while (l++ < len) {
2918 if (utf8_source) {
95b63a38 2919 UV val = 0;
f337b084
TH
2920 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2921 if (val < 256 && isALPHA(val))
2922 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2923 else
f337b084
TH
2924 bits |= (val & 0xf) << 4;
2925 } else if (isALPHA(*str))
2926 bits |= ((*str++ + 9) & 0xf) << 4;
2927 else
2928 bits |= (*str++ & 0xf) << 4;
2929 if (l & 1) bits >>= 4;
fc241834 2930 else {
f337b084
TH
2931 PUSH_BYTE(utf8, cur, bits);
2932 bits = 0;
a6ec74c1 2933 }
fc241834 2934 }
f337b084
TH
2935 l--;
2936 if (l & 1) {
2937 PUSH_BYTE(utf8, cur, bits);
2938 l++;
2939 }
2940 /* Determine how many chars are left in the requested field */
2941 l /= 2;
2942 if (howlen == e_star) field_len = 0;
2943 else field_len -= l;
2944 Zero(cur, field_len, char);
2945 cur += field_len;
2946 break;
fc241834
RGS
2947 }
2948 case 'c':
f337b084
TH
2949 while (len-- > 0) {
2950 IV aiv;
2951 fromstr = NEXTFROM;
2952 aiv = SvIV(fromstr);
2953 if ((-128 > aiv || aiv > 127) &&
2954 ckWARN(WARN_PACK))
2955 Perl_warner(aTHX_ packWARN(WARN_PACK),
2956 "Character in 'c' format wrapped in pack");
585ec06d 2957 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2958 }
2959 break;
2960 case 'C':
f337b084
TH
2961 if (len == 0) {
2962 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2963 break;
2964 }
a6ec74c1 2965 while (len-- > 0) {
f337b084 2966 IV aiv;
a6ec74c1 2967 fromstr = NEXTFROM;
f337b084
TH
2968 aiv = SvIV(fromstr);
2969 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2970 ckWARN(WARN_PACK))
2971 Perl_warner(aTHX_ packWARN(WARN_PACK),
2972 "Character in 'C' format wrapped in pack");
1651fc44 2973 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2974 }
fc241834
RGS
2975 break;
2976 case 'W': {
2977 char *end;
670f1322 2978 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2979
2980 end = start+SvLEN(cat)-1;
2981 if (utf8) end -= UTF8_MAXLEN-1;
2982 while (len-- > 0) {
2983 UV auv;
2984 fromstr = NEXTFROM;
2985 auv = SvUV(fromstr);
2986 if (in_bytes) auv = auv % 0x100;
2987 if (utf8) {
2988 W_utf8:
2989 if (cur > end) {
2990 *cur = '\0';
b162af07 2991 SvCUR_set(cat, cur - start);
fc241834
RGS
2992
2993 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2994 end = start+SvLEN(cat)-UTF8_MAXLEN;
2995 }
230e1fce
NC
2996 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2997 NATIVE_TO_UNI(auv),
041457d9 2998 warn_utf8 ?
230e1fce 2999 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3000 } else {
3001 if (auv >= 0x100) {
3002 if (!SvUTF8(cat)) {
3003 *cur = '\0';
b162af07 3004 SvCUR_set(cat, cur - start);
fc241834
RGS
3005 marked_upgrade(aTHX_ cat, symptr);
3006 lookahead.flags |= FLAG_DO_UTF8;
3007 lookahead.strbeg = symptr->strbeg;
3008 utf8 = 1;
3009 start = SvPVX(cat);
3010 cur = start + SvCUR(cat);
3011 end = start+SvLEN(cat)-UTF8_MAXLEN;
3012 goto W_utf8;
3013 }
3014 if (ckWARN(WARN_PACK))
3015 Perl_warner(aTHX_ packWARN(WARN_PACK),
3016 "Character in 'W' format wrapped in pack");
3017 auv &= 0xff;
3018 }
3019 if (cur >= end) {
3020 *cur = '\0';
b162af07 3021 SvCUR_set(cat, cur - start);
fc241834
RGS
3022 GROWING(0, cat, start, cur, len+1);
3023 end = start+SvLEN(cat)-1;
3024 }
fe2774ed 3025 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3026 }
3027 }
3028 break;
fc241834
RGS
3029 }
3030 case 'U': {
3031 char *end;
3032
3033 if (len == 0) {
3034 if (!(symptr->flags & FLAG_DO_UTF8)) {
3035 marked_upgrade(aTHX_ cat, symptr);
3036 lookahead.flags |= FLAG_DO_UTF8;
3037 lookahead.strbeg = symptr->strbeg;
3038 }
3039 utf8 = 0;
3040 goto no_change;
3041 }
3042
3043 end = start+SvLEN(cat);
3044 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3045 while (len-- > 0) {
fc241834 3046 UV auv;
a6ec74c1 3047 fromstr = NEXTFROM;
fc241834
RGS
3048 auv = SvUV(fromstr);
3049 if (utf8) {
230e1fce 3050 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3051 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3052 warn_utf8 ?
fc241834
RGS
3053 0 : UNICODE_ALLOW_ANY);
3054 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3055 *cur = '\0';
b162af07 3056 SvCUR_set(cat, cur - start);
fc241834
RGS
3057 GROWING(0, cat, start, cur,
3058 len+(endb-buffer)*UTF8_EXPAND);
3059 end = start+SvLEN(cat);
3060 }
64844641 3061 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3062 } else {
3063 if (cur >= end) {
3064 *cur = '\0';
b162af07 3065 SvCUR_set(cat, cur - start);
fc241834
RGS
3066 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3067 end = start+SvLEN(cat)-UTF8_MAXLEN;
3068 }
230e1fce 3069 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3070 warn_utf8 ?
230e1fce 3071 0 : UNICODE_ALLOW_ANY);
fc241834 3072 }
a6ec74c1 3073 }
a6ec74c1 3074 break;
fc241834 3075 }
a6ec74c1
JH
3076 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3077 case 'f':
a6ec74c1 3078 while (len-- > 0) {
f337b084
TH
3079 float afloat;
3080 NV anv;
a6ec74c1 3081 fromstr = NEXTFROM;
f337b084 3082 anv = SvNV(fromstr);
5cdb9e01 3083#ifdef __VOS__
f337b084 3084 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3085 during conversion from double to float into infinity, so we
3086 do it by hand. This code should either be generalized for
3087 any OS that needs it, or removed if and when VOS implements
3088 posix-976 (suggestion to support mapping to infinity).
3089 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3090{
3091extern const float _float_constants[];
f337b084 3092 if (anv > FLT_MAX)
fc241834 3093 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3094 else if (anv < -FLT_MAX)
fc241834 3095 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3096 else afloat = (float) anv;
3722f0dc 3097}
f337b084 3098#else /* __VOS__ */
baf3cf9c 3099# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3100 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3101 * on Alpha; fake it if we don't have them.
3102 */
f337b084 3103 if (anv > FLT_MAX)
fc241834 3104 afloat = FLT_MAX;
f337b084 3105 else if (anv < -FLT_MAX)
fc241834 3106 afloat = -FLT_MAX;
f337b084 3107 else afloat = (float)anv;
baf3cf9c 3108# else
f337b084 3109 afloat = (float)anv;
baf3cf9c 3110# endif
f337b084 3111#endif /* __VOS__ */
1109a392 3112 DO_BO_PACK_N(afloat, float);
f337b084 3113 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3114 }
3115 break;
3116 case 'd':
a6ec74c1 3117 while (len-- > 0) {
f337b084
TH
3118 double adouble;
3119 NV anv;
a6ec74c1 3120 fromstr = NEXTFROM;
f337b084 3121 anv = SvNV(fromstr);
5cdb9e01 3122#ifdef __VOS__
f337b084 3123 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3124 during conversion from long double to double into infinity,
3125 so we do it by hand. This code should either be generalized
3126 for any OS that needs it, or removed if and when VOS
3127 implements posix-976 (suggestion to support mapping to
3128 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3129{
3130extern const double _double_constants[];
f337b084 3131 if (anv > DBL_MAX)
fc241834 3132 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3133 else if (anv < -DBL_MAX)
fc241834 3134 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3135 else adouble = (double) anv;
3722f0dc 3136}
f337b084 3137#else /* __VOS__ */
baf3cf9c 3138# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3139 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3140 * on Alpha; fake it if we don't have them.
3141 */
f337b084 3142 if (anv > DBL_MAX)
fc241834 3143 adouble = DBL_MAX;
f337b084 3144 else if (anv < -DBL_MAX)
fc241834 3145 adouble = -DBL_MAX;
f337b084 3146 else adouble = (double)anv;
baf3cf9c 3147# else
f337b084 3148 adouble = (double)anv;
baf3cf9c 3149# endif
f337b084 3150#endif /* __VOS__ */
1109a392 3151 DO_BO_PACK_N(adouble, double);
f337b084 3152 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3153 }
3154 break;
fc241834
RGS
3155 case 'F': {
3156 NV anv;
1109a392 3157 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3158 while (len-- > 0) {
3159 fromstr = NEXTFROM;
3160 anv = SvNV(fromstr);
1109a392 3161 DO_BO_PACK_N(anv, NV);
fc241834 3162 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3163 }
3164 break;
fc241834 3165 }
92d41999 3166#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3167 case 'D': {
3168 long double aldouble;
1109a392
MHM
3169 /* long doubles can have unused bits, which may be nonzero */
3170 Zero(&aldouble, 1, long double);
92d41999
JH
3171 while (len-- > 0) {
3172 fromstr = NEXTFROM;
3173 aldouble = (long double)SvNV(fromstr);
1109a392 3174 DO_BO_PACK_N(aldouble, long double);
fc241834 3175 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3176 }
3177 break;
fc241834 3178 }
92d41999 3179#endif
7212898e 3180#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3181 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3182#endif
a6ec74c1
JH
3183 case 'n':
3184 while (len-- > 0) {
f337b084 3185 I16 ai16;
a6ec74c1 3186 fromstr = NEXTFROM;
ef108786 3187 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3188#ifdef HAS_HTONS
ef108786 3189 ai16 = PerlSock_htons(ai16);
a6ec74c1 3190#endif
f337b084 3191 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3192 }
3193 break;
7212898e 3194#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3195 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3196#endif
a6ec74c1
JH
3197 case 'v':
3198 while (len-- > 0) {
f337b084 3199 I16 ai16;
a6ec74c1 3200 fromstr = NEXTFROM;
ef108786 3201 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3202#ifdef HAS_HTOVS
ef108786 3203 ai16 = htovs(ai16);
a6ec74c1 3204#endif
f337b084 3205 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3206 }
3207 break;
49704364 3208 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3209#if SHORTSIZE != SIZE16
fc241834 3210 while (len-- > 0) {
f337b084 3211 unsigned short aushort;
fc241834
RGS
3212 fromstr = NEXTFROM;
3213 aushort = SvUV(fromstr);
3214 DO_BO_PACK(aushort, s);
f337b084 3215 PUSH_VAR(utf8, cur, aushort);
fc241834 3216 }
49704364
WL
3217 break;
3218#else
3219 /* Fall through! */
a6ec74c1 3220#endif
49704364 3221 case 'S':
fc241834 3222 while (len-- > 0) {
f337b084 3223 U16 au16;
fc241834
RGS
3224 fromstr = NEXTFROM;
3225 au16 = (U16)SvUV(fromstr);
3226 DO_BO_PACK(au16, 16);
f337b084 3227 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3228 }
3229 break;
49704364 3230 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3231#if SHORTSIZE != SIZE16
fc241834 3232 while (len-- > 0) {
f337b084 3233 short ashort;
fc241834
RGS
3234 fromstr = NEXTFROM;
3235 ashort = SvIV(fromstr);
3236 DO_BO_PACK(ashort, s);
f337b084 3237 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3238 }
49704364
WL
3239 break;
3240#else
3241 /* Fall through! */
a6ec74c1 3242#endif
49704364
WL
3243 case 's':
3244 while (len-- > 0) {
f337b084 3245 I16 ai16;
49704364 3246 fromstr = NEXTFROM;
ef108786
MHM
3247 ai16 = (I16)SvIV(fromstr);
3248 DO_BO_PACK(ai16, 16);
f337b084 3249 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3250 }
3251 break;
3252 case 'I':
49704364 3253 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3254 while (len-- > 0) {
f337b084 3255 unsigned int auint;
a6ec74c1
JH
3256 fromstr = NEXTFROM;
3257 auint = SvUV(fromstr);
1109a392 3258 DO_BO_PACK(auint, i);
f337b084 3259 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3260 }
3261 break;
92d41999
JH
3262 case 'j':
3263 while (len-- > 0) {
f337b084 3264 IV aiv;
92d41999
JH
3265 fromstr = NEXTFROM;
3266 aiv = SvIV(fromstr);
1109a392
MHM
3267#if IVSIZE == INTSIZE
3268 DO_BO_PACK(aiv, i);
3269#elif IVSIZE == LONGSIZE
3270 DO_BO_PACK(aiv, l);
3271#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3272 DO_BO_PACK(aiv, 64);
f337b084
TH
3273#else
3274 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3275#endif
f337b084 3276 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3277 }
3278 break;
3279 case 'J':
3280 while (len-- > 0) {
f337b084 3281 UV auv;
92d41999
JH
3282 fromstr = NEXTFROM;
3283 auv = SvUV(fromstr);
1109a392
MHM
3284#if UVSIZE == INTSIZE
3285 DO_BO_PACK(auv, i);
3286#elif UVSIZE == LONGSIZE
3287 DO_BO_PACK(auv, l);
3288#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3289 DO_BO_PACK(auv, 64);
f337b084
TH
3290#else
3291 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3292#endif
f337b084 3293 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3294 }
3295 break;
a6ec74c1
JH
3296 case 'w':
3297 while (len-- > 0) {
f337b084 3298 NV anv;
a6ec74c1 3299 fromstr = NEXTFROM;
15e9f109 3300 anv = SvNV(fromstr);
a6ec74c1 3301
f337b084
TH
3302 if (anv < 0) {
3303 *cur = '\0';
b162af07 3304 SvCUR_set(cat, cur - start);
49704364 3305 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3306 }
a6ec74c1 3307
196b62db
NC
3308 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3309 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3310 any negative IVs will have already been got by the croak()
3311 above. IOK is untrue for fractions, so we test them
3312 against UV_MAX_P1. */
f337b084
TH
3313 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3314 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3315 char *in = buf + sizeof(buf);
196b62db 3316 UV auv = SvUV(fromstr);
a6ec74c1
JH
3317
3318 do {
eb160463 3319 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3320 auv >>= 7;
3321 } while (auv);
3322 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3323 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3324 in, (buf + sizeof(buf)) - in);
3325 } else if (SvPOKp(fromstr))
3326 goto w_string;
a6ec74c1 3327 else if (SvNOKp(fromstr)) {
0258719b
NC
3328 /* 10**NV_MAX_10_EXP is the largest power of 10
3329 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3330 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3331 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3332 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3333 Some C compilers are strict about integral constant
3334 expressions so we conservatively divide by a slightly
3335 smaller integer instead of multiplying by the exact
3336 floating-point value.
0258719b
NC
3337 */
3338#ifdef NV_MAX_10_EXP
f337b084 3339 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3340 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3341#else
f337b084 3342 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3343 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3344#endif
a6ec74c1
JH
3345 char *in = buf + sizeof(buf);
3346
8b6e33c7 3347 anv = Perl_floor(anv);
a6ec74c1 3348 do {
8b6e33c7 3349 const NV next = Perl_floor(anv / 128);
a6ec74c1 3350 if (in <= buf) /* this cannot happen ;-) */
49704364 3351 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3352 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3353 anv = next;
3354 } while (anv > 0);
a6ec74c1 3355 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3356 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3357 in, (buf + sizeof(buf)) - in);
3358 } else {
8b6e33c7
AL
3359 const char *from;
3360 char *result, *in;
735b914b
JH
3361 SV *norm;
3362 STRLEN len;
3363 bool done;
3364
f337b084 3365 w_string:
735b914b 3366 /* Copy string and check for compliance */
349d4f2f 3367 from = SvPV_const(fromstr, len);
735b914b 3368 if ((norm = is_an_int(from, len)) == NULL)
49704364 3369 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3370
a02a5408 3371 Newx(result, len, char);
735b914b
JH
3372 in = result + len;
3373 done = FALSE;
f337b084 3374 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3375 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3376 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3377 in, (result + len) - in);
735b914b
JH
3378 Safefree(result);
3379 SvREFCNT_dec(norm); /* free norm */
fc241834 3380 }
a6ec74c1
JH
3381 }
3382 break;
3383 case 'i':
49704364 3384 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3385 while (len-- > 0) {
f337b084 3386 int aint;
a6ec74c1
JH
3387 fromstr = NEXTFROM;
3388 aint = SvIV(fromstr);
1109a392 3389 DO_BO_PACK(aint, i);
f337b084 3390 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3391 }
3392 break;
7212898e 3393#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3394 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3395#endif
a6ec74c1
JH
3396 case 'N':
3397 while (len-- > 0) {
f337b084 3398 U32 au32;
a6ec74c1 3399 fromstr = NEXTFROM;
ef108786 3400 au32 = SvUV(fromstr);
a6ec74c1 3401#ifdef HAS_HTONL
ef108786 3402 au32 = PerlSock_htonl(au32);
a6ec74c1 3403#endif
f337b084 3404 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3405 }
3406 break;
7212898e 3407#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3408 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3409#endif
a6ec74c1
JH
3410 case 'V':
3411 while (len-- > 0) {
f337b084 3412 U32 au32;
a6ec74c1 3413 fromstr = NEXTFROM;
ef108786 3414 au32 = SvUV(fromstr);
a6ec74c1 3415#ifdef HAS_HTOVL
ef108786 3416 au32 = htovl(au32);
a6ec74c1 3417#endif
f337b084 3418 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3419 }
3420 break;
49704364 3421 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3422#if LONGSIZE != SIZE32
fc241834 3423 while (len-- > 0) {
f337b084 3424 unsigned long aulong;
fc241834
RGS
3425 fromstr = NEXTFROM;
3426 aulong = SvUV(fromstr);
3427 DO_BO_PACK(aulong, l);
f337b084 3428 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3429 }
49704364
WL
3430 break;
3431#else
3432 /* Fall though! */
a6ec74c1 3433#endif
49704364 3434 case 'L':
fc241834 3435 while (len-- > 0) {
f337b084 3436 U32 au32;
fc241834
RGS
3437 fromstr = NEXTFROM;
3438 au32 = SvUV(fromstr);
3439 DO_BO_PACK(au32, 32);
f337b084 3440 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3441 }
3442 break;
49704364 3443 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3444#if LONGSIZE != SIZE32
fc241834 3445 while (len-- > 0) {
f337b084 3446 long along;
fc241834
RGS
3447 fromstr = NEXTFROM;
3448 along = SvIV(fromstr);
3449 DO_BO_PACK(along, l);
f337b084 3450 PUSH_VAR(utf8, cur, along);
a6ec74c1 3451 }
49704364
WL
3452 break;
3453#else
3454 /* Fall though! */
a6ec74c1 3455#endif
49704364
WL
3456 case 'l':
3457 while (len-- > 0) {
f337b084 3458 I32 ai32;
49704364 3459 fromstr = NEXTFROM;
ef108786
MHM
3460 ai32 = SvIV(fromstr);
3461 DO_BO_PACK(ai32, 32);
f337b084 3462 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3463 }
3464 break;
3465#ifdef HAS_QUAD
3466 case 'Q':
3467 while (len-- > 0) {
f337b084 3468 Uquad_t auquad;
a6ec74c1 3469 fromstr = NEXTFROM;
f337b084 3470 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3471 DO_BO_PACK(auquad, 64);
f337b084 3472 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3473 }
3474 break;
3475 case 'q':
3476 while (len-- > 0) {
f337b084 3477 Quad_t aquad;
a6ec74c1
JH
3478 fromstr = NEXTFROM;
3479 aquad = (Quad_t)SvIV(fromstr);
1109a392 3480 DO_BO_PACK(aquad, 64);
f337b084 3481 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3482 }
3483 break;
f337b084 3484#endif /* HAS_QUAD */
a6ec74c1
JH
3485 case 'P':
3486 len = 1; /* assume SV is correct length */
f337b084 3487 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3488 /* Fall through! */
a6ec74c1
JH
3489 case 'p':
3490 while (len-- > 0) {
83003860 3491 const char *aptr;
f337b084 3492
a6ec74c1 3493 fromstr = NEXTFROM;
28a4f200
TH
3494 SvGETMAGIC(fromstr);
3495 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3496 else {
a6ec74c1
JH
3497 /* XXX better yet, could spirit away the string to
3498 * a safe spot and hang on to it until the result
3499 * of pack() (and all copies of the result) are
3500 * gone.
3501 */
041457d9
DM
3502 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3503 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3504 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3505 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3506 }
3507 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3508 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3509 else
2596d9fe 3510 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3511 }
07409e01 3512 DO_BO_PACK_PC(aptr);
f337b084 3513 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3514 }
3515 break;
fc241834 3516 case 'u': {
f7fe979e 3517 const char *aptr, *aend;
fc241834 3518 bool from_utf8;
f337b084 3519
a6ec74c1 3520 fromstr = NEXTFROM;
fc241834
RGS
3521 if (len <= 2) len = 45;
3522 else len = len / 3 * 3;
3523 if (len >= 64) {
0fdc08b6
RGS
3524 if (ckWARN(WARN_PACK))
3525 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3526 "Field too wide in 'u' format in pack");
3527 len = 63;
3528 }
83003860 3529 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3530 from_utf8 = DO_UTF8(fromstr);
3531 if (from_utf8) {
3532 aend = aptr + fromlen;
3533 fromlen = sv_len_utf8(fromstr);
3534 } else aend = NULL; /* Unused, but keep compilers happy */
3535 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3536 while (fromlen > 0) {
fc241834 3537 U8 *end;
a6ec74c1 3538 I32 todo;
fc241834 3539 U8 hunk[1+63/3*4+1];
a6ec74c1 3540
eb160463 3541 if ((I32)fromlen > len)
a6ec74c1
JH
3542 todo = len;
3543 else
3544 todo = fromlen;
fc241834
RGS
3545 if (from_utf8) {
3546 char buffer[64];
3547 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3548 'u' | TYPE_IS_PACK)) {
3549 *cur = '\0';
b162af07 3550 SvCUR_set(cat, cur - start);
fc241834
RGS
3551 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3552 }
3553 end = doencodes(hunk, buffer, todo);
3554 } else {
3555 end = doencodes(hunk, aptr, todo);
3556 aptr += todo;
3557 }
3558 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3559 fromlen -= todo;
3560 }
a6ec74c1
JH
3561 break;
3562 }
f337b084
TH
3563 }
3564 *cur = '\0';
b162af07 3565 SvCUR_set(cat, cur - start);
f337b084 3566 no_change:
49704364 3567 *symptr = lookahead;
a6ec74c1 3568 }
49704364 3569 return beglist;
18529408
IZ
3570}
3571#undef NEXTFROM
3572
3573
3574PP(pp_pack)
3575{
97aff369 3576 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3577 register SV *cat = TARG;
3578 STRLEN fromlen;
349d4f2f
NC
3579 SV *pat_sv = *++MARK;
3580 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3581 register const char *patend = pat + fromlen;
18529408
IZ
3582
3583 MARK++;
76f68e9b 3584 sv_setpvs(cat, "");
f337b084 3585 SvUTF8_off(cat);
18529408 3586
7accc089 3587 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3588
a6ec74c1
JH
3589 SvSETMAGIC(cat);
3590 SP = ORIGMARK;
3591 PUSHs(cat);
3592 RETURN;
3593}
a6ec74c1 3594
73cb7263
NC
3595/*
3596 * Local variables:
3597 * c-indentation-style: bsd
3598 * c-basic-offset: 4
3599 * indent-tabs-mode: t
3600 * End:
3601 *
37442d52
RGS
3602 * ex: set ts=8 sts=4 sw=4 noet:
3603 */