This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove Test-Simple's 00compile.t from core
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
7212898e 73#if PERL_VERSION >= 9
f337b084
TH
74# define PERL_PACK_CAN_BYTEORDER
75# define PERL_PACK_CAN_SHRIEKSIGN
76#endif
77
78#ifndef CHAR_BIT
79# define CHAR_BIT 8
7212898e 80#endif
3473cf63
RGS
81/* Maximum number of bytes to which a byte can grow due to upgrade */
82#define UTF8_EXPAND 2
7212898e 83
a6ec74c1 84/*
a6ec74c1
JH
85 * Offset for integer pack/unpack.
86 *
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
89 */
90
91/*
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
97 */
98/*
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
101 */
102#define SIZE16 2
103#define SIZE32 4
104
105/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
106 --jhi Feb 1999 */
107
1109a392
MHM
108#if U16SIZE > SIZE16 || U32SIZE > SIZE32
109# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
110# define OFF16(p) ((char*)(p))
111# define OFF32(p) ((char*)(p))
a6ec74c1 112# else
1109a392 113# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
114# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116# else
08ca2aa3 117 ++++ bad cray byte order
a6ec74c1
JH
118# endif
119# endif
a6ec74c1 120#else
08ca2aa3
TH
121# define OFF16(p) ((char *) (p))
122# define OFF32(p) ((char *) (p))
a6ec74c1
JH
123#endif
124
f337b084
TH
125/* Only to be used inside a loop (see the break) */
126#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (utf8) { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 } else { \
130 Copy(s, OFF16(p), SIZE16, char); \
131 (s) += SIZE16; \
132 } \
133} STMT_END
134
135/* Only to be used inside a loop (see the break) */
136#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (utf8) { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 } else { \
140 Copy(s, OFF32(p), SIZE32, char); \
141 (s) += SIZE32; \
142 } \
143} STMT_END
144
145#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
147
148/* Only to be used inside a loop (see the break) */
f337b084 149#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
150STMT_START { \
151 if (utf8) { \
f337b084
TH
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
154 } else { \
155 Copy(s, (char *) &var, sizeof(var), char); \
156 s += sizeof(var); \
157 } \
08ca2aa3
TH
158} STMT_END
159
f337b084 160#define PUSH_VAR(utf8, aptr, var) \
230e1fce 161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 162
49704364
WL
163/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164#define MAX_SUB_TEMPLATE_LEVEL 100
165
66c611c5 166/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
167#define FLAG_WAS_UTF8 0x40
168#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 169#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 170#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
171#define FLAG_SLASH 0x04
172#define FLAG_COMMA 0x02
173#define FLAG_PACK 0x01
174
a6ec74c1
JH
175STATIC SV *
176S_mul128(pTHX_ SV *sv, U8 m)
177{
178 STRLEN len;
179 char *s = SvPV(sv, len);
180 char *t;
a6ec74c1 181
7918f24d
NC
182 PERL_ARGS_ASSERT_MUL128;
183
a6ec74c1 184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 185 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
186
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
189 sv = tmpNew;
190 s = SvPV(sv, len);
191 }
192 t = s + len - 1;
193 while (!*t) /* trailing '\0'? */
194 t--;
195 while (t > s) {
f7fe979e 196 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
197 *(t--) = '0' + (char)(i % 10);
198 m = (char)(i / 10);
a6ec74c1
JH
199 }
200 return (sv);
201}
202
203/* Explosives and implosives. */
204
205#if 'I' == 73 && 'J' == 74
206/* On an ASCII/ISO kind of system */
207#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
208#else
209/*
210 Some other sort of character set - use memchr() so we don't match
211 the null byte.
212 */
213#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214#endif
215
66c611c5 216/* type modifiers */
62f95557 217#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
218#define TYPE_IS_BIG_ENDIAN 0x200
219#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 220#define TYPE_IS_PACK 0x800
1109a392 221#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 222#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
223#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
224
7212898e 225#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 226# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 227#else
28be1210 228# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
229#endif
230
231#ifndef PERL_PACK_CAN_BYTEORDER
232/* Put "can't" first because it is shorter */
233# define TYPE_ENDIANNESS(t) 0
234# define TYPE_NO_ENDIANNESS(t) (t)
235
236# define ENDIANNESS_ALLOWED_TYPES ""
237
238# define DO_BO_UNPACK(var, type)
239# define DO_BO_PACK(var, type)
07409e01
NC
240# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
242# define DO_BO_UNPACK_N(var, type)
243# define DO_BO_PACK_N(var, type)
244# define DO_BO_UNPACK_P(var)
245# define DO_BO_PACK_P(var)
45f723e1
NC
246# define DO_BO_UNPACK_PC(var)
247# define DO_BO_PACK_PC(var)
66c611c5 248
f337b084 249#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
250
251# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
253
254# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
255
256# define DO_BO_UNPACK(var, type) \
1109a392 257 STMT_START { \
66c611c5 258 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
261 default: break; \
262 } \
263 } STMT_END
264
7212898e 265# define DO_BO_PACK(var, type) \
1109a392 266 STMT_START { \
66c611c5 267 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
270 default: break; \
271 } \
272 } STMT_END
273
07409e01 274# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 275 STMT_START { \
66c611c5 276 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 277 case TYPE_IS_BIG_ENDIAN: \
07409e01 278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
282 break; \
283 default: \
284 break; \
285 } \
286 } STMT_END
287
07409e01 288# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 289 STMT_START { \
66c611c5 290 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 291 case TYPE_IS_BIG_ENDIAN: \
07409e01 292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
296 break; \
297 default: \
298 break; \
299 } \
300 } STMT_END
301
7212898e 302# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
303 STMT_START { \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
312 break; \
313 default: \
314 break; \
315 } \
316 } STMT_END
317
7212898e 318# if PTRSIZE == INTSIZE
07409e01
NC
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 323# elif PTRSIZE == LONGSIZE
07409e01
NC
324# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
325# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
326# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
327# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
328# elif PTRSIZE == IVSIZE
329# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
330# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
331# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
332# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
333# else
334# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
335# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
336# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
337# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 338# endif
1109a392 339
7212898e 340# if defined(my_htolen) && defined(my_letohn) && \
1109a392 341 defined(my_htoben) && defined(my_betohn)
7212898e 342# define DO_BO_UNPACK_N(var, type) \
1109a392 343 STMT_START { \
66c611c5 344 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
347 default: break; \
348 } \
349 } STMT_END
350
7212898e 351# define DO_BO_PACK_N(var, type) \
1109a392 352 STMT_START { \
66c611c5 353 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
356 default: break; \
357 } \
358 } STMT_END
7212898e
NC
359# else
360# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
361# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
362# endif
363
f337b084 364#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 365
78d46eaa 366#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 367#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
368#define PACK_SIZE_MASK 0x3F
369
78d46eaa
NC
370/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
372
373#if TYPE_IS_SHRIEKING != 0x100
374 ++++shriek offset should be 256
375#endif
376
377typedef U8 packprops_t;
78d46eaa
NC
378#if 'J'-'I' == 1
379/* ASCII */
f43a9a31 380STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
381 /* normal */
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0,
1651fc44 387 /* C */ sizeof(unsigned char),
80a13697 388#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 389 /* D */ LONG_DOUBLESIZE,
80a13697 390#else
a7a3cfaa 391 0,
80a13697 392#endif
a7a3cfaa
TH
393 0,
394 /* F */ NVSIZE,
395 0, 0,
396 /* I */ sizeof(unsigned int),
397 /* J */ UVSIZE,
398 0,
399 /* L */ SIZE32,
400 0,
401 /* N */ SIZE32,
402 0, 0,
80a13697 403#if defined(HAS_QUAD)
a7a3cfaa 404 /* Q */ sizeof(Uquad_t),
80a13697 405#else
a7a3cfaa 406 0,
80a13697 407#endif
a7a3cfaa
TH
408 0,
409 /* S */ SIZE16,
410 0,
411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
412 /* V */ SIZE32,
413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* c */ sizeof(char),
416 /* d */ sizeof(double),
417 0,
418 /* f */ sizeof(float),
419 0, 0,
420 /* i */ sizeof(int),
421 /* j */ IVSIZE,
422 0,
423 /* l */ SIZE32,
424 0,
425 /* n */ SIZE16,
426 0,
427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 428#if defined(HAS_QUAD)
a7a3cfaa 429 /* q */ sizeof(Quad_t),
80a13697 430#else
a7a3cfaa 431 0,
80a13697 432#endif
a7a3cfaa
TH
433 0,
434 /* s */ SIZE16,
435 0, 0,
436 /* v */ SIZE16,
437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0,
447 /* shrieking */
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* I */ sizeof(unsigned int),
454 0, 0,
455 /* L */ sizeof(unsigned long),
456 0,
7212898e 457#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 458 /* N */ SIZE32,
7212898e 459#else
a7a3cfaa 460 0,
7212898e 461#endif
a7a3cfaa
TH
462 0, 0, 0, 0,
463 /* S */ sizeof(unsigned short),
464 0, 0,
7212898e 465#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 466 /* V */ SIZE32,
7212898e 467#else
a7a3cfaa 468 0,
7212898e 469#endif
a7a3cfaa
TH
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0,
472 /* i */ sizeof(int),
473 0, 0,
474 /* l */ sizeof(long),
475 0,
7212898e 476#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 477 /* n */ SIZE16,
7212898e 478#else
a7a3cfaa 479 0,
7212898e 480#endif
a7a3cfaa
TH
481 0, 0, 0, 0,
482 /* s */ sizeof(short),
483 0, 0,
7212898e 484#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 485 /* v */ SIZE16,
7212898e 486#else
a7a3cfaa 487 0,
7212898e 488#endif
a7a3cfaa
TH
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
498};
499#else
500/* EBCDIC (or bust) */
f43a9a31 501STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
502 /* normal */
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 0, 0, 0,
512 /* c */ sizeof(char),
513 /* d */ sizeof(double),
514 0,
515 /* f */ sizeof(float),
516 0, 0,
517 /* i */ sizeof(int),
518 0, 0, 0, 0, 0, 0, 0,
519 /* j */ IVSIZE,
520 0,
521 /* l */ SIZE32,
522 0,
523 /* n */ SIZE16,
524 0,
525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 526#if defined(HAS_QUAD)
a7a3cfaa 527 /* q */ sizeof(Quad_t),
80a13697 528#else
a7a3cfaa 529 0,
80a13697 530#endif
a7a3cfaa
TH
531 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* s */ SIZE16,
533 0, 0,
534 /* v */ SIZE16,
535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 538 /* C */ sizeof(unsigned char),
80a13697 539#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 540 /* D */ LONG_DOUBLESIZE,
80a13697 541#else
a7a3cfaa 542 0,
80a13697 543#endif
a7a3cfaa
TH
544 0,
545 /* F */ NVSIZE,
546 0, 0,
547 /* I */ sizeof(unsigned int),
548 0, 0, 0, 0, 0, 0, 0,
549 /* J */ UVSIZE,
550 0,
551 /* L */ SIZE32,
552 0,
553 /* N */ SIZE32,
554 0, 0,
80a13697 555#if defined(HAS_QUAD)
a7a3cfaa 556 /* Q */ sizeof(Uquad_t),
80a13697 557#else
a7a3cfaa 558 0,
80a13697 559#endif
a7a3cfaa
TH
560 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 /* S */ SIZE16,
562 0,
563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
564 /* V */ SIZE32,
565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* shrieking */
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 /* i */ sizeof(int),
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* l */ sizeof(long),
581 0,
7212898e 582#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 583 /* n */ SIZE16,
7212898e 584#else
a7a3cfaa 585 0,
7212898e 586#endif
a7a3cfaa
TH
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* s */ sizeof(short),
589 0, 0,
7212898e 590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 591 /* v */ SIZE16,
7212898e 592#else
a7a3cfaa 593 0,
7212898e 594#endif
a7a3cfaa
TH
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0,
598 /* I */ sizeof(unsigned int),
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* L */ sizeof(unsigned long),
601 0,
7212898e 602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 603 /* N */ SIZE32,
7212898e 604#else
a7a3cfaa 605 0,
7212898e 606#endif
a7a3cfaa
TH
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 /* S */ sizeof(unsigned short),
609 0, 0,
7212898e 610#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 611 /* V */ SIZE32,
7212898e 612#else
a7a3cfaa 613 0,
7212898e 614#endif
a7a3cfaa
TH
615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
617};
618#endif
619
08ca2aa3 620STATIC U8
f7fe979e 621uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 622{
08ca2aa3 623 STRLEN retlen;
0bcc34c2 624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
626 /* We try to process malformed UTF-8 as much as possible (preferrably with
627 warnings), but these two mean we make no progress in the string and
628 might enter an infinite loop */
629 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 632 if (val >= 0x100) {
a2a5de95
NC
633 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
634 "Character in '%c' format wrapped in unpack",
635 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
636 val &= 0xff;
637 }
638 *s += retlen;
fe2774ed 639 return (U8)val;
08ca2aa3
TH
640}
641
f337b084
TH
642#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
643 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
644 *(U8 *)(s)++)
645
646STATIC bool
f7fe979e 647uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
648{
649 UV val;
650 STRLEN retlen;
f7fe979e 651 const char *from = *s;
08ca2aa3 652 int bad = 0;
f7fe979e 653 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
654 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
655 for (;buf_len > 0; buf_len--) {
656 if (from >= end) return FALSE;
f337b084 657 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
658 if (retlen == (STRLEN) -1 || retlen == 0) {
659 from += UTF8SKIP(from);
660 bad |= 1;
661 } else from += retlen;
662 if (val >= 0x100) {
663 bad |= 2;
664 val &= 0xff;
665 }
fe2774ed 666 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
667 }
668 /* We have enough characters for the buffer. Did we have problems ? */
669 if (bad) {
670 if (bad & 1) {
671 /* Rewalk the string fragment while warning */
f7fe979e 672 const char *ptr;
9e27e96a 673 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
674 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
675 if (ptr >= end) break;
f337b084 676 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
677 }
678 if (from > end) from = end;
679 }
a2a5de95
NC
680 if ((bad & 2))
681 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 682 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
683 "Character(s) in '%c' format wrapped in %s",
684 (int) TYPE_NO_MODIFIERS(datumtype),
685 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
686 }
687 *s = from;
688 return TRUE;
689}
690
691STATIC bool
f7fe979e 692next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 693{
97aff369 694 dVAR;
08ca2aa3 695 STRLEN retlen;
0bcc34c2 696 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
697 if (val >= 0x100 || !ISUUCHAR(val) ||
698 retlen == (STRLEN) -1 || retlen == 0) {
699 *out = 0;
700 return FALSE;
701 }
702 *out = PL_uudmap[val] & 077;
f337b084 703 *s += retlen;
08ca2aa3
TH
704 return TRUE;
705}
78d46eaa 706
64844641 707STATIC char *
14333449 708S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 709 const U8 * const end = start + len;
64844641 710
7918f24d
NC
711 PERL_ARGS_ASSERT_BYTES_TO_UNI;
712
f337b084 713 while (start < end) {
48fa4626
TS
714 const UV uv = NATIVE_TO_ASCII(*start);
715 if (UNI_IS_INVARIANT(uv))
716 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
717 else {
718 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
719 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
720 }
721 start++;
722 }
64844641 723 return dest;
f337b084
TH
724}
725
230e1fce
NC
726#define PUSH_BYTES(utf8, cur, buf, len) \
727STMT_START { \
64844641
AL
728 if (utf8) \
729 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
730 else { \
731 Copy(buf, cur, len, char); \
732 (cur) += (len); \
733 } \
f337b084
TH
734} STMT_END
735
736#define GROWING(utf8, cat, start, cur, in_len) \
737STMT_START { \
738 STRLEN glen = (in_len); \
3473cf63 739 if (utf8) glen *= UTF8_EXPAND; \
f337b084 740 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 741 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
742 (cur) = (start) + SvCUR(cat); \
743 } \
744} STMT_END
745
746#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
747STMT_START { \
f7fe979e 748 const STRLEN glen = (in_len); \
f337b084 749 STRLEN gl = glen; \
3473cf63 750 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
751 if ((cur) + gl >= (start) + SvLEN(cat)) { \
752 *cur = '\0'; \
b162af07 753 SvCUR_set((cat), (cur) - (start)); \
0bd48802 754 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
755 (cur) = (start) + SvCUR(cat); \
756 } \
757 PUSH_BYTES(utf8, cur, buf, glen); \
758} STMT_END
759
760#define PUSH_BYTE(utf8, s, byte) \
761STMT_START { \
762 if (utf8) { \
f7fe979e 763 const U8 au8 = (byte); \
64844641 764 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
765 } else *(U8 *)(s)++ = (byte); \
766} STMT_END
767
768/* Only to be used inside a loop (see the break) */
769#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
770STMT_START { \
771 STRLEN retlen; \
772 if (str >= end) break; \
773 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
774 if (retlen == (STRLEN) -1 || retlen == 0) { \
775 *cur = '\0'; \
776 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
777 } \
778 str += retlen; \
779} STMT_END
780
f7fe979e
AL
781static const char *_action( const tempsym_t* symptr )
782{
10edeb5d 783 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
784}
785
206947d2 786/* Returns the sizeof() struct described by pat */
028d1f6d 787STATIC I32
f337b084 788S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 789{
f337b084 790 I32 total = 0;
206947d2 791
7918f24d
NC
792 PERL_ARGS_ASSERT_MEASURE_STRUCT;
793
49704364 794 while (next_symbol(symptr)) {
f337b084 795 I32 len;
f7fe979e 796 int size;
f337b084
TH
797
798 switch (symptr->howlen) {
fc241834 799 case e_star:
49704364 800 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 801 _action( symptr ) );
49704364 802 break;
f337b084
TH
803 default:
804 /* e_no_len and e_number */
805 len = symptr->length;
806 break;
49704364
WL
807 }
808
a7a3cfaa 809 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 810 if (!size) {
f7fe979e 811 int star;
80a13697
NC
812 /* endianness doesn't influence the size of a type */
813 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
814 default:
815 Perl_croak(aTHX_ "Invalid type '%c' in %s",
816 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 817 _action( symptr ) );
28be1210
TH
818#ifdef PERL_PACK_CAN_SHRIEKSIGN
819 case '.' | TYPE_IS_SHRIEKING:
820 case '@' | TYPE_IS_SHRIEKING:
821#endif
80a13697 822 case '@':
28be1210 823 case '.':
80a13697
NC
824 case '/':
825 case 'U': /* XXXX Is it correct? */
826 case 'w':
827 case 'u':
828 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 829 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 830 _action( symptr ) );
80a13697
NC
831 case '%':
832 size = 0;
833 break;
834 case '(':
fc241834
RGS
835 {
836 tempsym_t savsym = *symptr;
837 symptr->patptr = savsym.grpbeg;
838 symptr->patend = savsym.grpend;
839 /* XXXX Theoretically, we need to measure many times at
840 different positions, since the subexpression may contain
841 alignment commands, but be not of aligned length.
842 Need to detect this and croak(). */
843 size = measure_struct(symptr);
844 *symptr = savsym;
845 break;
846 }
80a13697
NC
847 case 'X' | TYPE_IS_SHRIEKING:
848 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
849 */
850 if (!len) /* Avoid division by 0 */
851 len = 1;
852 len = total % len; /* Assumed: the start is aligned. */
853 /* FALL THROUGH */
854 case 'X':
855 size = -1;
856 if (total < len)
f7fe979e 857 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
858 break;
859 case 'x' | TYPE_IS_SHRIEKING:
860 if (!len) /* Avoid division by 0 */
861 len = 1;
862 star = total % len; /* Assumed: the start is aligned. */
863 if (star) /* Other portable ways? */
864 len = len - star;
865 else
866 len = 0;
867 /* FALL THROUGH */
868 case 'x':
869 case 'A':
870 case 'Z':
871 case 'a':
80a13697
NC
872 size = 1;
873 break;
874 case 'B':
875 case 'b':
876 len = (len + 7)/8;
877 size = 1;
878 break;
879 case 'H':
880 case 'h':
881 len = (len + 1)/2;
882 size = 1;
883 break;
78d46eaa 884
80a13697
NC
885 case 'P':
886 len = 1;
887 size = sizeof(char*);
78d46eaa
NC
888 break;
889 }
206947d2
IZ
890 }
891 total += len * size;
892 }
893 return total;
894}
895
49704364
WL
896
897/* locate matching closing parenthesis or bracket
898 * returns char pointer to char after match, or NULL
899 */
f7fe979e
AL
900STATIC const char *
901S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 902{
7918f24d
NC
903 PERL_ARGS_ASSERT_GROUP_END;
904
49704364 905 while (patptr < patend) {
f7fe979e 906 const char c = *patptr++;
49704364
WL
907
908 if (isSPACE(c))
909 continue;
910 else if (c == ender)
911 return patptr-1;
912 else if (c == '#') {
913 while (patptr < patend && *patptr != '\n')
914 patptr++;
915 continue;
916 } else if (c == '(')
917 patptr = group_end(patptr, patend, ')') + 1;
918 else if (c == '[')
919 patptr = group_end(patptr, patend, ']') + 1;
18529408 920 }
49704364
WL
921 Perl_croak(aTHX_ "No group ending character '%c' found in template",
922 ender);
923 return 0;
18529408
IZ
924}
925
49704364
WL
926
927/* Convert unsigned decimal number to binary.
928 * Expects a pointer to the first digit and address of length variable
929 * Advances char pointer to 1st non-digit char and returns number
fc241834 930 */
f7fe979e
AL
931STATIC const char *
932S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
933{
934 I32 len = *patptr++ - '0';
7918f24d
NC
935
936 PERL_ARGS_ASSERT_GET_NUM;
937
49704364
WL
938 while (isDIGIT(*patptr)) {
939 if (len >= 0x7FFFFFFF/10)
940 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
941 len = (len * 10) + (*patptr++ - '0');
942 }
943 *lenptr = len;
944 return patptr;
945}
946
947/* The marvellous template parsing routine: Using state stored in *symptr,
948 * locates next template code and count
949 */
950STATIC bool
f337b084 951S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 952{
f7fe979e 953 const char* patptr = symptr->patptr;
0bcc34c2 954 const char* const patend = symptr->patend;
49704364 955
7918f24d
NC
956 PERL_ARGS_ASSERT_NEXT_SYMBOL;
957
49704364
WL
958 symptr->flags &= ~FLAG_SLASH;
959
960 while (patptr < patend) {
961 if (isSPACE(*patptr))
962 patptr++;
963 else if (*patptr == '#') {
964 patptr++;
965 while (patptr < patend && *patptr != '\n')
966 patptr++;
967 if (patptr < patend)
968 patptr++;
969 } else {
fc241834 970 /* We should have found a template code */
49704364 971 I32 code = *patptr++ & 0xFF;
66c611c5 972 U32 inherited_modifiers = 0;
49704364
WL
973
974 if (code == ','){ /* grandfather in commas but with a warning */
975 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
976 symptr->flags |= FLAG_COMMA;
977 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 978 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
979 }
980 continue;
981 }
fc241834 982
49704364 983 /* for '(', skip to ')' */
fc241834 984 if (code == '(') {
49704364
WL
985 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
986 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 987 _action( symptr ) );
49704364
WL
988 symptr->grpbeg = patptr;
989 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
990 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
991 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 992 _action( symptr ) );
49704364
WL
993 }
994
66c611c5
MHM
995 /* look for group modifiers to inherit */
996 if (TYPE_ENDIANNESS(symptr->flags)) {
997 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
998 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
999 }
1000
1109a392
MHM
1001 /* look for modifiers */
1002 while (patptr < patend) {
b7787f18
AL
1003 const char *allowed;
1004 I32 modifier;
1109a392
MHM
1005 switch (*patptr) {
1006 case '!':
1007 modifier = TYPE_IS_SHRIEKING;
7212898e 1008 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1009 break;
7212898e 1010#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1011 case '>':
1012 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1013 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1014 break;
1015 case '<':
1016 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1017 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1018 break;
f337b084 1019#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1020 default:
b7787f18
AL
1021 allowed = "";
1022 modifier = 0;
1109a392
MHM
1023 break;
1024 }
66c611c5 1025
1109a392
MHM
1026 if (modifier == 0)
1027 break;
66c611c5 1028
1109a392
MHM
1029 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1030 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1031 allowed, _action( symptr ) );
66c611c5
MHM
1032
1033 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1034 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1035 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1036 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1037 TYPE_ENDIANNESS_MASK)
1038 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1039 *patptr, _action( symptr ) );
66c611c5 1040
a2a5de95
NC
1041 if ((code & modifier)) {
1042 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1043 "Duplicate modifier '%c' after '%c' in %s",
1044 *patptr, (int) TYPE_NO_MODIFIERS(code),
1045 _action( symptr ) );
1109a392 1046 }
66c611c5 1047
1109a392
MHM
1048 code |= modifier;
1049 patptr++;
49704364
WL
1050 }
1051
66c611c5
MHM
1052 /* inherit modifiers */
1053 code |= inherited_modifiers;
1054
fc241834 1055 /* look for count and/or / */
49704364
WL
1056 if (patptr < patend) {
1057 if (isDIGIT(*patptr)) {
1058 patptr = get_num( patptr, &symptr->length );
1059 symptr->howlen = e_number;
1060
1061 } else if (*patptr == '*') {
1062 patptr++;
1063 symptr->howlen = e_star;
1064
1065 } else if (*patptr == '[') {
f7fe979e 1066 const char* lenptr = ++patptr;
49704364
WL
1067 symptr->howlen = e_number;
1068 patptr = group_end( patptr, patend, ']' ) + 1;
1069 /* what kind of [] is it? */
1070 if (isDIGIT(*lenptr)) {
1071 lenptr = get_num( lenptr, &symptr->length );
1072 if( *lenptr != ']' )
1073 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1074 _action( symptr ) );
49704364
WL
1075 } else {
1076 tempsym_t savsym = *symptr;
1077 symptr->patend = patptr-1;
1078 symptr->patptr = lenptr;
1079 savsym.length = measure_struct(symptr);
1080 *symptr = savsym;
1081 }
1082 } else {
1083 symptr->howlen = e_no_len;
1084 symptr->length = 1;
1085 }
1086
1087 /* try to find / */
1088 while (patptr < patend) {
1089 if (isSPACE(*patptr))
1090 patptr++;
1091 else if (*patptr == '#') {
1092 patptr++;
1093 while (patptr < patend && *patptr != '\n')
1094 patptr++;
1095 if (patptr < patend)
1096 patptr++;
1097 } else {
66c611c5 1098 if (*patptr == '/') {
49704364
WL
1099 symptr->flags |= FLAG_SLASH;
1100 patptr++;
66c611c5
MHM
1101 if (patptr < patend &&
1102 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1103 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1104 _action( symptr ) );
49704364
WL
1105 }
1106 break;
1107 }
18529408 1108 }
49704364
WL
1109 } else {
1110 /* at end - no count, no / */
1111 symptr->howlen = e_no_len;
1112 symptr->length = 1;
1113 }
1114
1115 symptr->code = code;
fc241834 1116 symptr->patptr = patptr;
49704364 1117 return TRUE;
18529408 1118 }
49704364 1119 }
fc241834 1120 symptr->patptr = patptr;
49704364 1121 return FALSE;
18529408
IZ
1122}
1123
18529408 1124/*
fc241834 1125 There is no way to cleanly handle the case where we should process the
08ca2aa3 1126 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1127 (e.g. estimates like strend-s as an upper bound for the number of
1128 characters left wouldn't work). So if we foresee the need of this
1129 (pattern starts with U or contains U0), we want to work on the encoded
1130 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1131 themselves if they need to do a lot of unpacks like this on it
1132*/
fc241834 1133STATIC bool
08ca2aa3
TH
1134need_utf8(const char *pat, const char *patend)
1135{
1136 bool first = TRUE;
7918f24d
NC
1137
1138 PERL_ARGS_ASSERT_NEED_UTF8;
1139
08ca2aa3
TH
1140 while (pat < patend) {
1141 if (pat[0] == '#') {
1142 pat++;
f7fe979e 1143 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1144 if (!pat) return FALSE;
1145 } else if (pat[0] == 'U') {
1146 if (first || pat[1] == '0') return TRUE;
1147 } else first = FALSE;
1148 pat++;
1149 }
1150 return FALSE;
1151}
1152
1153STATIC char
1154first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1155 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1156
08ca2aa3
TH
1157 while (pat < patend) {
1158 if (pat[0] != '#') return pat[0];
1159 pat++;
f7fe979e 1160 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1161 if (!pat) return 0;
1162 pat++;
1163 }
1164 return 0;
1165}
1166
1167/*
7accc089
JH
1168=for apidoc unpackstring
1169
608d3aed
WL
1170The engine implementing unpack() Perl function. C<unpackstring> puts the
1171extracted list items on the stack and returns the number of elements.
1172Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1173
1174=cut */
1175
1176I32
f7fe979e 1177Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1178{
f7fe979e 1179 tempsym_t sym;
08ca2aa3 1180
7918f24d
NC
1181 PERL_ARGS_ASSERT_UNPACKSTRING;
1182
f337b084 1183 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1184 else if (need_utf8(pat, patend)) {
1185 /* We probably should try to avoid this in case a scalar context call
1186 wouldn't get to the "U0" */
1187 STRLEN len = strend - s;
230e1fce 1188 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1189 SAVEFREEPV(s);
1190 strend = s + len;
f337b084 1191 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1192 }
1193
f337b084
TH
1194 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1195 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1196
f7fe979e 1197 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1198
1199 return unpack_rec(&sym, s, s, strend, NULL );
1200}
1201
4136a0f7 1202STATIC I32
f7fe979e 1203S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1204{
27da23d5 1205 dVAR; dSP;
18529408 1206 SV *sv;
f7fe979e 1207 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1208 howlen_t howlen;
a6ec74c1 1209 I32 checksum = 0;
92d41999 1210 UV cuv = 0;
a6ec74c1 1211 NV cdouble = 0.0;
f337b084 1212 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1213 bool beyond = FALSE;
21c16052 1214 bool explicit_length;
9e27e96a 1215 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1216 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1217
1218 PERL_ARGS_ASSERT_UNPACK_REC;
1219
28be1210 1220 symptr->strbeg = s - strbeg;
49704364 1221
49704364 1222 while (next_symbol(symptr)) {
a7a3cfaa 1223 packprops_t props;
9e27e96a 1224 I32 len;
f337b084 1225 I32 datumtype = symptr->code;
206947d2 1226 /* do first one only unless in list context
08ca2aa3 1227 / is implemented by unpacking the count, then popping it from the
206947d2 1228 stack, so must check that we're not in the middle of a / */
49704364 1229 if ( unpack_only_one
206947d2 1230 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1231 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1232 break;
49704364 1233
f337b084 1234 switch (howlen = symptr->howlen) {
fc241834
RGS
1235 case e_star:
1236 len = strend - strbeg; /* long enough */
49704364 1237 break;
f337b084
TH
1238 default:
1239 /* e_no_len and e_number */
1240 len = symptr->length;
1241 break;
49704364 1242 }
18529408 1243
21c16052 1244 explicit_length = TRUE;
a6ec74c1 1245 redo_switch:
49704364 1246 beyond = s >= strend;
a7a3cfaa
TH
1247
1248 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1249 if (props) {
1250 /* props nonzero means we can process this letter. */
9e27e96a
AL
1251 const long size = props & PACK_SIZE_MASK;
1252 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1253 if (len > howmany)
1254 len = howmany;
1255
1256 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1257 if (len && unpack_only_one) len = 1;
1258 EXTEND(SP, len);
1259 EXTEND_MORTAL(len);
78d46eaa
NC
1260 }
1261 }
a7a3cfaa 1262
1109a392 1263 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1264 default:
1109a392 1265 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1266
a6ec74c1 1267 case '%':
49704364 1268 if (howlen == e_no_len)
18529408 1269 len = 16; /* len is not specified */
a6ec74c1 1270 checksum = len;
92d41999 1271 cuv = 0;
a6ec74c1 1272 cdouble = 0;
18529408 1273 continue;
a6ec74c1 1274 break;
18529408
IZ
1275 case '(':
1276 {
49704364 1277 tempsym_t savsym = *symptr;
9e27e96a 1278 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1279 symptr->flags |= group_modifiers;
49704364 1280 symptr->patend = savsym.grpend;
28be1210 1281 symptr->previous = &savsym;
49704364 1282 symptr->level++;
18529408 1283 PUTBACK;
c6f750d1 1284 if (len && unpack_only_one) len = 1;
18529408 1285 while (len--) {
49704364 1286 symptr->patptr = savsym.grpbeg;
f337b084
TH
1287 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1288 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1289 unpack_rec(symptr, s, strbeg, strend, &s);
1290 if (s == strend && savsym.howlen == e_star)
49704364 1291 break; /* No way to continue */
18529408
IZ
1292 }
1293 SPAGAIN;
28be1210 1294 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1295 *symptr = savsym;
18529408
IZ
1296 break;
1297 }
28be1210
TH
1298#ifdef PERL_PACK_CAN_SHRIEKSIGN
1299 case '.' | TYPE_IS_SHRIEKING:
1300#endif
1301 case '.': {
9e27e96a 1302 const char *from;
28be1210
TH
1303 SV *sv;
1304#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1305 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1306#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1307 const bool u8 = utf8;
28be1210
TH
1308#endif
1309 if (howlen == e_star) from = strbeg;
1310 else if (len <= 0) from = s;
1311 else {
1312 tempsym_t *group = symptr;
1313
1314 while (--len && group) group = group->previous;
1315 from = group ? strbeg + group->strbeg : strbeg;
1316 }
1317 sv = from <= s ?
00646304
CB
1318 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1319 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1320 mXPUSHs(sv);
28be1210
TH
1321 break;
1322 }
1323#ifdef PERL_PACK_CAN_SHRIEKSIGN
1324 case '@' | TYPE_IS_SHRIEKING:
1325#endif
a6ec74c1 1326 case '@':
28be1210
TH
1327 s = strbeg + symptr->strbeg;
1328#ifdef PERL_PACK_CAN_SHRIEKSIGN
1329 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1330#else /* PERL_PACK_CAN_SHRIEKSIGN */
1331 if (utf8)
1332#endif
1333 {
08ca2aa3
TH
1334 while (len > 0) {
1335 if (s >= strend)
1336 Perl_croak(aTHX_ "'@' outside of string in unpack");
1337 s += UTF8SKIP(s);
1338 len--;
1339 }
1340 if (s > strend)
1341 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1342 } else {
28be1210 1343 if (strend-s < len)
fc241834 1344 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1345 s += len;
08ca2aa3 1346 }
a6ec74c1 1347 break;
62f95557
IZ
1348 case 'X' | TYPE_IS_SHRIEKING:
1349 if (!len) /* Avoid division by 0 */
1350 len = 1;
08ca2aa3 1351 if (utf8) {
f7fe979e 1352 const char *hop, *last;
f337b084
TH
1353 I32 l = len;
1354 hop = last = strbeg;
1355 while (hop < s) {
1356 hop += UTF8SKIP(hop);
1357 if (--l == 0) {
08ca2aa3 1358 last = hop;
f337b084
TH
1359 l = len;
1360 }
fc241834 1361 }
f337b084
TH
1362 if (last > s)
1363 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1364 s = last;
1365 break;
f337b084
TH
1366 }
1367 len = (s - strbeg) % len;
62f95557 1368 /* FALL THROUGH */
a6ec74c1 1369 case 'X':
08ca2aa3
TH
1370 if (utf8) {
1371 while (len > 0) {
1372 if (s <= strbeg)
1373 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1374 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1375 if (s <= strbeg)
1376 Perl_croak(aTHX_ "'X' outside of string in unpack");
1377 }
1378 len--;
1379 }
1380 } else {
fc241834
RGS
1381 if (len > s - strbeg)
1382 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1383 s -= len;
08ca2aa3 1384 }
a6ec74c1 1385 break;
9e27e96a
AL
1386 case 'x' | TYPE_IS_SHRIEKING: {
1387 I32 ai32;
62f95557
IZ
1388 if (!len) /* Avoid division by 0 */
1389 len = 1;
230e1fce
NC
1390 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1391 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1392 if (ai32 == 0) break;
1393 len -= ai32;
9e27e96a 1394 }
62f95557 1395 /* FALL THROUGH */
a6ec74c1 1396 case 'x':
08ca2aa3
TH
1397 if (utf8) {
1398 while (len>0) {
1399 if (s >= strend)
1400 Perl_croak(aTHX_ "'x' outside of string in unpack");
1401 s += UTF8SKIP(s);
1402 len--;
1403 }
1404 } else {
fc241834
RGS
1405 if (len > strend - s)
1406 Perl_croak(aTHX_ "'x' outside of string in unpack");
1407 s += len;
f337b084 1408 }
a6ec74c1
JH
1409 break;
1410 case '/':
49704364
WL
1411 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1412 break;
a6ec74c1
JH
1413 case 'A':
1414 case 'Z':
1415 case 'a':
08ca2aa3
TH
1416 if (checksum) {
1417 /* Preliminary length estimate is assumed done in 'W' */
1418 if (len > strend - s) len = strend - s;
1419 goto W_checksum;
1420 }
1421 if (utf8) {
1422 I32 l;
f7fe979e 1423 const char *hop;
08ca2aa3
TH
1424 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1425 if (hop >= strend) {
1426 if (hop > strend)
1427 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1428 break;
fc241834 1429 }
a6ec74c1 1430 }
08ca2aa3
TH
1431 if (hop > strend)
1432 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1433 len = hop - s;
1434 } else if (len > strend - s)
1435 len = strend - s;
1436
1437 if (datumtype == 'Z') {
1438 /* 'Z' strips stuff after first null */
f7fe979e 1439 const char *ptr, *end;
f337b084
TH
1440 end = s + len;
1441 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1442 sv = newSVpvn(s, ptr-s);
1443 if (howlen == e_star) /* exact for 'Z*' */
1444 len = ptr-s + (ptr != strend ? 1 : 0);
1445 } else if (datumtype == 'A') {
1446 /* 'A' strips both nulls and spaces */
f7fe979e 1447 const char *ptr;
18bdf90a
TH
1448 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1449 for (ptr = s+len-1; ptr >= s; ptr--)
1450 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1451 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1452 if (ptr >= s) ptr += UTF8SKIP(ptr);
1453 else ptr++;
28be1210 1454 if (ptr > s+len)
18bdf90a
TH
1455 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1456 } else {
1457 for (ptr = s+len-1; ptr >= s; ptr--)
1458 if (*ptr != 0 && !isSPACE(*ptr)) break;
1459 ptr++;
1460 }
08ca2aa3
TH
1461 sv = newSVpvn(s, ptr-s);
1462 } else sv = newSVpvn(s, len);
1463
1464 if (utf8) {
1465 SvUTF8_on(sv);
1466 /* Undo any upgrade done due to need_utf8() */
f337b084 1467 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1468 sv_utf8_downgrade(sv, 0);
a6ec74c1 1469 }
6e449a3a 1470 mXPUSHs(sv);
08ca2aa3 1471 s += len;
a6ec74c1
JH
1472 break;
1473 case 'B':
08ca2aa3
TH
1474 case 'b': {
1475 char *str;
49704364 1476 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1477 len = (strend - s) * 8;
1478 if (checksum) {
f337b084 1479 if (utf8)
08ca2aa3 1480 while (len >= 8 && s < strend) {
f337b084 1481 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1482 len -= 8;
1483 }
f337b084 1484 else
fc241834 1485 while (len >= 8) {
08ca2aa3 1486 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1487 len -= 8;
1488 }
08ca2aa3
TH
1489 if (len && s < strend) {
1490 U8 bits;
f337b084
TH
1491 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1492 if (datumtype == 'b')
a6ec74c1 1493 while (len-- > 0) {
92d41999 1494 if (bits & 1) cuv++;
a6ec74c1
JH
1495 bits >>= 1;
1496 }
f337b084 1497 else
a6ec74c1 1498 while (len-- > 0) {
08ca2aa3 1499 if (bits & 0x80) cuv++;
a6ec74c1
JH
1500 bits <<= 1;
1501 }
fc241834 1502 }
a6ec74c1
JH
1503 break;
1504 }
08ca2aa3 1505
561b68a9 1506 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1507 SvPOK_on(sv);
1508 str = SvPVX(sv);
1509 if (datumtype == 'b') {
f337b084 1510 U8 bits = 0;
f7fe979e 1511 const I32 ai32 = len;
08ca2aa3
TH
1512 for (len = 0; len < ai32; len++) {
1513 if (len & 7) bits >>= 1;
1514 else if (utf8) {
1515 if (s >= strend) break;
f337b084 1516 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1517 } else bits = *(U8 *) s++;
1518 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1519 }
08ca2aa3 1520 } else {
f337b084 1521 U8 bits = 0;
f7fe979e 1522 const I32 ai32 = len;
08ca2aa3
TH
1523 for (len = 0; len < ai32; len++) {
1524 if (len & 7) bits <<= 1;
1525 else if (utf8) {
1526 if (s >= strend) break;
f337b084 1527 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1528 } else bits = *(U8 *) s++;
1529 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1530 }
1531 }
1532 *str = '\0';
aa07b2f6 1533 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1534 XPUSHs(sv);
a6ec74c1 1535 break;
08ca2aa3 1536 }
a6ec74c1 1537 case 'H':
08ca2aa3
TH
1538 case 'h': {
1539 char *str;
fc241834 1540 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1541 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1542 len = (strend - s) * 2;
561b68a9 1543 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1544 SvPOK_on(sv);
1545 str = SvPVX(sv);
1546 if (datumtype == 'h') {
f337b084 1547 U8 bits = 0;
9e27e96a 1548 I32 ai32 = len;
fc241834
RGS
1549 for (len = 0; len < ai32; len++) {
1550 if (len & 1) bits >>= 4;
1551 else if (utf8) {
1552 if (s >= strend) break;
f337b084 1553 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1554 } else bits = * (U8 *) s++;
a6ec74c1
JH
1555 *str++ = PL_hexdigit[bits & 15];
1556 }
08ca2aa3 1557 } else {
f337b084 1558 U8 bits = 0;
f7fe979e 1559 const I32 ai32 = len;
08ca2aa3
TH
1560 for (len = 0; len < ai32; len++) {
1561 if (len & 1) bits <<= 4;
1562 else if (utf8) {
1563 if (s >= strend) break;
f337b084 1564 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1565 } else bits = *(U8 *) s++;
a6ec74c1
JH
1566 *str++ = PL_hexdigit[(bits >> 4) & 15];
1567 }
1568 }
1569 *str = '\0';
aa07b2f6 1570 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1571 XPUSHs(sv);
a6ec74c1 1572 break;
08ca2aa3 1573 }
1651fc44
ML
1574 case 'C':
1575 if (len == 0) {
1576 if (explicit_length)
1577 /* Switch to "character" mode */
1578 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1579 break;
1580 }
1581 /* FALL THROUGH */
a6ec74c1 1582 case 'c':
1651fc44
ML
1583 while (len-- > 0 && s < strend) {
1584 int aint;
1585 if (utf8)
1586 {
1587 STRLEN retlen;
1588 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1589 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1590 if (retlen == (STRLEN) -1 || retlen == 0)
1591 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1592 s += retlen;
1593 }
1594 else
1595 aint = *(U8 *)(s)++;
1596 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1597 aint -= 256;
08ca2aa3 1598 if (!checksum)
6e449a3a 1599 mPUSHi(aint);
73cb7263
NC
1600 else if (checksum > bits_in_uv)
1601 cdouble += (NV)aint;
1602 else
1603 cuv += aint;
a6ec74c1
JH
1604 }
1605 break;
08ca2aa3
TH
1606 case 'W':
1607 W_checksum:
1651fc44 1608 if (utf8) {
08ca2aa3 1609 while (len-- > 0 && s < strend) {
08ca2aa3 1610 STRLEN retlen;
f7fe979e 1611 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1612 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1613 if (retlen == (STRLEN) -1 || retlen == 0)
1614 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1615 s += retlen;
1616 if (!checksum)
6e449a3a 1617 mPUSHu(val);
08ca2aa3
TH
1618 else if (checksum > bits_in_uv)
1619 cdouble += (NV) val;
d6d3e8bd 1620 else
08ca2aa3 1621 cuv += val;
fc241834 1622 }
08ca2aa3 1623 } else if (!checksum)
a6ec74c1 1624 while (len-- > 0) {
f7fe979e 1625 const U8 ch = *(U8 *) s++;
6e449a3a 1626 mPUSHu(ch);
a6ec74c1 1627 }
08ca2aa3
TH
1628 else if (checksum > bits_in_uv)
1629 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1630 else
1631 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1632 break;
1633 case 'U':
35bcd338 1634 if (len == 0) {
08ca2aa3
TH
1635 if (explicit_length) {
1636 /* Switch to "bytes in UTF-8" mode */
f337b084 1637 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1638 else
1639 /* Should be impossible due to the need_utf8() test */
1640 Perl_croak(aTHX_ "U0 mode on a byte string");
1641 }
35bcd338
JH
1642 break;
1643 }
08ca2aa3 1644 if (len > strend - s) len = strend - s;
fc241834 1645 if (!checksum) {
08ca2aa3
TH
1646 if (len && unpack_only_one) len = 1;
1647 EXTEND(SP, len);
1648 EXTEND_MORTAL(len);
fc241834 1649 }
08ca2aa3
TH
1650 while (len-- > 0 && s < strend) {
1651 STRLEN retlen;
1652 UV auv;
1653 if (utf8) {
1654 U8 result[UTF8_MAXLEN];
f7fe979e 1655 const char *ptr = s;
08ca2aa3 1656 STRLEN len;
08ca2aa3
TH
1657 /* Bug: warns about bad utf8 even if we are short on bytes
1658 and will break out of the loop */
230e1fce
NC
1659 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1660 'U'))
08ca2aa3
TH
1661 break;
1662 len = UTF8SKIP(result);
fc241834 1663 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1664 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1665 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1666 s = ptr;
1667 } else {
1668 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1669 if (retlen == (STRLEN) -1 || retlen == 0)
1670 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1671 s += retlen;
1672 }
1673 if (!checksum)
6e449a3a 1674 mPUSHu(auv);
73cb7263 1675 else if (checksum > bits_in_uv)
08ca2aa3 1676 cdouble += (NV) auv;
73cb7263 1677 else
08ca2aa3 1678 cuv += auv;
a6ec74c1
JH
1679 }
1680 break;
49704364
WL
1681 case 's' | TYPE_IS_SHRIEKING:
1682#if SHORTSIZE != SIZE16
73cb7263 1683 while (len-- > 0) {
08ca2aa3 1684 short ashort;
f337b084
TH
1685 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1686 DO_BO_UNPACK(ashort, s);
08ca2aa3 1687 if (!checksum)
6e449a3a 1688 mPUSHi(ashort);
73cb7263
NC
1689 else if (checksum > bits_in_uv)
1690 cdouble += (NV)ashort;
1691 else
1692 cuv += ashort;
49704364
WL
1693 }
1694 break;
1695#else
1696 /* Fallthrough! */
a6ec74c1 1697#endif
49704364 1698 case 's':
73cb7263 1699 while (len-- > 0) {
08ca2aa3
TH
1700 I16 ai16;
1701
1702#if U16SIZE > SIZE16
1703 ai16 = 0;
1704#endif
f337b084 1705 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1706 DO_BO_UNPACK(ai16, 16);
1109a392 1707#if U16SIZE > SIZE16
73cb7263
NC
1708 if (ai16 > 32767)
1709 ai16 -= 65536;
a6ec74c1 1710#endif
08ca2aa3 1711 if (!checksum)
6e449a3a 1712 mPUSHi(ai16);
73cb7263
NC
1713 else if (checksum > bits_in_uv)
1714 cdouble += (NV)ai16;
1715 else
1716 cuv += ai16;
a6ec74c1
JH
1717 }
1718 break;
49704364
WL
1719 case 'S' | TYPE_IS_SHRIEKING:
1720#if SHORTSIZE != SIZE16
73cb7263 1721 while (len-- > 0) {
08ca2aa3 1722 unsigned short aushort;
f337b084
TH
1723 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1724 DO_BO_UNPACK(aushort, s);
08ca2aa3 1725 if (!checksum)
6e449a3a 1726 mPUSHu(aushort);
73cb7263
NC
1727 else if (checksum > bits_in_uv)
1728 cdouble += (NV)aushort;
1729 else
1730 cuv += aushort;
49704364
WL
1731 }
1732 break;
1733#else
1734 /* Fallhrough! */
1735#endif
a6ec74c1
JH
1736 case 'v':
1737 case 'n':
1738 case 'S':
73cb7263 1739 while (len-- > 0) {
08ca2aa3
TH
1740 U16 au16;
1741#if U16SIZE > SIZE16
1742 au16 = 0;
1743#endif
f337b084 1744 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1745 DO_BO_UNPACK(au16, 16);
a6ec74c1 1746#ifdef HAS_NTOHS
73cb7263
NC
1747 if (datumtype == 'n')
1748 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1749#endif
1750#ifdef HAS_VTOHS
73cb7263
NC
1751 if (datumtype == 'v')
1752 au16 = vtohs(au16);
a6ec74c1 1753#endif
08ca2aa3 1754 if (!checksum)
6e449a3a 1755 mPUSHu(au16);
73cb7263 1756 else if (checksum > bits_in_uv)
f337b084 1757 cdouble += (NV) au16;
73cb7263
NC
1758 else
1759 cuv += au16;
a6ec74c1
JH
1760 }
1761 break;
7212898e 1762#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1763 case 'v' | TYPE_IS_SHRIEKING:
1764 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1765 while (len-- > 0) {
08ca2aa3
TH
1766 I16 ai16;
1767# if U16SIZE > SIZE16
1768 ai16 = 0;
1769# endif
f337b084 1770 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1771# ifdef HAS_NTOHS
73cb7263 1772 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1773 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1774# endif /* HAS_NTOHS */
1775# ifdef HAS_VTOHS
73cb7263 1776 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1777 ai16 = (I16) vtohs((U16) ai16);
1778# endif /* HAS_VTOHS */
1779 if (!checksum)
6e449a3a 1780 mPUSHi(ai16);
73cb7263 1781 else if (checksum > bits_in_uv)
08ca2aa3 1782 cdouble += (NV) ai16;
73cb7263
NC
1783 else
1784 cuv += ai16;
068bd2e7
MHM
1785 }
1786 break;
08ca2aa3 1787#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1788 case 'i':
49704364 1789 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1790 while (len-- > 0) {
08ca2aa3 1791 int aint;
f337b084
TH
1792 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1793 DO_BO_UNPACK(aint, i);
08ca2aa3 1794 if (!checksum)
6e449a3a 1795 mPUSHi(aint);
73cb7263
NC
1796 else if (checksum > bits_in_uv)
1797 cdouble += (NV)aint;
1798 else
1799 cuv += aint;
a6ec74c1
JH
1800 }
1801 break;
1802 case 'I':
49704364 1803 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1804 while (len-- > 0) {
08ca2aa3 1805 unsigned int auint;
f337b084
TH
1806 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1807 DO_BO_UNPACK(auint, i);
08ca2aa3 1808 if (!checksum)
6e449a3a 1809 mPUSHu(auint);
73cb7263
NC
1810 else if (checksum > bits_in_uv)
1811 cdouble += (NV)auint;
1812 else
1813 cuv += auint;
a6ec74c1
JH
1814 }
1815 break;
92d41999 1816 case 'j':
73cb7263 1817 while (len-- > 0) {
08ca2aa3 1818 IV aiv;
f337b084 1819 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1820#if IVSIZE == INTSIZE
f337b084 1821 DO_BO_UNPACK(aiv, i);
1109a392 1822#elif IVSIZE == LONGSIZE
f337b084 1823 DO_BO_UNPACK(aiv, l);
1109a392 1824#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1825 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1826#else
1827 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1828#endif
08ca2aa3 1829 if (!checksum)
6e449a3a 1830 mPUSHi(aiv);
73cb7263
NC
1831 else if (checksum > bits_in_uv)
1832 cdouble += (NV)aiv;
1833 else
1834 cuv += aiv;
92d41999
JH
1835 }
1836 break;
1837 case 'J':
73cb7263 1838 while (len-- > 0) {
08ca2aa3 1839 UV auv;
f337b084 1840 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1841#if IVSIZE == INTSIZE
f337b084 1842 DO_BO_UNPACK(auv, i);
08ca2aa3 1843#elif IVSIZE == LONGSIZE
f337b084 1844 DO_BO_UNPACK(auv, l);
08ca2aa3 1845#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1846 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1847#else
1848 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1849#endif
08ca2aa3 1850 if (!checksum)
6e449a3a 1851 mPUSHu(auv);
73cb7263
NC
1852 else if (checksum > bits_in_uv)
1853 cdouble += (NV)auv;
1854 else
1855 cuv += auv;
92d41999
JH
1856 }
1857 break;
49704364
WL
1858 case 'l' | TYPE_IS_SHRIEKING:
1859#if LONGSIZE != SIZE32
73cb7263 1860 while (len-- > 0) {
08ca2aa3 1861 long along;
f337b084
TH
1862 SHIFT_VAR(utf8, s, strend, along, datumtype);
1863 DO_BO_UNPACK(along, l);
08ca2aa3 1864 if (!checksum)
6e449a3a 1865 mPUSHi(along);
73cb7263
NC
1866 else if (checksum > bits_in_uv)
1867 cdouble += (NV)along;
1868 else
1869 cuv += along;
49704364
WL
1870 }
1871 break;
1872#else
1873 /* Fallthrough! */
a6ec74c1 1874#endif
49704364 1875 case 'l':
73cb7263 1876 while (len-- > 0) {
08ca2aa3
TH
1877 I32 ai32;
1878#if U32SIZE > SIZE32
1879 ai32 = 0;
1880#endif
f337b084 1881 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1882 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1883#if U32SIZE > SIZE32
08ca2aa3 1884 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1885#endif
08ca2aa3 1886 if (!checksum)
6e449a3a 1887 mPUSHi(ai32);
73cb7263
NC
1888 else if (checksum > bits_in_uv)
1889 cdouble += (NV)ai32;
1890 else
1891 cuv += ai32;
a6ec74c1
JH
1892 }
1893 break;
49704364
WL
1894 case 'L' | TYPE_IS_SHRIEKING:
1895#if LONGSIZE != SIZE32
73cb7263 1896 while (len-- > 0) {
08ca2aa3 1897 unsigned long aulong;
f337b084
TH
1898 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1899 DO_BO_UNPACK(aulong, l);
08ca2aa3 1900 if (!checksum)
6e449a3a 1901 mPUSHu(aulong);
73cb7263
NC
1902 else if (checksum > bits_in_uv)
1903 cdouble += (NV)aulong;
1904 else
1905 cuv += aulong;
49704364
WL
1906 }
1907 break;
1908#else
1909 /* Fall through! */
1910#endif
a6ec74c1
JH
1911 case 'V':
1912 case 'N':
1913 case 'L':
73cb7263 1914 while (len-- > 0) {
08ca2aa3
TH
1915 U32 au32;
1916#if U32SIZE > SIZE32
1917 au32 = 0;
1918#endif
f337b084 1919 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1920 DO_BO_UNPACK(au32, 32);
a6ec74c1 1921#ifdef HAS_NTOHL
73cb7263
NC
1922 if (datumtype == 'N')
1923 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1924#endif
1925#ifdef HAS_VTOHL
73cb7263
NC
1926 if (datumtype == 'V')
1927 au32 = vtohl(au32);
a6ec74c1 1928#endif
08ca2aa3 1929 if (!checksum)
6e449a3a 1930 mPUSHu(au32);
fc241834
RGS
1931 else if (checksum > bits_in_uv)
1932 cdouble += (NV)au32;
1933 else
1934 cuv += au32;
a6ec74c1
JH
1935 }
1936 break;
7212898e 1937#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1938 case 'V' | TYPE_IS_SHRIEKING:
1939 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1940 while (len-- > 0) {
08ca2aa3
TH
1941 I32 ai32;
1942# if U32SIZE > SIZE32
1943 ai32 = 0;
1944# endif
f337b084 1945 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1946# ifdef HAS_NTOHL
73cb7263
NC
1947 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1948 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1949# endif
1950# ifdef HAS_VTOHL
73cb7263
NC
1951 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1952 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1953# endif
1954 if (!checksum)
6e449a3a 1955 mPUSHi(ai32);
73cb7263
NC
1956 else if (checksum > bits_in_uv)
1957 cdouble += (NV)ai32;
1958 else
1959 cuv += ai32;
068bd2e7
MHM
1960 }
1961 break;
08ca2aa3 1962#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1963 case 'p':
a6ec74c1 1964 while (len-- > 0) {
f7fe979e 1965 const char *aptr;
f337b084 1966 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1967 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1968 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1969 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1970 }
1971 break;
1972 case 'w':
a6ec74c1
JH
1973 {
1974 UV auv = 0;
1975 U32 bytes = 0;
fc241834 1976
08ca2aa3
TH
1977 while (len > 0 && s < strend) {
1978 U8 ch;
f337b084 1979 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1980 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1981 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1982 if (ch < 0x80) {
a6ec74c1 1983 bytes = 0;
6e449a3a 1984 mPUSHu(auv);
a6ec74c1
JH
1985 len--;
1986 auv = 0;
08ca2aa3 1987 continue;
a6ec74c1 1988 }
08ca2aa3 1989 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1990 const char *t;
a6ec74c1 1991
f5992bc4 1992 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1993 while (s < strend) {
f337b084 1994 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1995 sv = mul128(sv, (U8)(ch & 0x7f));
1996 if (!(ch & 0x80)) {
a6ec74c1
JH
1997 bytes = 0;
1998 break;
1999 }
2000 }
10516c54 2001 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2002 while (*t == '0')
2003 t++;
2004 sv_chop(sv, t);
6e449a3a 2005 mPUSHs(sv);
a6ec74c1
JH
2006 len--;
2007 auv = 0;
2008 }
2009 }
2010 if ((s >= strend) && bytes)
49704364 2011 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2012 }
2013 break;
2014 case 'P':
49704364
WL
2015 if (symptr->howlen == e_star)
2016 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2017 EXTEND(SP, 1);
2d3e0934 2018 if (s + sizeof(char*) <= strend) {
08ca2aa3 2019 char *aptr;
f337b084 2020 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2021 DO_BO_UNPACK_PC(aptr);
fc241834 2022 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2023 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2024 }
a6ec74c1
JH
2025 break;
2026#ifdef HAS_QUAD
2027 case 'q':
73cb7263 2028 while (len-- > 0) {
08ca2aa3 2029 Quad_t aquad;
f337b084
TH
2030 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2031 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2032 if (!checksum)
6e449a3a
MHM
2033 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2034 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2035 else if (checksum > bits_in_uv)
2036 cdouble += (NV)aquad;
2037 else
2038 cuv += aquad;
2039 }
a6ec74c1
JH
2040 break;
2041 case 'Q':
73cb7263 2042 while (len-- > 0) {
08ca2aa3 2043 Uquad_t auquad;
f337b084
TH
2044 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2045 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2046 if (!checksum)
6e449a3a
MHM
2047 mPUSHs(auquad <= UV_MAX ?
2048 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2049 else if (checksum > bits_in_uv)
2050 cdouble += (NV)auquad;
2051 else
2052 cuv += auquad;
a6ec74c1
JH
2053 }
2054 break;
08ca2aa3 2055#endif /* HAS_QUAD */
a6ec74c1
JH
2056 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2057 case 'f':
73cb7263 2058 while (len-- > 0) {
08ca2aa3 2059 float afloat;
f337b084 2060 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2061 DO_BO_UNPACK_N(afloat, float);
2062 if (!checksum)
6e449a3a 2063 mPUSHn(afloat);
08ca2aa3 2064 else
73cb7263 2065 cdouble += afloat;
fc241834 2066 }
a6ec74c1
JH
2067 break;
2068 case 'd':
73cb7263 2069 while (len-- > 0) {
08ca2aa3 2070 double adouble;
f337b084 2071 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2072 DO_BO_UNPACK_N(adouble, double);
2073 if (!checksum)
6e449a3a 2074 mPUSHn(adouble);
08ca2aa3 2075 else
73cb7263 2076 cdouble += adouble;
fc241834 2077 }
a6ec74c1 2078 break;
92d41999 2079 case 'F':
73cb7263 2080 while (len-- > 0) {
08ca2aa3 2081 NV anv;
f337b084 2082 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2083 DO_BO_UNPACK_N(anv, NV);
2084 if (!checksum)
6e449a3a 2085 mPUSHn(anv);
08ca2aa3 2086 else
73cb7263 2087 cdouble += anv;
fc241834 2088 }
92d41999
JH
2089 break;
2090#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2091 case 'D':
73cb7263 2092 while (len-- > 0) {
08ca2aa3 2093 long double aldouble;
f337b084 2094 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2095 DO_BO_UNPACK_N(aldouble, long double);
2096 if (!checksum)
6e449a3a 2097 mPUSHn(aldouble);
08ca2aa3
TH
2098 else
2099 cdouble += aldouble;
92d41999
JH
2100 }
2101 break;
2102#endif
a6ec74c1 2103 case 'u':
08ca2aa3 2104 {
f7fe979e 2105 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2106 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2107 if (l) SvPOK_on(sv);
2108 }
2109 if (utf8) {
2110 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2111 I32 a, b, c, d;
db187877 2112 char hunk[3];
08ca2aa3 2113
08ca2aa3
TH
2114 while (len > 0) {
2115 next_uni_uu(aTHX_ &s, strend, &a);
2116 next_uni_uu(aTHX_ &s, strend, &b);
2117 next_uni_uu(aTHX_ &s, strend, &c);
2118 next_uni_uu(aTHX_ &s, strend, &d);
2119 hunk[0] = (char)((a << 2) | (b >> 4));
2120 hunk[1] = (char)((b << 4) | (c >> 2));
2121 hunk[2] = (char)((c << 6) | d);
2122 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2123 len -= 3;
2124 }
2125 if (s < strend) {
f7fe979e
AL
2126 if (*s == '\n') {
2127 s++;
2128 }
08ca2aa3
TH
2129 else {
2130 /* possible checksum byte */
f7fe979e
AL
2131 const char *skip = s+UTF8SKIP(s);
2132 if (skip < strend && *skip == '\n')
2133 s = skip+1;
08ca2aa3
TH
2134 }
2135 }
2136 }
2137 } else {
fc241834
RGS
2138 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2139 I32 a, b, c, d;
db187877 2140 char hunk[3];
a6ec74c1 2141
fc241834
RGS
2142 len = PL_uudmap[*(U8*)s++] & 077;
2143 while (len > 0) {
2144 if (s < strend && ISUUCHAR(*s))
2145 a = PL_uudmap[*(U8*)s++] & 077;
2146 else
2147 a = 0;
2148 if (s < strend && ISUUCHAR(*s))
2149 b = PL_uudmap[*(U8*)s++] & 077;
2150 else
2151 b = 0;
2152 if (s < strend && ISUUCHAR(*s))
2153 c = PL_uudmap[*(U8*)s++] & 077;
2154 else
2155 c = 0;
2156 if (s < strend && ISUUCHAR(*s))
2157 d = PL_uudmap[*(U8*)s++] & 077;
2158 else
2159 d = 0;
2160 hunk[0] = (char)((a << 2) | (b >> 4));
2161 hunk[1] = (char)((b << 4) | (c >> 2));
2162 hunk[2] = (char)((c << 6) | d);
2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2164 len -= 3;
2165 }
2166 if (*s == '\n')
2167 s++;
2168 else /* possible checksum byte */
2169 if (s + 1 < strend && s[1] == '\n')
2170 s += 2;
a6ec74c1 2171 }
08ca2aa3
TH
2172 }
2173 XPUSHs(sv);
a6ec74c1
JH
2174 break;
2175 }
49704364 2176
a6ec74c1 2177 if (checksum) {
1109a392 2178 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2179 (checksum > bits_in_uv &&
08ca2aa3
TH
2180 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2181 NV trouble, anv;
a6ec74c1 2182
08ca2aa3 2183 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2184 while (checksum >= 16) {
2185 checksum -= 16;
08ca2aa3 2186 anv *= 65536.0;
a6ec74c1 2187 }
a6ec74c1 2188 while (cdouble < 0.0)
08ca2aa3
TH
2189 cdouble += anv;
2190 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2191 sv = newSVnv(cdouble);
a6ec74c1
JH
2192 }
2193 else {
fa8ec7c1
NC
2194 if (checksum < bits_in_uv) {
2195 UV mask = ((UV)1 << checksum) - 1;
92d41999 2196 cuv &= mask;
a6ec74c1 2197 }
c4c5f44a 2198 sv = newSVuv(cuv);
a6ec74c1 2199 }
6e449a3a 2200 mXPUSHs(sv);
a6ec74c1
JH
2201 checksum = 0;
2202 }
fc241834 2203
49704364
WL
2204 if (symptr->flags & FLAG_SLASH){
2205 if (SP - PL_stack_base - start_sp_offset <= 0)
2206 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2207 if( next_symbol(symptr) ){
2208 if( symptr->howlen == e_number )
2209 Perl_croak(aTHX_ "Count after length/code in unpack" );
2210 if( beyond ){
2211 /* ...end of char buffer then no decent length available */
2212 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2213 } else {
2214 /* take top of stack (hope it's numeric) */
2215 len = POPi;
2216 if( len < 0 )
2217 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2218 }
2219 } else {
2220 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2221 }
2222 datumtype = symptr->code;
21c16052 2223 explicit_length = FALSE;
49704364
WL
2224 goto redo_switch;
2225 }
a6ec74c1 2226 }
49704364 2227
18529408
IZ
2228 if (new_s)
2229 *new_s = s;
2230 PUTBACK;
2231 return SP - PL_stack_base - start_sp_offset;
2232}
2233
2234PP(pp_unpack)
2235{
97aff369 2236 dVAR;
18529408 2237 dSP;
bab9c0ac 2238 dPOPPOPssrl;
18529408
IZ
2239 I32 gimme = GIMME_V;
2240 STRLEN llen;
2241 STRLEN rlen;
5c144d81
NC
2242 const char *pat = SvPV_const(left, llen);
2243 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2244 const char *strend = s + rlen;
2245 const char *patend = pat + llen;
08ca2aa3 2246 I32 cnt;
18529408
IZ
2247
2248 PUTBACK;
7accc089 2249 cnt = unpackstring(pat, patend, s, strend,
49704364 2250 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2251 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2252
18529408
IZ
2253 SPAGAIN;
2254 if ( !cnt && gimme == G_SCALAR )
2255 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2256 RETURN;
2257}
2258
f337b084 2259STATIC U8 *
f7fe979e 2260doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2261{
f337b084 2262 *h++ = PL_uuemap[len];
a6ec74c1 2263 while (len > 2) {
f337b084
TH
2264 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2265 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2266 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2267 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2268 s += 3;
2269 len -= 3;
2270 }
2271 if (len > 0) {
f7fe979e 2272 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2273 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2274 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2275 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2276 *h++ = PL_uuemap[0];
a6ec74c1 2277 }
f337b084
TH
2278 *h++ = '\n';
2279 return h;
a6ec74c1
JH
2280}
2281
2282STATIC SV *
f7fe979e 2283S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2284{
8b6e33c7
AL
2285 SV *result = newSVpvn(s, l);
2286 char *const result_c = SvPV_nolen(result); /* convenience */
2287 char *out = result_c;
2288 bool skip = 1;
2289 bool ignore = 0;
a6ec74c1 2290
7918f24d
NC
2291 PERL_ARGS_ASSERT_IS_AN_INT;
2292
a6ec74c1
JH
2293 while (*s) {
2294 switch (*s) {
2295 case ' ':
2296 break;
2297 case '+':
2298 if (!skip) {
2299 SvREFCNT_dec(result);
2300 return (NULL);
2301 }
2302 break;
2303 case '0':
2304 case '1':
2305 case '2':
2306 case '3':
2307 case '4':
2308 case '5':
2309 case '6':
2310 case '7':
2311 case '8':
2312 case '9':
2313 skip = 0;
2314 if (!ignore) {
2315 *(out++) = *s;
2316 }
2317 break;
2318 case '.':
2319 ignore = 1;
2320 break;
2321 default:
2322 SvREFCNT_dec(result);
2323 return (NULL);
2324 }
2325 s++;
2326 }
2327 *(out++) = '\0';
2328 SvCUR_set(result, out - result_c);
2329 return (result);
2330}
2331
2332/* pnum must be '\0' terminated */
2333STATIC int
2334S_div128(pTHX_ SV *pnum, bool *done)
2335{
8b6e33c7
AL
2336 STRLEN len;
2337 char * const s = SvPV(pnum, len);
2338 char *t = s;
2339 int m = 0;
2340
7918f24d
NC
2341 PERL_ARGS_ASSERT_DIV128;
2342
8b6e33c7
AL
2343 *done = 1;
2344 while (*t) {
2345 const int i = m * 10 + (*t - '0');
2346 const int r = (i >> 7); /* r < 10 */
2347 m = i & 0x7F;
2348 if (r) {
2349 *done = 0;
2350 }
2351 *(t++) = '0' + r;
a6ec74c1 2352 }
8b6e33c7
AL
2353 *(t++) = '\0';
2354 SvCUR_set(pnum, (STRLEN) (t - s));
2355 return (m);
a6ec74c1
JH
2356}
2357
18529408 2358/*
7accc089
JH
2359=for apidoc packlist
2360
2361The engine implementing pack() Perl function.
2362
bfce84ec
AL
2363=cut
2364*/
7accc089
JH
2365
2366void
f7fe979e 2367Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2368{
97aff369 2369 dVAR;
aadb217d
JH
2370 tempsym_t sym;
2371
7918f24d
NC
2372 PERL_ARGS_ASSERT_PACKLIST;
2373
f7fe979e 2374 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2375
f337b084
TH
2376 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2377 Also make sure any UTF8 flag is loaded */
56eb0262 2378 SvPV_force_nolen(cat);
bfce84ec
AL
2379 if (DO_UTF8(cat))
2380 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2381
49704364
WL
2382 (void)pack_rec( cat, &sym, beglist, endlist );
2383}
2384
f337b084
TH
2385/* like sv_utf8_upgrade, but also repoint the group start markers */
2386STATIC void
2387marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2388 STRLEN len;
2389 tempsym_t *group;
f7fe979e
AL
2390 const char *from_ptr, *from_start, *from_end, **marks, **m;
2391 char *to_start, *to_ptr;
f337b084
TH
2392
2393 if (SvUTF8(sv)) return;
2394
aa07b2f6 2395 from_start = SvPVX_const(sv);
f337b084
TH
2396 from_end = from_start + SvCUR(sv);
2397 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2398 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2399 if (from_ptr == from_end) {
2400 /* Simple case: no character needs to be changed */
2401 SvUTF8_on(sv);
2402 return;
2403 }
2404
3473cf63 2405 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2406 Newx(to_start, len, char);
f337b084
TH
2407 Copy(from_start, to_start, from_ptr-from_start, char);
2408 to_ptr = to_start + (from_ptr-from_start);
2409
a02a5408 2410 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2411 for (group=sym_ptr; group; group = group->previous)
2412 marks[group->level] = from_start + group->strbeg;
2413 marks[sym_ptr->level+1] = from_end+1;
2414 for (m = marks; *m < from_ptr; m++)
2415 *m = to_start + (*m-from_start);
2416
2417 for (;from_ptr < from_end; from_ptr++) {
2418 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2419 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2420 }
2421 *to_ptr = 0;
2422
2423 while (*m == from_ptr) *m++ = to_ptr;
2424 if (m != marks + sym_ptr->level+1) {
2425 Safefree(marks);
2426 Safefree(to_start);
5f5991a0 2427 Perl_croak(aTHX_ "panic: marks beyond string end");
f337b084
TH
2428 }
2429 for (group=sym_ptr; group; group = group->previous)
2430 group->strbeg = marks[group->level] - to_start;
2431 Safefree(marks);
2432
2433 if (SvOOK(sv)) {
2434 if (SvIVX(sv)) {
b162af07 2435 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2436 from_start -= SvIVX(sv);
2437 SvIV_set(sv, 0);
2438 }
2439 SvFLAGS(sv) &= ~SVf_OOK;
2440 }
2441 if (SvLEN(sv) != 0)
2442 Safefree(from_start);
f880fe2f 2443 SvPV_set(sv, to_start);
b162af07
SP
2444 SvCUR_set(sv, to_ptr - to_start);
2445 SvLEN_set(sv, len);
f337b084
TH
2446 SvUTF8_on(sv);
2447}
2448
2449/* Exponential string grower. Makes string extension effectively O(n)
2450 needed says how many extra bytes we need (not counting the final '\0')
2451 Only grows the string if there is an actual lack of space
2452*/
2453STATIC char *
0bd48802 2454S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2455 const STRLEN cur = SvCUR(sv);
2456 const STRLEN len = SvLEN(sv);
f337b084 2457 STRLEN extend;
7918f24d
NC
2458
2459 PERL_ARGS_ASSERT_SV_EXP_GROW;
2460
f337b084
TH
2461 if (len - cur > needed) return SvPVX(sv);
2462 extend = needed > len ? needed : len;
2463 return SvGROW(sv, len+extend+1);
2464}
49704364
WL
2465
2466STATIC
2467SV **
f337b084 2468S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2469{
97aff369 2470 dVAR;
49704364 2471 tempsym_t lookahead;
f337b084
TH
2472 I32 items = endlist - beglist;
2473 bool found = next_symbol(symptr);
2474 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2475 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2476
7918f24d
NC
2477 PERL_ARGS_ASSERT_PACK_REC;
2478
f337b084
TH
2479 if (symptr->level == 0 && found && symptr->code == 'U') {
2480 marked_upgrade(aTHX_ cat, symptr);
2481 symptr->flags |= FLAG_DO_UTF8;
2482 utf8 = 0;
49704364 2483 }
f337b084 2484 symptr->strbeg = SvCUR(cat);
49704364
WL
2485
2486 while (found) {
f337b084
TH
2487 SV *fromstr;
2488 STRLEN fromlen;
2489 I32 len;
a0714e2c 2490 SV *lengthcode = NULL;
49704364 2491 I32 datumtype = symptr->code;
f337b084
TH
2492 howlen_t howlen = symptr->howlen;
2493 char *start = SvPVX(cat);
2494 char *cur = start + SvCUR(cat);
49704364 2495
f337b084
TH
2496#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2497
2498 switch (howlen) {
fc241834 2499 case e_star:
f337b084
TH
2500 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2501 0 : items;
2502 break;
2503 default:
2504 /* e_no_len and e_number */
2505 len = symptr->length;
49704364
WL
2506 break;
2507 }
2508
f337b084 2509 if (len) {
a7a3cfaa 2510 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2511
a7a3cfaa
TH
2512 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2513 /* We can process this letter. */
2514 STRLEN size = props & PACK_SIZE_MASK;
2515 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2516 }
f337b084
TH
2517 }
2518
49704364
WL
2519 /* Look ahead for next symbol. Do we have code/code? */
2520 lookahead = *symptr;
2521 found = next_symbol(&lookahead);
246f24af
TH
2522 if (symptr->flags & FLAG_SLASH) {
2523 IV count;
f337b084 2524 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2525 if (strchr("aAZ", lookahead.code)) {
2526 if (lookahead.howlen == e_number) count = lookahead.length;
2527 else {
ce399ba6
NC
2528 if (items > 0) {
2529 if (SvGAMAGIC(*beglist)) {
2530 /* Avoid reading the active data more than once
2531 by copying it to a temporary. */
2532 STRLEN len;
2533 const char *const pv = SvPV_const(*beglist, len);
740cce10 2534 SV *const temp
59cd0e26
NC
2535 = newSVpvn_flags(pv, len,
2536 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2537 *beglist = temp;
2538 }
246f24af
TH
2539 count = DO_UTF8(*beglist) ?
2540 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2541 }
246f24af
TH
2542 else count = 0;
2543 if (lookahead.code == 'Z') count++;
2544 }
2545 } else {
2546 if (lookahead.howlen == e_number && lookahead.length < items)
2547 count = lookahead.length;
2548 else count = items;
2549 }
2550 lookahead.howlen = e_number;
2551 lookahead.length = count;
2552 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2553 }
49704364 2554
fc241834
RGS
2555 /* Code inside the switch must take care to properly update
2556 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2557 doesn't simply leave using break */
1109a392 2558 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2559 default:
f337b084
TH
2560 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2561 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2562 case '%':
49704364 2563 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2564 {
2565 char *from;
2566#ifdef PERL_PACK_CAN_SHRIEKSIGN
2567 case '.' | TYPE_IS_SHRIEKING:
2568#endif
2569 case '.':
2570 if (howlen == e_star) from = start;
2571 else if (len == 0) from = cur;
2572 else {
2573 tempsym_t *group = symptr;
2574
2575 while (--len && group) group = group->previous;
2576 from = group ? start + group->strbeg : start;
2577 }
2578 fromstr = NEXTFROM;
2579 len = SvIV(fromstr);
2580 goto resize;
2581#ifdef PERL_PACK_CAN_SHRIEKSIGN
2582 case '@' | TYPE_IS_SHRIEKING:
2583#endif
a6ec74c1 2584 case '@':
28be1210
TH
2585 from = start + symptr->strbeg;
2586 resize:
2587#ifdef PERL_PACK_CAN_SHRIEKSIGN
2588 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2589#else /* PERL_PACK_CAN_SHRIEKSIGN */
2590 if (utf8)
2591#endif
2592 if (len >= 0) {
2593 while (len && from < cur) {
2594 from += UTF8SKIP(from);
2595 len--;
2596 }
2597 if (from > cur)
2598 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2599 if (len) {
2600 /* Here we know from == cur */
2601 grow:
2602 GROWING(0, cat, start, cur, len);
2603 Zero(cur, len, char);
2604 cur += len;
2605 } else if (from < cur) {
2606 len = cur - from;
2607 goto shrink;
2608 } else goto no_change;
2609 } else {
2610 cur = from;
2611 len = -len;
2612 goto utf8_shrink;
f337b084 2613 }
28be1210
TH
2614 else {
2615 len -= cur - from;
f337b084 2616 if (len > 0) goto grow;
28be1210 2617 if (len == 0) goto no_change;
fc241834 2618 len = -len;
28be1210 2619 goto shrink;
f337b084 2620 }
a6ec74c1 2621 break;
28be1210 2622 }
fc241834 2623 case '(': {
49704364 2624 tempsym_t savsym = *symptr;
66c611c5
MHM
2625 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2626 symptr->flags |= group_modifiers;
49704364
WL
2627 symptr->patend = savsym.grpend;
2628 symptr->level++;
f337b084 2629 symptr->previous = &lookahead;
18529408 2630 while (len--) {
f337b084
TH
2631 U32 was_utf8;
2632 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2633 else symptr->flags &= ~FLAG_PARSE_UTF8;
2634 was_utf8 = SvUTF8(cat);
49704364 2635 symptr->patptr = savsym.grpbeg;
f337b084
TH
2636 beglist = pack_rec(cat, symptr, beglist, endlist);
2637 if (SvUTF8(cat) != was_utf8)
2638 /* This had better be an upgrade while in utf8==0 mode */
2639 utf8 = 1;
2640
49704364 2641 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2642 break; /* No way to continue */
2643 }
ee790063 2644 items = endlist - beglist;
f337b084
TH
2645 lookahead.flags = symptr->flags & ~group_modifiers;
2646 goto no_change;
18529408 2647 }
62f95557
IZ
2648 case 'X' | TYPE_IS_SHRIEKING:
2649 if (!len) /* Avoid division by 0 */
2650 len = 1;
f337b084
TH
2651 if (utf8) {
2652 char *hop, *last;
2653 I32 l = len;
2654 hop = last = start;
2655 while (hop < cur) {
2656 hop += UTF8SKIP(hop);
2657 if (--l == 0) {
2658 last = hop;
2659 l = len;
2660 }
2661 }
2662 if (last > cur)
2663 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2664 cur = last;
2665 break;
2666 }
2667 len = (cur-start) % len;
62f95557 2668 /* FALL THROUGH */
a6ec74c1 2669 case 'X':
f337b084
TH
2670 if (utf8) {
2671 if (len < 1) goto no_change;
28be1210 2672 utf8_shrink:
f337b084
TH
2673 while (len > 0) {
2674 if (cur <= start)
28be1210
TH
2675 Perl_croak(aTHX_ "'%c' outside of string in pack",
2676 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2677 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2678 if (cur <= start)
28be1210
TH
2679 Perl_croak(aTHX_ "'%c' outside of string in pack",
2680 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2681 }
2682 len--;
2683 }
2684 } else {
fc241834 2685 shrink:
f337b084 2686 if (cur - start < len)
28be1210
TH
2687 Perl_croak(aTHX_ "'%c' outside of string in pack",
2688 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2689 cur -= len;
2690 }
2691 if (cur < start+symptr->strbeg) {
2692 /* Make sure group starts don't point into the void */
2693 tempsym_t *group;
9e27e96a 2694 const STRLEN length = cur-start;
f337b084
TH
2695 for (group = symptr;
2696 group && length < group->strbeg;
2697 group = group->previous) group->strbeg = length;
2698 lookahead.strbeg = length;
2699 }
a6ec74c1 2700 break;
fc241834
RGS
2701 case 'x' | TYPE_IS_SHRIEKING: {
2702 I32 ai32;
62f95557
IZ
2703 if (!len) /* Avoid division by 0 */
2704 len = 1;
230e1fce 2705 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2706 else ai32 = (cur - start) % len;
2707 if (ai32 == 0) goto no_change;
2708 len -= ai32;
2709 }
2710 /* FALL THROUGH */
a6ec74c1 2711 case 'x':
f337b084 2712 goto grow;
a6ec74c1
JH
2713 case 'A':
2714 case 'Z':
f337b084 2715 case 'a': {
f7fe979e 2716 const char *aptr;
f337b084 2717
a6ec74c1 2718 fromstr = NEXTFROM;
e62f0680 2719 aptr = SvPV_const(fromstr, fromlen);
f337b084 2720 if (DO_UTF8(fromstr)) {
f7fe979e 2721 const char *end, *s;
f337b084
TH
2722
2723 if (!utf8 && !SvUTF8(cat)) {
2724 marked_upgrade(aTHX_ cat, symptr);
2725 lookahead.flags |= FLAG_DO_UTF8;
2726 lookahead.strbeg = symptr->strbeg;
2727 utf8 = 1;
2728 start = SvPVX(cat);
2729 cur = start + SvCUR(cat);
2730 }
fc241834 2731 if (howlen == e_star) {
f337b084
TH
2732 if (utf8) goto string_copy;
2733 len = fromlen+1;
2734 }
2735 s = aptr;
2736 end = aptr + fromlen;
2737 fromlen = datumtype == 'Z' ? len-1 : len;
2738 while ((I32) fromlen > 0 && s < end) {
2739 s += UTF8SKIP(s);
2740 fromlen--;
2741 }
2742 if (s > end)
2743 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2744 if (utf8) {
fc241834 2745 len = fromlen;
f337b084
TH
2746 if (datumtype == 'Z') len++;
2747 fromlen = s-aptr;
2748 len += fromlen;
fc241834 2749
f337b084 2750 goto string_copy;
fc241834 2751 }
f337b084
TH
2752 fromlen = len - fromlen;
2753 if (datumtype == 'Z') fromlen--;
2754 if (howlen == e_star) {
2755 len = fromlen;
2756 if (datumtype == 'Z') len++;
fc241834 2757 }
f337b084 2758 GROWING(0, cat, start, cur, len);
fc241834 2759 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2760 datumtype | TYPE_IS_PACK))
5f5991a0 2761 Perl_croak(aTHX_ "panic: predicted utf8 length not available");
f337b084 2762 cur += fromlen;
a6ec74c1 2763 len -= fromlen;
f337b084
TH
2764 } else if (utf8) {
2765 if (howlen == e_star) {
2766 len = fromlen;
2767 if (datumtype == 'Z') len++;
a6ec74c1 2768 }
f337b084
TH
2769 if (len <= (I32) fromlen) {
2770 fromlen = len;
2771 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2772 }
fc241834 2773 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2774 upgrade, so:
2775 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2776 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2777 len -= fromlen;
2778 while (fromlen > 0) {
230e1fce 2779 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2780 aptr++;
2781 fromlen--;
fc241834 2782 }
f337b084
TH
2783 } else {
2784 string_copy:
2785 if (howlen == e_star) {
2786 len = fromlen;
2787 if (datumtype == 'Z') len++;
2788 }
2789 if (len <= (I32) fromlen) {
2790 fromlen = len;
2791 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2792 }
f337b084
TH
2793 GROWING(0, cat, start, cur, len);
2794 Copy(aptr, cur, fromlen, char);
2795 cur += fromlen;
2796 len -= fromlen;
a6ec74c1 2797 }
f337b084
TH
2798 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2799 cur += len;
3c4fb04a 2800 SvTAINT(cat);
a6ec74c1 2801 break;
f337b084 2802 }
a6ec74c1 2803 case 'B':
f337b084 2804 case 'b': {
b83604b4 2805 const char *str, *end;
f337b084
TH
2806 I32 l, field_len;
2807 U8 bits;
2808 bool utf8_source;
2809 U32 utf8_flags;
a6ec74c1 2810
fc241834 2811 fromstr = NEXTFROM;
b83604b4 2812 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2813 end = str + fromlen;
2814 if (DO_UTF8(fromstr)) {
2815 utf8_source = TRUE;
041457d9 2816 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2817 } else {
2818 utf8_source = FALSE;
2819 utf8_flags = 0; /* Unused, but keep compilers happy */
2820 }
2821 if (howlen == e_star) len = fromlen;
2822 field_len = (len+7)/8;
2823 GROWING(utf8, cat, start, cur, field_len);
2824 if (len > (I32)fromlen) len = fromlen;
2825 bits = 0;
2826 l = 0;
2827 if (datumtype == 'B')
2828 while (l++ < len) {
2829 if (utf8_source) {
95b63a38 2830 UV val = 0;
f337b084
TH
2831 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2832 bits |= val & 1;
2833 } else bits |= *str++ & 1;
2834 if (l & 7) bits <<= 1;
fc241834 2835 else {
f337b084
TH
2836 PUSH_BYTE(utf8, cur, bits);
2837 bits = 0;
a6ec74c1
JH
2838 }
2839 }
f337b084
TH
2840 else
2841 /* datumtype == 'b' */
2842 while (l++ < len) {
2843 if (utf8_source) {
95b63a38 2844 UV val = 0;
f337b084
TH
2845 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2846 if (val & 1) bits |= 0x80;
2847 } else if (*str++ & 1)
2848 bits |= 0x80;
2849 if (l & 7) bits >>= 1;
fc241834 2850 else {
f337b084
TH
2851 PUSH_BYTE(utf8, cur, bits);
2852 bits = 0;
a6ec74c1
JH
2853 }
2854 }
f337b084
TH
2855 l--;
2856 if (l & 7) {
fc241834 2857 if (datumtype == 'B')
f337b084 2858 bits <<= 7 - (l & 7);
fc241834 2859 else
f337b084
TH
2860 bits >>= 7 - (l & 7);
2861 PUSH_BYTE(utf8, cur, bits);
2862 l += 7;
a6ec74c1 2863 }
f337b084
TH
2864 /* Determine how many chars are left in the requested field */
2865 l /= 8;
2866 if (howlen == e_star) field_len = 0;
2867 else field_len -= l;
2868 Zero(cur, field_len, char);
2869 cur += field_len;
a6ec74c1 2870 break;
f337b084 2871 }
a6ec74c1 2872 case 'H':
f337b084 2873 case 'h': {
10516c54 2874 const char *str, *end;
f337b084
TH
2875 I32 l, field_len;
2876 U8 bits;
2877 bool utf8_source;
2878 U32 utf8_flags;
a6ec74c1 2879
fc241834 2880 fromstr = NEXTFROM;
10516c54 2881 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2882 end = str + fromlen;
2883 if (DO_UTF8(fromstr)) {
2884 utf8_source = TRUE;
041457d9 2885 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2886 } else {
2887 utf8_source = FALSE;
2888 utf8_flags = 0; /* Unused, but keep compilers happy */
2889 }
2890 if (howlen == e_star) len = fromlen;
2891 field_len = (len+1)/2;
2892 GROWING(utf8, cat, start, cur, field_len);
2893 if (!utf8 && len > (I32)fromlen) len = fromlen;
2894 bits = 0;
2895 l = 0;
2896 if (datumtype == 'H')
2897 while (l++ < len) {
2898 if (utf8_source) {
95b63a38 2899 UV val = 0;
f337b084
TH
2900 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2901 if (val < 256 && isALPHA(val))
2902 bits |= (val + 9) & 0xf;
a6ec74c1 2903 else
f337b084
TH
2904 bits |= val & 0xf;
2905 } else if (isALPHA(*str))
2906 bits |= (*str++ + 9) & 0xf;
2907 else
2908 bits |= *str++ & 0xf;
2909 if (l & 1) bits <<= 4;
fc241834 2910 else {
f337b084
TH
2911 PUSH_BYTE(utf8, cur, bits);
2912 bits = 0;
a6ec74c1
JH
2913 }
2914 }
f337b084
TH
2915 else
2916 while (l++ < len) {
2917 if (utf8_source) {
95b63a38 2918 UV val = 0;
f337b084
TH
2919 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2920 if (val < 256 && isALPHA(val))
2921 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2922 else
f337b084
TH
2923 bits |= (val & 0xf) << 4;
2924 } else if (isALPHA(*str))
2925 bits |= ((*str++ + 9) & 0xf) << 4;
2926 else
2927 bits |= (*str++ & 0xf) << 4;
2928 if (l & 1) bits >>= 4;
fc241834 2929 else {
f337b084
TH
2930 PUSH_BYTE(utf8, cur, bits);
2931 bits = 0;
a6ec74c1 2932 }
fc241834 2933 }
f337b084
TH
2934 l--;
2935 if (l & 1) {
2936 PUSH_BYTE(utf8, cur, bits);
2937 l++;
2938 }
2939 /* Determine how many chars are left in the requested field */
2940 l /= 2;
2941 if (howlen == e_star) field_len = 0;
2942 else field_len -= l;
2943 Zero(cur, field_len, char);
2944 cur += field_len;
2945 break;
fc241834
RGS
2946 }
2947 case 'c':
f337b084
TH
2948 while (len-- > 0) {
2949 IV aiv;
2950 fromstr = NEXTFROM;
2951 aiv = SvIV(fromstr);
a2a5de95
NC
2952 if ((-128 > aiv || aiv > 127))
2953 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2954 "Character in 'c' format wrapped in pack");
585ec06d 2955 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2956 }
2957 break;
2958 case 'C':
f337b084
TH
2959 if (len == 0) {
2960 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2961 break;
2962 }
a6ec74c1 2963 while (len-- > 0) {
f337b084 2964 IV aiv;
a6ec74c1 2965 fromstr = NEXTFROM;
f337b084 2966 aiv = SvIV(fromstr);
a2a5de95
NC
2967 if ((0 > aiv || aiv > 0xff))
2968 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2969 "Character in 'C' format wrapped in pack");
1651fc44 2970 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2971 }
fc241834
RGS
2972 break;
2973 case 'W': {
2974 char *end;
670f1322 2975 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2976
2977 end = start+SvLEN(cat)-1;
2978 if (utf8) end -= UTF8_MAXLEN-1;
2979 while (len-- > 0) {
2980 UV auv;
2981 fromstr = NEXTFROM;
2982 auv = SvUV(fromstr);
2983 if (in_bytes) auv = auv % 0x100;
2984 if (utf8) {
2985 W_utf8:
2986 if (cur > end) {
2987 *cur = '\0';
b162af07 2988 SvCUR_set(cat, cur - start);
fc241834
RGS
2989
2990 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2991 end = start+SvLEN(cat)-UTF8_MAXLEN;
2992 }
230e1fce
NC
2993 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2994 NATIVE_TO_UNI(auv),
041457d9 2995 warn_utf8 ?
230e1fce 2996 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2997 } else {
2998 if (auv >= 0x100) {
2999 if (!SvUTF8(cat)) {
3000 *cur = '\0';
b162af07 3001 SvCUR_set(cat, cur - start);
fc241834
RGS
3002 marked_upgrade(aTHX_ cat, symptr);
3003 lookahead.flags |= FLAG_DO_UTF8;
3004 lookahead.strbeg = symptr->strbeg;
3005 utf8 = 1;
3006 start = SvPVX(cat);
3007 cur = start + SvCUR(cat);
3008 end = start+SvLEN(cat)-UTF8_MAXLEN;
3009 goto W_utf8;
3010 }
a2a5de95
NC
3011 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3012 "Character in 'W' format wrapped in pack");
fc241834
RGS
3013 auv &= 0xff;
3014 }
3015 if (cur >= end) {
3016 *cur = '\0';
b162af07 3017 SvCUR_set(cat, cur - start);
fc241834
RGS
3018 GROWING(0, cat, start, cur, len+1);
3019 end = start+SvLEN(cat)-1;
3020 }
fe2774ed 3021 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3022 }
3023 }
3024 break;
fc241834
RGS
3025 }
3026 case 'U': {
3027 char *end;
3028
3029 if (len == 0) {
3030 if (!(symptr->flags & FLAG_DO_UTF8)) {
3031 marked_upgrade(aTHX_ cat, symptr);
3032 lookahead.flags |= FLAG_DO_UTF8;
3033 lookahead.strbeg = symptr->strbeg;
3034 }
3035 utf8 = 0;
3036 goto no_change;
3037 }
3038
3039 end = start+SvLEN(cat);
3040 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3041 while (len-- > 0) {
fc241834 3042 UV auv;
a6ec74c1 3043 fromstr = NEXTFROM;
fc241834
RGS
3044 auv = SvUV(fromstr);
3045 if (utf8) {
230e1fce 3046 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3047 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3048 warn_utf8 ?
fc241834
RGS
3049 0 : UNICODE_ALLOW_ANY);
3050 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3051 *cur = '\0';
b162af07 3052 SvCUR_set(cat, cur - start);
fc241834
RGS
3053 GROWING(0, cat, start, cur,
3054 len+(endb-buffer)*UTF8_EXPAND);
3055 end = start+SvLEN(cat);
3056 }
64844641 3057 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3058 } else {
3059 if (cur >= end) {
3060 *cur = '\0';
b162af07 3061 SvCUR_set(cat, cur - start);
fc241834
RGS
3062 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3063 end = start+SvLEN(cat)-UTF8_MAXLEN;
3064 }
230e1fce 3065 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3066 warn_utf8 ?
230e1fce 3067 0 : UNICODE_ALLOW_ANY);
fc241834 3068 }
a6ec74c1 3069 }
a6ec74c1 3070 break;
fc241834 3071 }
a6ec74c1
JH
3072 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3073 case 'f':
a6ec74c1 3074 while (len-- > 0) {
f337b084
TH
3075 float afloat;
3076 NV anv;
a6ec74c1 3077 fromstr = NEXTFROM;
f337b084 3078 anv = SvNV(fromstr);
5cdb9e01 3079#ifdef __VOS__
f337b084 3080 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3081 during conversion from double to float into infinity, so we
3082 do it by hand. This code should either be generalized for
3083 any OS that needs it, or removed if and when VOS implements
3084 posix-976 (suggestion to support mapping to infinity).
3085 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3086{
3087extern const float _float_constants[];
f337b084 3088 if (anv > FLT_MAX)
fc241834 3089 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3090 else if (anv < -FLT_MAX)
fc241834 3091 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3092 else afloat = (float) anv;
3722f0dc 3093}
f337b084 3094#else /* __VOS__ */
baf3cf9c 3095# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3096 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3097 * on Alpha; fake it if we don't have them.
3098 */
f337b084 3099 if (anv > FLT_MAX)
fc241834 3100 afloat = FLT_MAX;
f337b084 3101 else if (anv < -FLT_MAX)
fc241834 3102 afloat = -FLT_MAX;
f337b084 3103 else afloat = (float)anv;
baf3cf9c 3104# else
f337b084 3105 afloat = (float)anv;
baf3cf9c 3106# endif
f337b084 3107#endif /* __VOS__ */
1109a392 3108 DO_BO_PACK_N(afloat, float);
f337b084 3109 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3110 }
3111 break;
3112 case 'd':
a6ec74c1 3113 while (len-- > 0) {
f337b084
TH
3114 double adouble;
3115 NV anv;
a6ec74c1 3116 fromstr = NEXTFROM;
f337b084 3117 anv = SvNV(fromstr);
5cdb9e01 3118#ifdef __VOS__
f337b084 3119 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3120 during conversion from long double to double into infinity,
3121 so we do it by hand. This code should either be generalized
3122 for any OS that needs it, or removed if and when VOS
3123 implements posix-976 (suggestion to support mapping to
3124 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3125{
3126extern const double _double_constants[];
f337b084 3127 if (anv > DBL_MAX)
fc241834 3128 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3129 else if (anv < -DBL_MAX)
fc241834 3130 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3131 else adouble = (double) anv;
3722f0dc 3132}
f337b084 3133#else /* __VOS__ */
baf3cf9c 3134# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3135 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3136 * on Alpha; fake it if we don't have them.
3137 */
f337b084 3138 if (anv > DBL_MAX)
fc241834 3139 adouble = DBL_MAX;
f337b084 3140 else if (anv < -DBL_MAX)
fc241834 3141 adouble = -DBL_MAX;
f337b084 3142 else adouble = (double)anv;
baf3cf9c 3143# else
f337b084 3144 adouble = (double)anv;
baf3cf9c 3145# endif
f337b084 3146#endif /* __VOS__ */
1109a392 3147 DO_BO_PACK_N(adouble, double);
f337b084 3148 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3149 }
3150 break;
fc241834
RGS
3151 case 'F': {
3152 NV anv;
1109a392 3153 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3154 while (len-- > 0) {
3155 fromstr = NEXTFROM;
3156 anv = SvNV(fromstr);
1109a392 3157 DO_BO_PACK_N(anv, NV);
fc241834 3158 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3159 }
3160 break;
fc241834 3161 }
92d41999 3162#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3163 case 'D': {
3164 long double aldouble;
1109a392
MHM
3165 /* long doubles can have unused bits, which may be nonzero */
3166 Zero(&aldouble, 1, long double);
92d41999
JH
3167 while (len-- > 0) {
3168 fromstr = NEXTFROM;
3169 aldouble = (long double)SvNV(fromstr);
1109a392 3170 DO_BO_PACK_N(aldouble, long double);
fc241834 3171 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3172 }
3173 break;
fc241834 3174 }
92d41999 3175#endif
7212898e 3176#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3177 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3178#endif
a6ec74c1
JH
3179 case 'n':
3180 while (len-- > 0) {
f337b084 3181 I16 ai16;
a6ec74c1 3182 fromstr = NEXTFROM;
ef108786 3183 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3184#ifdef HAS_HTONS
ef108786 3185 ai16 = PerlSock_htons(ai16);
a6ec74c1 3186#endif
f337b084 3187 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3188 }
3189 break;
7212898e 3190#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3191 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3192#endif
a6ec74c1
JH
3193 case 'v':
3194 while (len-- > 0) {
f337b084 3195 I16 ai16;
a6ec74c1 3196 fromstr = NEXTFROM;
ef108786 3197 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3198#ifdef HAS_HTOVS
ef108786 3199 ai16 = htovs(ai16);
a6ec74c1 3200#endif
f337b084 3201 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3202 }
3203 break;
49704364 3204 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3205#if SHORTSIZE != SIZE16
fc241834 3206 while (len-- > 0) {
f337b084 3207 unsigned short aushort;
fc241834
RGS
3208 fromstr = NEXTFROM;
3209 aushort = SvUV(fromstr);
3210 DO_BO_PACK(aushort, s);
f337b084 3211 PUSH_VAR(utf8, cur, aushort);
fc241834 3212 }
49704364
WL
3213 break;
3214#else
3215 /* Fall through! */
a6ec74c1 3216#endif
49704364 3217 case 'S':
fc241834 3218 while (len-- > 0) {
f337b084 3219 U16 au16;
fc241834
RGS
3220 fromstr = NEXTFROM;
3221 au16 = (U16)SvUV(fromstr);
3222 DO_BO_PACK(au16, 16);
f337b084 3223 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3224 }
3225 break;
49704364 3226 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3227#if SHORTSIZE != SIZE16
fc241834 3228 while (len-- > 0) {
f337b084 3229 short ashort;
fc241834
RGS
3230 fromstr = NEXTFROM;
3231 ashort = SvIV(fromstr);
3232 DO_BO_PACK(ashort, s);
f337b084 3233 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3234 }
49704364
WL
3235 break;
3236#else
3237 /* Fall through! */
a6ec74c1 3238#endif
49704364
WL
3239 case 's':
3240 while (len-- > 0) {
f337b084 3241 I16 ai16;
49704364 3242 fromstr = NEXTFROM;
ef108786
MHM
3243 ai16 = (I16)SvIV(fromstr);
3244 DO_BO_PACK(ai16, 16);
f337b084 3245 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3246 }
3247 break;
3248 case 'I':
49704364 3249 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3250 while (len-- > 0) {
f337b084 3251 unsigned int auint;
a6ec74c1
JH
3252 fromstr = NEXTFROM;
3253 auint = SvUV(fromstr);
1109a392 3254 DO_BO_PACK(auint, i);
f337b084 3255 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3256 }
3257 break;
92d41999
JH
3258 case 'j':
3259 while (len-- > 0) {
f337b084 3260 IV aiv;
92d41999
JH
3261 fromstr = NEXTFROM;
3262 aiv = SvIV(fromstr);
1109a392
MHM
3263#if IVSIZE == INTSIZE
3264 DO_BO_PACK(aiv, i);
3265#elif IVSIZE == LONGSIZE
3266 DO_BO_PACK(aiv, l);
3267#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3268 DO_BO_PACK(aiv, 64);
f337b084
TH
3269#else
3270 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3271#endif
f337b084 3272 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3273 }
3274 break;
3275 case 'J':
3276 while (len-- > 0) {
f337b084 3277 UV auv;
92d41999
JH
3278 fromstr = NEXTFROM;
3279 auv = SvUV(fromstr);
1109a392
MHM
3280#if UVSIZE == INTSIZE
3281 DO_BO_PACK(auv, i);
3282#elif UVSIZE == LONGSIZE
3283 DO_BO_PACK(auv, l);
3284#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3285 DO_BO_PACK(auv, 64);
f337b084
TH
3286#else
3287 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3288#endif
f337b084 3289 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3290 }
3291 break;
a6ec74c1
JH
3292 case 'w':
3293 while (len-- > 0) {
f337b084 3294 NV anv;
a6ec74c1 3295 fromstr = NEXTFROM;
15e9f109 3296 anv = SvNV(fromstr);
a6ec74c1 3297
f337b084
TH
3298 if (anv < 0) {
3299 *cur = '\0';
b162af07 3300 SvCUR_set(cat, cur - start);
49704364 3301 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3302 }
a6ec74c1 3303
196b62db
NC
3304 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3305 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3306 any negative IVs will have already been got by the croak()
3307 above. IOK is untrue for fractions, so we test them
3308 against UV_MAX_P1. */
f337b084
TH
3309 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3310 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3311 char *in = buf + sizeof(buf);
196b62db 3312 UV auv = SvUV(fromstr);
a6ec74c1
JH
3313
3314 do {
eb160463 3315 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3316 auv >>= 7;
3317 } while (auv);
3318 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3319 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3320 in, (buf + sizeof(buf)) - in);
3321 } else if (SvPOKp(fromstr))
3322 goto w_string;
a6ec74c1 3323 else if (SvNOKp(fromstr)) {
0258719b
NC
3324 /* 10**NV_MAX_10_EXP is the largest power of 10
3325 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3326 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3327 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3328 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3329 Some C compilers are strict about integral constant
3330 expressions so we conservatively divide by a slightly
3331 smaller integer instead of multiplying by the exact
3332 floating-point value.
0258719b
NC
3333 */
3334#ifdef NV_MAX_10_EXP
f337b084 3335 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3336 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3337#else
f337b084 3338 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3339 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3340#endif
a6ec74c1
JH
3341 char *in = buf + sizeof(buf);
3342
8b6e33c7 3343 anv = Perl_floor(anv);
a6ec74c1 3344 do {
8b6e33c7 3345 const NV next = Perl_floor(anv / 128);
a6ec74c1 3346 if (in <= buf) /* this cannot happen ;-) */
49704364 3347 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3348 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3349 anv = next;
3350 } while (anv > 0);
a6ec74c1 3351 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3352 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3353 in, (buf + sizeof(buf)) - in);
3354 } else {
8b6e33c7
AL
3355 const char *from;
3356 char *result, *in;
735b914b
JH
3357 SV *norm;
3358 STRLEN len;
3359 bool done;
3360
f337b084 3361 w_string:
735b914b 3362 /* Copy string and check for compliance */
349d4f2f 3363 from = SvPV_const(fromstr, len);
735b914b 3364 if ((norm = is_an_int(from, len)) == NULL)
49704364 3365 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3366
a02a5408 3367 Newx(result, len, char);
735b914b
JH
3368 in = result + len;
3369 done = FALSE;
f337b084 3370 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3371 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3372 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3373 in, (result + len) - in);
735b914b
JH
3374 Safefree(result);
3375 SvREFCNT_dec(norm); /* free norm */
fc241834 3376 }
a6ec74c1
JH
3377 }
3378 break;
3379 case 'i':
49704364 3380 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3381 while (len-- > 0) {
f337b084 3382 int aint;
a6ec74c1
JH
3383 fromstr = NEXTFROM;
3384 aint = SvIV(fromstr);
1109a392 3385 DO_BO_PACK(aint, i);
f337b084 3386 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3387 }
3388 break;
7212898e 3389#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3390 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3391#endif
a6ec74c1
JH
3392 case 'N':
3393 while (len-- > 0) {
f337b084 3394 U32 au32;
a6ec74c1 3395 fromstr = NEXTFROM;
ef108786 3396 au32 = SvUV(fromstr);
a6ec74c1 3397#ifdef HAS_HTONL
ef108786 3398 au32 = PerlSock_htonl(au32);
a6ec74c1 3399#endif
f337b084 3400 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3401 }
3402 break;
7212898e 3403#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3404 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3405#endif
a6ec74c1
JH
3406 case 'V':
3407 while (len-- > 0) {
f337b084 3408 U32 au32;
a6ec74c1 3409 fromstr = NEXTFROM;
ef108786 3410 au32 = SvUV(fromstr);
a6ec74c1 3411#ifdef HAS_HTOVL
ef108786 3412 au32 = htovl(au32);
a6ec74c1 3413#endif
f337b084 3414 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3415 }
3416 break;
49704364 3417 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3418#if LONGSIZE != SIZE32
fc241834 3419 while (len-- > 0) {
f337b084 3420 unsigned long aulong;
fc241834
RGS
3421 fromstr = NEXTFROM;
3422 aulong = SvUV(fromstr);
3423 DO_BO_PACK(aulong, l);
f337b084 3424 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3425 }
49704364
WL
3426 break;
3427#else
3428 /* Fall though! */
a6ec74c1 3429#endif
49704364 3430 case 'L':
fc241834 3431 while (len-- > 0) {
f337b084 3432 U32 au32;
fc241834
RGS
3433 fromstr = NEXTFROM;
3434 au32 = SvUV(fromstr);
3435 DO_BO_PACK(au32, 32);
f337b084 3436 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3437 }
3438 break;
49704364 3439 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3440#if LONGSIZE != SIZE32
fc241834 3441 while (len-- > 0) {
f337b084 3442 long along;
fc241834
RGS
3443 fromstr = NEXTFROM;
3444 along = SvIV(fromstr);
3445 DO_BO_PACK(along, l);
f337b084 3446 PUSH_VAR(utf8, cur, along);
a6ec74c1 3447 }
49704364
WL
3448 break;
3449#else
3450 /* Fall though! */
a6ec74c1 3451#endif
49704364
WL
3452 case 'l':
3453 while (len-- > 0) {
f337b084 3454 I32 ai32;
49704364 3455 fromstr = NEXTFROM;
ef108786
MHM
3456 ai32 = SvIV(fromstr);
3457 DO_BO_PACK(ai32, 32);
f337b084 3458 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3459 }
3460 break;
3461#ifdef HAS_QUAD
3462 case 'Q':
3463 while (len-- > 0) {
f337b084 3464 Uquad_t auquad;
a6ec74c1 3465 fromstr = NEXTFROM;
f337b084 3466 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3467 DO_BO_PACK(auquad, 64);
f337b084 3468 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3469 }
3470 break;
3471 case 'q':
3472 while (len-- > 0) {
f337b084 3473 Quad_t aquad;
a6ec74c1
JH
3474 fromstr = NEXTFROM;
3475 aquad = (Quad_t)SvIV(fromstr);
1109a392 3476 DO_BO_PACK(aquad, 64);
f337b084 3477 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3478 }
3479 break;
f337b084 3480#endif /* HAS_QUAD */
a6ec74c1
JH
3481 case 'P':
3482 len = 1; /* assume SV is correct length */
f337b084 3483 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3484 /* Fall through! */
a6ec74c1
JH
3485 case 'p':
3486 while (len-- > 0) {
83003860 3487 const char *aptr;
f337b084 3488
a6ec74c1 3489 fromstr = NEXTFROM;
28a4f200
TH
3490 SvGETMAGIC(fromstr);
3491 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3492 else {
a6ec74c1
JH
3493 /* XXX better yet, could spirit away the string to
3494 * a safe spot and hang on to it until the result
3495 * of pack() (and all copies of the result) are
3496 * gone.
3497 */
041457d9 3498 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3499 !SvREADONLY(fromstr)))) {
3500 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3501 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3502 }
3503 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3504 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3505 else
2596d9fe 3506 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3507 }
07409e01 3508 DO_BO_PACK_PC(aptr);
f337b084 3509 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3510 }
3511 break;
fc241834 3512 case 'u': {
f7fe979e 3513 const char *aptr, *aend;
fc241834 3514 bool from_utf8;
f337b084 3515
a6ec74c1 3516 fromstr = NEXTFROM;
fc241834
RGS
3517 if (len <= 2) len = 45;
3518 else len = len / 3 * 3;
3519 if (len >= 64) {
a2a5de95
NC
3520 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3521 "Field too wide in 'u' format in pack");
fc241834
RGS
3522 len = 63;
3523 }
83003860 3524 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3525 from_utf8 = DO_UTF8(fromstr);
3526 if (from_utf8) {
3527 aend = aptr + fromlen;
3528 fromlen = sv_len_utf8(fromstr);
3529 } else aend = NULL; /* Unused, but keep compilers happy */
3530 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3531 while (fromlen > 0) {
fc241834 3532 U8 *end;
a6ec74c1 3533 I32 todo;
fc241834 3534 U8 hunk[1+63/3*4+1];
a6ec74c1 3535
eb160463 3536 if ((I32)fromlen > len)
a6ec74c1
JH
3537 todo = len;
3538 else
3539 todo = fromlen;
fc241834
RGS
3540 if (from_utf8) {
3541 char buffer[64];
3542 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3543 'u' | TYPE_IS_PACK)) {
3544 *cur = '\0';
b162af07 3545 SvCUR_set(cat, cur - start);
5f5991a0 3546 Perl_croak(aTHX_ "panic: string is shorter than advertised");
fc241834
RGS
3547 }
3548 end = doencodes(hunk, buffer, todo);
3549 } else {
3550 end = doencodes(hunk, aptr, todo);
3551 aptr += todo;
3552 }
3553 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3554 fromlen -= todo;
3555 }
a6ec74c1
JH
3556 break;
3557 }
f337b084
TH
3558 }
3559 *cur = '\0';
b162af07 3560 SvCUR_set(cat, cur - start);
f337b084 3561 no_change:
49704364 3562 *symptr = lookahead;
a6ec74c1 3563 }
49704364 3564 return beglist;
18529408
IZ
3565}
3566#undef NEXTFROM
3567
3568
3569PP(pp_pack)
3570{
97aff369 3571 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
18529408
IZ
3572 register SV *cat = TARG;
3573 STRLEN fromlen;
349d4f2f
NC
3574 SV *pat_sv = *++MARK;
3575 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3576 register const char *patend = pat + fromlen;
18529408
IZ
3577
3578 MARK++;
76f68e9b 3579 sv_setpvs(cat, "");
f337b084 3580 SvUTF8_off(cat);
18529408 3581
7accc089 3582 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3583
a6ec74c1
JH
3584 SvSETMAGIC(cat);
3585 SP = ORIGMARK;
3586 PUSHs(cat);
3587 RETURN;
3588}
a6ec74c1 3589
73cb7263
NC
3590/*
3591 * Local variables:
3592 * c-indentation-style: bsd
3593 * c-basic-offset: 4
3594 * indent-tabs-mode: t
3595 * End:
3596 *
37442d52
RGS
3597 * ex: set ts=8 sts=4 sw=4 noet:
3598 */