This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
combopatch
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e 34#if PERL_VERSION >= 9
f337b084
TH
35# define PERL_PACK_CAN_BYTEORDER
36# define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
39#ifndef CHAR_BIT
40# define CHAR_BIT 8
7212898e 41#endif
3473cf63
RGS
42/* Maximum number of bytes to which a byte can grow due to upgrade */
43#define UTF8_EXPAND 2
7212898e 44
a6ec74c1 45/*
a6ec74c1
JH
46 * Offset for integer pack/unpack.
47 *
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
50 */
51
52/*
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
58 */
59/*
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
62 */
63#define SIZE16 2
64#define SIZE32 4
65
66/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
67 --jhi Feb 1999 */
68
1109a392
MHM
69#if U16SIZE > SIZE16 || U32SIZE > SIZE32
70# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
71# define OFF16(p) ((char*)(p))
72# define OFF32(p) ((char*)(p))
a6ec74c1 73# else
1109a392 74# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
75# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
77# else
08ca2aa3 78 ++++ bad cray byte order
a6ec74c1
JH
79# endif
80# endif
a6ec74c1 81#else
08ca2aa3
TH
82# define OFF16(p) ((char *) (p))
83# define OFF32(p) ((char *) (p))
a6ec74c1
JH
84#endif
85
f337b084
TH
86/* Only to be used inside a loop (see the break) */
87#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
88 if (utf8) { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
90 } else { \
91 Copy(s, OFF16(p), SIZE16, char); \
92 (s) += SIZE16; \
93 } \
94} STMT_END
95
96/* Only to be used inside a loop (see the break) */
97#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
98 if (utf8) { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
100 } else { \
101 Copy(s, OFF32(p), SIZE32, char); \
102 (s) += SIZE32; \
103 } \
104} STMT_END
105
106#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
108
109/* Only to be used inside a loop (see the break) */
f337b084 110#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
111STMT_START { \
112 if (utf8) { \
f337b084
TH
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
115 } else { \
116 Copy(s, (char *) &var, sizeof(var), char); \
117 s += sizeof(var); \
118 } \
08ca2aa3
TH
119} STMT_END
120
f337b084 121#define PUSH_VAR(utf8, aptr, var) \
230e1fce 122 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 123
49704364
LW
124/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125#define MAX_SUB_TEMPLATE_LEVEL 100
126
66c611c5 127/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
128#define FLAG_WAS_UTF8 0x40
129#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 130#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 131#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
132#define FLAG_SLASH 0x04
133#define FLAG_COMMA 0x02
134#define FLAG_PACK 0x01
135
a6ec74c1
JH
136STATIC SV *
137S_mul128(pTHX_ SV *sv, U8 m)
138{
139 STRLEN len;
140 char *s = SvPV(sv, len);
141 char *t;
142 U32 i = 0;
143
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
146
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
149 sv = tmpNew;
150 s = SvPV(sv, len);
151 }
152 t = s + len - 1;
153 while (!*t) /* trailing '\0'? */
154 t--;
155 while (t > s) {
156 i = ((*t - '0') << 7) + m;
eb160463
GS
157 *(t--) = '0' + (char)(i % 10);
158 m = (char)(i / 10);
a6ec74c1
JH
159 }
160 return (sv);
161}
162
163/* Explosives and implosives. */
164
165#if 'I' == 73 && 'J' == 74
166/* On an ASCII/ISO kind of system */
167#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
168#else
169/*
170 Some other sort of character set - use memchr() so we don't match
171 the null byte.
172 */
173#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
174#endif
175
66c611c5 176/* type modifiers */
62f95557 177#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
178#define TYPE_IS_BIG_ENDIAN 0x200
179#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 180#define TYPE_IS_PACK 0x800
1109a392 181#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 182#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
183#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
184
7212898e 185#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 186# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 187#else
28be1210 188# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
189#endif
190
191#ifndef PERL_PACK_CAN_BYTEORDER
192/* Put "can't" first because it is shorter */
193# define TYPE_ENDIANNESS(t) 0
194# define TYPE_NO_ENDIANNESS(t) (t)
195
196# define ENDIANNESS_ALLOWED_TYPES ""
197
198# define DO_BO_UNPACK(var, type)
199# define DO_BO_PACK(var, type)
07409e01
NC
200# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
201# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
202# define DO_BO_UNPACK_N(var, type)
203# define DO_BO_PACK_N(var, type)
204# define DO_BO_UNPACK_P(var)
205# define DO_BO_PACK_P(var)
66c611c5 206
f337b084 207#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
208
209# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
211
212# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
213
214# define DO_BO_UNPACK(var, type) \
1109a392 215 STMT_START { \
66c611c5 216 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
219 default: break; \
220 } \
221 } STMT_END
222
7212898e 223# define DO_BO_PACK(var, type) \
1109a392 224 STMT_START { \
66c611c5 225 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
228 default: break; \
229 } \
230 } STMT_END
231
07409e01 232# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 233 STMT_START { \
66c611c5 234 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 235 case TYPE_IS_BIG_ENDIAN: \
07409e01 236 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
237 break; \
238 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 239 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
240 break; \
241 default: \
242 break; \
243 } \
244 } STMT_END
245
07409e01 246# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 247 STMT_START { \
66c611c5 248 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 249 case TYPE_IS_BIG_ENDIAN: \
07409e01 250 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
251 break; \
252 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 253 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
254 break; \
255 default: \
256 break; \
257 } \
258 } STMT_END
259
7212898e 260# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
266 break; \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
270 break; \
271 default: \
272 break; \
273 } \
274 } STMT_END
275
7212898e 276# if PTRSIZE == INTSIZE
07409e01
NC
277# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
278# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
279# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
280# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 281# elif PTRSIZE == LONGSIZE
07409e01
NC
282# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
283# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
284# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
285# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
7212898e
NC
286# else
287# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
288# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
289# endif
1109a392 290
7212898e 291# if defined(my_htolen) && defined(my_letohn) && \
1109a392 292 defined(my_htoben) && defined(my_betohn)
7212898e 293# define DO_BO_UNPACK_N(var, type) \
1109a392 294 STMT_START { \
66c611c5 295 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
296 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
297 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
298 default: break; \
299 } \
300 } STMT_END
301
7212898e 302# define DO_BO_PACK_N(var, type) \
1109a392 303 STMT_START { \
66c611c5 304 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
305 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
306 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
307 default: break; \
308 } \
309 } STMT_END
7212898e
NC
310# else
311# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
312# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
313# endif
314
f337b084 315#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 316
78d46eaa 317#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 318#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
319#define PACK_SIZE_MASK 0x3F
320
78d46eaa
NC
321/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
322 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
323
324#if TYPE_IS_SHRIEKING != 0x100
325 ++++shriek offset should be 256
326#endif
327
328typedef U8 packprops_t;
78d46eaa
NC
329#if 'J'-'I' == 1
330/* ASCII */
a7a3cfaa
TH
331const packprops_t packprops[512] = {
332 /* normal */
333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
337 0, 0, 0,
338 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 339#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 340 /* D */ LONG_DOUBLESIZE,
80a13697 341#else
a7a3cfaa 342 0,
80a13697 343#endif
a7a3cfaa
TH
344 0,
345 /* F */ NVSIZE,
346 0, 0,
347 /* I */ sizeof(unsigned int),
348 /* J */ UVSIZE,
349 0,
350 /* L */ SIZE32,
351 0,
352 /* N */ SIZE32,
353 0, 0,
80a13697 354#if defined(HAS_QUAD)
a7a3cfaa 355 /* Q */ sizeof(Uquad_t),
80a13697 356#else
a7a3cfaa 357 0,
80a13697 358#endif
a7a3cfaa
TH
359 0,
360 /* S */ SIZE16,
361 0,
362 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
363 /* V */ SIZE32,
364 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
365 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
366 /* c */ sizeof(char),
367 /* d */ sizeof(double),
368 0,
369 /* f */ sizeof(float),
370 0, 0,
371 /* i */ sizeof(int),
372 /* j */ IVSIZE,
373 0,
374 /* l */ SIZE32,
375 0,
376 /* n */ SIZE16,
377 0,
378 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 379#if defined(HAS_QUAD)
a7a3cfaa 380 /* q */ sizeof(Quad_t),
80a13697 381#else
a7a3cfaa 382 0,
80a13697 383#endif
a7a3cfaa
TH
384 0,
385 /* s */ SIZE16,
386 0, 0,
387 /* v */ SIZE16,
388 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
394 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0,
398 /* shrieking */
399 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
401 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 0, 0, 0, 0, 0, 0, 0, 0, 0,
404 /* I */ sizeof(unsigned int),
405 0, 0,
406 /* L */ sizeof(unsigned long),
407 0,
7212898e 408#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 409 /* N */ SIZE32,
7212898e 410#else
a7a3cfaa 411 0,
7212898e 412#endif
a7a3cfaa
TH
413 0, 0, 0, 0,
414 /* S */ sizeof(unsigned short),
415 0, 0,
7212898e 416#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 417 /* V */ SIZE32,
7212898e 418#else
a7a3cfaa 419 0,
7212898e 420#endif
a7a3cfaa
TH
421 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
422 0, 0,
423 /* i */ sizeof(int),
424 0, 0,
425 /* l */ sizeof(long),
426 0,
7212898e 427#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 428 /* n */ SIZE16,
7212898e 429#else
a7a3cfaa 430 0,
7212898e 431#endif
a7a3cfaa
TH
432 0, 0, 0, 0,
433 /* s */ sizeof(short),
434 0, 0,
7212898e 435#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 436 /* v */ SIZE16,
7212898e 437#else
a7a3cfaa 438 0,
7212898e 439#endif
a7a3cfaa
TH
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
449};
450#else
451/* EBCDIC (or bust) */
a7a3cfaa
TH
452const packprops_t packprops[512] = {
453 /* normal */
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0,
463 /* c */ sizeof(char),
464 /* d */ sizeof(double),
465 0,
466 /* f */ sizeof(float),
467 0, 0,
468 /* i */ sizeof(int),
469 0, 0, 0, 0, 0, 0, 0,
470 /* j */ IVSIZE,
471 0,
472 /* l */ SIZE32,
473 0,
474 /* n */ SIZE16,
475 0,
476 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 477#if defined(HAS_QUAD)
a7a3cfaa 478 /* q */ sizeof(Quad_t),
80a13697 479#else
a7a3cfaa 480 0,
80a13697 481#endif
a7a3cfaa
TH
482 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 /* s */ SIZE16,
484 0, 0,
485 /* v */ SIZE16,
486 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 490#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 491 /* D */ LONG_DOUBLESIZE,
80a13697 492#else
a7a3cfaa 493 0,
80a13697 494#endif
a7a3cfaa
TH
495 0,
496 /* F */ NVSIZE,
497 0, 0,
498 /* I */ sizeof(unsigned int),
499 0, 0, 0, 0, 0, 0, 0,
500 /* J */ UVSIZE,
501 0,
502 /* L */ SIZE32,
503 0,
504 /* N */ SIZE32,
505 0, 0,
80a13697 506#if defined(HAS_QUAD)
a7a3cfaa 507 /* Q */ sizeof(Uquad_t),
80a13697 508#else
a7a3cfaa 509 0,
80a13697 510#endif
a7a3cfaa
TH
511 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 /* S */ SIZE16,
513 0,
514 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
515 /* V */ SIZE32,
516 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 /* shrieking */
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* i */ sizeof(int),
530 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 /* l */ sizeof(long),
532 0,
7212898e 533#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 534 /* n */ SIZE16,
7212898e 535#else
a7a3cfaa 536 0,
7212898e 537#endif
a7a3cfaa
TH
538 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
539 /* s */ sizeof(short),
540 0, 0,
7212898e 541#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 542 /* v */ SIZE16,
7212898e 543#else
a7a3cfaa 544 0,
7212898e 545#endif
a7a3cfaa
TH
546 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
548 0, 0, 0,
549 /* I */ sizeof(unsigned int),
550 0, 0, 0, 0, 0, 0, 0, 0, 0,
551 /* L */ sizeof(unsigned long),
552 0,
7212898e 553#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 554 /* N */ SIZE32,
7212898e 555#else
a7a3cfaa 556 0,
7212898e 557#endif
a7a3cfaa
TH
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 /* S */ sizeof(unsigned short),
560 0, 0,
7212898e 561#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 562 /* V */ SIZE32,
7212898e 563#else
a7a3cfaa 564 0,
7212898e 565#endif
a7a3cfaa
TH
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
568};
569#endif
570
08ca2aa3 571STATIC U8
f337b084 572uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
08ca2aa3
TH
573{
574 UV val;
575 STRLEN retlen;
f337b084
TH
576 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
577 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
578 /* We try to process malformed UTF-8 as much as possible (preferrably with
579 warnings), but these two mean we make no progress in the string and
580 might enter an infinite loop */
581 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
582 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
583 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 584 if (val >= 0x100) {
f337b084 585 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
586 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
587 "Character in '%c' format wrapped in unpack",
f337b084 588 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
589 val &= 0xff;
590 }
591 *s += retlen;
fe2774ed 592 return (U8)val;
08ca2aa3
TH
593}
594
f337b084
TH
595#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
596 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
597 *(U8 *)(s)++)
598
599STATIC bool
f337b084 600uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
601{
602 UV val;
603 STRLEN retlen;
604 char *from = *s;
605 int bad = 0;
606 U32 flags = ckWARN(WARN_UTF8) ?
607 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
608 for (;buf_len > 0; buf_len--) {
609 if (from >= end) return FALSE;
f337b084 610 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
611 if (retlen == (STRLEN) -1 || retlen == 0) {
612 from += UTF8SKIP(from);
613 bad |= 1;
614 } else from += retlen;
615 if (val >= 0x100) {
616 bad |= 2;
617 val &= 0xff;
618 }
fe2774ed 619 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
620 }
621 /* We have enough characters for the buffer. Did we have problems ? */
622 if (bad) {
623 if (bad & 1) {
624 /* Rewalk the string fragment while warning */
625 char *ptr;
9e27e96a 626 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
627 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
628 if (ptr >= end) break;
f337b084 629 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
630 }
631 if (from > end) from = end;
632 }
633 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 634 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
635 WARN_PACK : WARN_UNPACK),
636 "Character(s) in '%c' format wrapped in %s",
fc241834 637 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 638 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
639 }
640 *s = from;
641 return TRUE;
642}
643
644STATIC bool
645next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
646{
647 UV val;
648 STRLEN retlen;
f337b084 649 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
650 if (val >= 0x100 || !ISUUCHAR(val) ||
651 retlen == (STRLEN) -1 || retlen == 0) {
652 *out = 0;
653 return FALSE;
654 }
655 *out = PL_uudmap[val] & 077;
f337b084 656 *s += retlen;
08ca2aa3
TH
657 return TRUE;
658}
78d46eaa 659
f337b084
TH
660STATIC void
661bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
662 U8 buffer[UTF8_MAXLEN];
663 U8 *end = start + len;
664 char *d = *dest;
665 while (start < end) {
666 int length =
667 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
668 switch(length) {
669 case 1:
670 *d++ = buffer[0];
671 break;
672 case 2:
673 *d++ = buffer[0];
674 *d++ = buffer[1];
675 break;
676 default:
677 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
678 *start, length);
679 }
680 start++;
681 }
682 *dest = d;
683}
684
230e1fce
NC
685#define PUSH_BYTES(utf8, cur, buf, len) \
686STMT_START { \
687 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
688 else { \
689 Copy(buf, cur, len, char); \
690 (cur) += (len); \
691 } \
f337b084
TH
692} STMT_END
693
694#define GROWING(utf8, cat, start, cur, in_len) \
695STMT_START { \
696 STRLEN glen = (in_len); \
3473cf63 697 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
698 if ((cur) + glen >= (start) + SvLEN(cat)) { \
699 (start) = sv_exp_grow(aTHX_ cat, glen); \
700 (cur) = (start) + SvCUR(cat); \
701 } \
702} STMT_END
703
704#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
705STMT_START { \
706 STRLEN glen = (in_len); \
707 STRLEN gl = glen; \
3473cf63 708 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
709 if ((cur) + gl >= (start) + SvLEN(cat)) { \
710 *cur = '\0'; \
b162af07 711 SvCUR_set((cat), (cur) - (start)); \
f337b084
TH
712 (start) = sv_exp_grow(aTHX_ cat, gl); \
713 (cur) = (start) + SvCUR(cat); \
714 } \
715 PUSH_BYTES(utf8, cur, buf, glen); \
716} STMT_END
717
718#define PUSH_BYTE(utf8, s, byte) \
719STMT_START { \
720 if (utf8) { \
721 U8 au8 = (byte); \
722 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
723 } else *(U8 *)(s)++ = (byte); \
724} STMT_END
725
726/* Only to be used inside a loop (see the break) */
727#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
728STMT_START { \
729 STRLEN retlen; \
730 if (str >= end) break; \
731 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
732 if (retlen == (STRLEN) -1 || retlen == 0) { \
733 *cur = '\0'; \
734 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
735 } \
736 str += retlen; \
737} STMT_END
738
206947d2 739/* Returns the sizeof() struct described by pat */
028d1f6d 740STATIC I32
f337b084 741S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 742{
f337b084 743 I32 total = 0;
206947d2 744
49704364 745 while (next_symbol(symptr)) {
f337b084
TH
746 I32 len;
747 int star, size;
f337b084
TH
748
749 switch (symptr->howlen) {
fc241834 750 case e_star:
49704364
LW
751 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
752 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
753 break;
f337b084
TH
754 default:
755 /* e_no_len and e_number */
756 len = symptr->length;
757 break;
49704364
LW
758 }
759
a7a3cfaa 760 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697
NC
761 if (!size) {
762 /* endianness doesn't influence the size of a type */
763 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
764 default:
765 Perl_croak(aTHX_ "Invalid type '%c' in %s",
766 (int)TYPE_NO_MODIFIERS(symptr->code),
767 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
28be1210
TH
768#ifdef PERL_PACK_CAN_SHRIEKSIGN
769 case '.' | TYPE_IS_SHRIEKING:
770 case '@' | TYPE_IS_SHRIEKING:
771#endif
80a13697 772 case '@':
28be1210 773 case '.':
80a13697
NC
774 case '/':
775 case 'U': /* XXXX Is it correct? */
776 case 'w':
777 case 'u':
778 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 779 (int) TYPE_NO_MODIFIERS(symptr->code),
80a13697
NC
780 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
781 case '%':
782 size = 0;
783 break;
784 case '(':
fc241834
RGS
785 {
786 tempsym_t savsym = *symptr;
787 symptr->patptr = savsym.grpbeg;
788 symptr->patend = savsym.grpend;
789 /* XXXX Theoretically, we need to measure many times at
790 different positions, since the subexpression may contain
791 alignment commands, but be not of aligned length.
792 Need to detect this and croak(). */
793 size = measure_struct(symptr);
794 *symptr = savsym;
795 break;
796 }
80a13697
NC
797 case 'X' | TYPE_IS_SHRIEKING:
798 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
799 */
800 if (!len) /* Avoid division by 0 */
801 len = 1;
802 len = total % len; /* Assumed: the start is aligned. */
803 /* FALL THROUGH */
804 case 'X':
805 size = -1;
806 if (total < len)
807 Perl_croak(aTHX_ "'X' outside of string in %s",
808 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
809 break;
810 case 'x' | TYPE_IS_SHRIEKING:
811 if (!len) /* Avoid division by 0 */
812 len = 1;
813 star = total % len; /* Assumed: the start is aligned. */
814 if (star) /* Other portable ways? */
815 len = len - star;
816 else
817 len = 0;
818 /* FALL THROUGH */
819 case 'x':
820 case 'A':
821 case 'Z':
822 case 'a':
80a13697
NC
823 size = 1;
824 break;
825 case 'B':
826 case 'b':
827 len = (len + 7)/8;
828 size = 1;
829 break;
830 case 'H':
831 case 'h':
832 len = (len + 1)/2;
833 size = 1;
834 break;
78d46eaa 835
80a13697
NC
836 case 'P':
837 len = 1;
838 size = sizeof(char*);
78d46eaa
NC
839 break;
840 }
206947d2
IZ
841 }
842 total += len * size;
843 }
844 return total;
845}
846
49704364
LW
847
848/* locate matching closing parenthesis or bracket
849 * returns char pointer to char after match, or NULL
850 */
851STATIC char *
852S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 853{
49704364
LW
854 while (patptr < patend) {
855 char c = *patptr++;
856
857 if (isSPACE(c))
858 continue;
859 else if (c == ender)
860 return patptr-1;
861 else if (c == '#') {
862 while (patptr < patend && *patptr != '\n')
863 patptr++;
864 continue;
865 } else if (c == '(')
866 patptr = group_end(patptr, patend, ')') + 1;
867 else if (c == '[')
868 patptr = group_end(patptr, patend, ']') + 1;
18529408 869 }
49704364
LW
870 Perl_croak(aTHX_ "No group ending character '%c' found in template",
871 ender);
872 return 0;
18529408
IZ
873}
874
49704364
LW
875
876/* Convert unsigned decimal number to binary.
877 * Expects a pointer to the first digit and address of length variable
878 * Advances char pointer to 1st non-digit char and returns number
fc241834 879 */
18529408 880STATIC char *
49704364
LW
881S_get_num(pTHX_ register char *patptr, I32 *lenptr )
882{
883 I32 len = *patptr++ - '0';
884 while (isDIGIT(*patptr)) {
885 if (len >= 0x7FFFFFFF/10)
886 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
887 len = (len * 10) + (*patptr++ - '0');
888 }
889 *lenptr = len;
890 return patptr;
891}
892
893/* The marvellous template parsing routine: Using state stored in *symptr,
894 * locates next template code and count
895 */
896STATIC bool
f337b084 897S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 898{
fc241834
RGS
899 char* patptr = symptr->patptr;
900 char* patend = symptr->patend;
f337b084 901 const char *allowed = "";
49704364
LW
902
903 symptr->flags &= ~FLAG_SLASH;
904
905 while (patptr < patend) {
906 if (isSPACE(*patptr))
907 patptr++;
908 else if (*patptr == '#') {
909 patptr++;
910 while (patptr < patend && *patptr != '\n')
911 patptr++;
912 if (patptr < patend)
913 patptr++;
914 } else {
fc241834 915 /* We should have found a template code */
49704364 916 I32 code = *patptr++ & 0xFF;
66c611c5 917 U32 inherited_modifiers = 0;
49704364
LW
918
919 if (code == ','){ /* grandfather in commas but with a warning */
920 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
921 symptr->flags |= FLAG_COMMA;
922 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
923 "Invalid type ',' in %s",
924 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
925 }
926 continue;
927 }
fc241834 928
49704364 929 /* for '(', skip to ')' */
fc241834 930 if (code == '(') {
49704364
LW
931 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
932 Perl_croak(aTHX_ "()-group starts with a count in %s",
933 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
934 symptr->grpbeg = patptr;
935 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
936 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
937 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
938 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
939 }
940
66c611c5
MHM
941 /* look for group modifiers to inherit */
942 if (TYPE_ENDIANNESS(symptr->flags)) {
943 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
944 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
945 }
946
1109a392
MHM
947 /* look for modifiers */
948 while (patptr < patend) {
1109a392
MHM
949 I32 modifier = 0;
950 switch (*patptr) {
951 case '!':
952 modifier = TYPE_IS_SHRIEKING;
7212898e 953 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 954 break;
7212898e 955#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
956 case '>':
957 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 958 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
959 break;
960 case '<':
961 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 962 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 963 break;
f337b084 964#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392
MHM
965 default:
966 break;
967 }
66c611c5 968
1109a392
MHM
969 if (modifier == 0)
970 break;
66c611c5 971
1109a392
MHM
972 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
973 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
974 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
975
976 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392
MHM
977 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
978 (int) TYPE_NO_MODIFIERS(code),
979 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
980 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
981 TYPE_ENDIANNESS_MASK)
982 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
983 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
984
1109a392
MHM
985 if (ckWARN(WARN_UNPACK)) {
986 if (code & modifier)
987 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
988 "Duplicate modifier '%c' after '%c' in %s",
989 *patptr, (int) TYPE_NO_MODIFIERS(code),
990 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
991 }
66c611c5 992
1109a392
MHM
993 code |= modifier;
994 patptr++;
49704364
LW
995 }
996
66c611c5
MHM
997 /* inherit modifiers */
998 code |= inherited_modifiers;
999
fc241834 1000 /* look for count and/or / */
49704364
LW
1001 if (patptr < patend) {
1002 if (isDIGIT(*patptr)) {
1003 patptr = get_num( patptr, &symptr->length );
1004 symptr->howlen = e_number;
1005
1006 } else if (*patptr == '*') {
1007 patptr++;
1008 symptr->howlen = e_star;
1009
1010 } else if (*patptr == '[') {
fc241834 1011 char* lenptr = ++patptr;
49704364
LW
1012 symptr->howlen = e_number;
1013 patptr = group_end( patptr, patend, ']' ) + 1;
1014 /* what kind of [] is it? */
1015 if (isDIGIT(*lenptr)) {
1016 lenptr = get_num( lenptr, &symptr->length );
1017 if( *lenptr != ']' )
1018 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1019 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1020 } else {
1021 tempsym_t savsym = *symptr;
1022 symptr->patend = patptr-1;
1023 symptr->patptr = lenptr;
1024 savsym.length = measure_struct(symptr);
1025 *symptr = savsym;
1026 }
1027 } else {
1028 symptr->howlen = e_no_len;
1029 symptr->length = 1;
1030 }
1031
1032 /* try to find / */
1033 while (patptr < patend) {
1034 if (isSPACE(*patptr))
1035 patptr++;
1036 else if (*patptr == '#') {
1037 patptr++;
1038 while (patptr < patend && *patptr != '\n')
1039 patptr++;
1040 if (patptr < patend)
1041 patptr++;
1042 } else {
66c611c5 1043 if (*patptr == '/') {
49704364
LW
1044 symptr->flags |= FLAG_SLASH;
1045 patptr++;
66c611c5
MHM
1046 if (patptr < patend &&
1047 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364
LW
1048 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1049 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1050 }
1051 break;
1052 }
18529408 1053 }
49704364
LW
1054 } else {
1055 /* at end - no count, no / */
1056 symptr->howlen = e_no_len;
1057 symptr->length = 1;
1058 }
1059
1060 symptr->code = code;
fc241834 1061 symptr->patptr = patptr;
49704364 1062 return TRUE;
18529408 1063 }
49704364 1064 }
fc241834 1065 symptr->patptr = patptr;
49704364 1066 return FALSE;
18529408
IZ
1067}
1068
18529408 1069/*
fc241834 1070 There is no way to cleanly handle the case where we should process the
08ca2aa3 1071 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1072 (e.g. estimates like strend-s as an upper bound for the number of
1073 characters left wouldn't work). So if we foresee the need of this
1074 (pattern starts with U or contains U0), we want to work on the encoded
1075 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1076 themselves if they need to do a lot of unpacks like this on it
1077*/
fc241834 1078STATIC bool
08ca2aa3
TH
1079need_utf8(const char *pat, const char *patend)
1080{
1081 bool first = TRUE;
1082 while (pat < patend) {
1083 if (pat[0] == '#') {
1084 pat++;
07409e01 1085 pat = (char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1086 if (!pat) return FALSE;
1087 } else if (pat[0] == 'U') {
1088 if (first || pat[1] == '0') return TRUE;
1089 } else first = FALSE;
1090 pat++;
1091 }
1092 return FALSE;
1093}
1094
1095STATIC char
1096first_symbol(const char *pat, const char *patend) {
1097 while (pat < patend) {
1098 if (pat[0] != '#') return pat[0];
1099 pat++;
07409e01 1100 pat = (char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1101 if (!pat) return 0;
1102 pat++;
1103 }
1104 return 0;
1105}
1106
1107/*
18529408
IZ
1108=for apidoc unpack_str
1109
7accc089
JH
1110The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1111and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1112
1113=cut */
1114
1115I32
f337b084 1116Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1117{
9e27e96a
AL
1118 tempsym_t sym = { NULL, NULL, NULL, NULL, 0, 0, 0, 0, 0, 0, NULL };
1119 (void)strbeg;
1120 (void)new_s;
1121 (void)ocnt;
08ca2aa3 1122
f337b084 1123 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1124 else if (need_utf8(pat, patend)) {
1125 /* We probably should try to avoid this in case a scalar context call
1126 wouldn't get to the "U0" */
1127 STRLEN len = strend - s;
230e1fce 1128 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1129 SAVEFREEPV(s);
1130 strend = s + len;
f337b084 1131 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1132 }
1133
f337b084
TH
1134 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1135 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1136
49704364
LW
1137 sym.patptr = pat;
1138 sym.patend = patend;
1139 sym.flags = flags;
1140
1141 return unpack_rec(&sym, s, s, strend, NULL );
1142}
1143
7accc089
JH
1144/*
1145=for apidoc unpackstring
1146
608d3aed
LW
1147The engine implementing unpack() Perl function. C<unpackstring> puts the
1148extracted list items on the stack and returns the number of elements.
1149Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1150
1151=cut */
1152
1153I32
f337b084 1154Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
7accc089 1155{
9e27e96a 1156 tempsym_t sym = { NULL, NULL, NULL, NULL, 0, 0, 0, 0, 0, 0, NULL };
08ca2aa3 1157
f337b084 1158 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1159 else if (need_utf8(pat, patend)) {
1160 /* We probably should try to avoid this in case a scalar context call
1161 wouldn't get to the "U0" */
1162 STRLEN len = strend - s;
230e1fce 1163 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1164 SAVEFREEPV(s);
1165 strend = s + len;
f337b084 1166 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1167 }
1168
f337b084
TH
1169 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1170 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1171
7accc089
JH
1172 sym.patptr = pat;
1173 sym.patend = patend;
1174 sym.flags = flags;
1175
1176 return unpack_rec(&sym, s, s, strend, NULL );
1177}
1178
49704364
LW
1179STATIC
1180I32
08ca2aa3 1181S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
49704364 1182{
27da23d5 1183 dVAR; dSP;
18529408
IZ
1184 SV *sv;
1185 I32 start_sp_offset = SP - PL_stack_base;
49704364 1186 howlen_t howlen;
a6ec74c1 1187
a6ec74c1 1188 I32 checksum = 0;
92d41999 1189 UV cuv = 0;
a6ec74c1 1190 NV cdouble = 0.0;
f337b084 1191 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1192 bool beyond = FALSE;
21c16052 1193 bool explicit_length;
9e27e96a 1194 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1195 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1196 symptr->strbeg = s - strbeg;
49704364 1197
49704364 1198 while (next_symbol(symptr)) {
a7a3cfaa 1199 packprops_t props;
9e27e96a 1200 I32 len;
f337b084 1201 I32 datumtype = symptr->code;
206947d2 1202 /* do first one only unless in list context
08ca2aa3 1203 / is implemented by unpacking the count, then popping it from the
206947d2 1204 stack, so must check that we're not in the middle of a / */
49704364 1205 if ( unpack_only_one
206947d2 1206 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1207 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1208 break;
49704364 1209
f337b084 1210 switch (howlen = symptr->howlen) {
fc241834
RGS
1211 case e_star:
1212 len = strend - strbeg; /* long enough */
49704364 1213 break;
f337b084
TH
1214 default:
1215 /* e_no_len and e_number */
1216 len = symptr->length;
1217 break;
49704364 1218 }
18529408 1219
21c16052 1220 explicit_length = TRUE;
a6ec74c1 1221 redo_switch:
49704364 1222 beyond = s >= strend;
a7a3cfaa
TH
1223
1224 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1225 if (props) {
1226 /* props nonzero means we can process this letter. */
9e27e96a
AL
1227 const long size = props & PACK_SIZE_MASK;
1228 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1229 if (len > howmany)
1230 len = howmany;
1231
1232 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1233 if (len && unpack_only_one) len = 1;
1234 EXTEND(SP, len);
1235 EXTEND_MORTAL(len);
78d46eaa
NC
1236 }
1237 }
a7a3cfaa 1238
1109a392 1239 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1240 default:
1109a392 1241 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1242
a6ec74c1 1243 case '%':
49704364 1244 if (howlen == e_no_len)
18529408 1245 len = 16; /* len is not specified */
a6ec74c1 1246 checksum = len;
92d41999 1247 cuv = 0;
a6ec74c1 1248 cdouble = 0;
18529408 1249 continue;
a6ec74c1 1250 break;
18529408
IZ
1251 case '(':
1252 {
49704364 1253 tempsym_t savsym = *symptr;
9e27e96a 1254 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1255 symptr->flags |= group_modifiers;
49704364 1256 symptr->patend = savsym.grpend;
28be1210 1257 symptr->previous = &savsym;
49704364 1258 symptr->level++;
18529408
IZ
1259 PUTBACK;
1260 while (len--) {
49704364 1261 symptr->patptr = savsym.grpbeg;
f337b084
TH
1262 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1263 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1264 unpack_rec(symptr, s, strbeg, strend, &s);
1265 if (s == strend && savsym.howlen == e_star)
49704364 1266 break; /* No way to continue */
18529408
IZ
1267 }
1268 SPAGAIN;
28be1210 1269 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1270 *symptr = savsym;
18529408
IZ
1271 break;
1272 }
28be1210
TH
1273#ifdef PERL_PACK_CAN_SHRIEKSIGN
1274 case '.' | TYPE_IS_SHRIEKING:
1275#endif
1276 case '.': {
9e27e96a 1277 const char *from;
28be1210
TH
1278 SV *sv;
1279#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1280 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1281#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1282 const bool u8 = utf8;
28be1210
TH
1283#endif
1284 if (howlen == e_star) from = strbeg;
1285 else if (len <= 0) from = s;
1286 else {
1287 tempsym_t *group = symptr;
1288
1289 while (--len && group) group = group->previous;
1290 from = group ? strbeg + group->strbeg : strbeg;
1291 }
1292 sv = from <= s ?
00646304
CB
1293 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1294 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1295 XPUSHs(sv_2mortal(sv));
1296 break;
1297 }
1298#ifdef PERL_PACK_CAN_SHRIEKSIGN
1299 case '@' | TYPE_IS_SHRIEKING:
1300#endif
a6ec74c1 1301 case '@':
28be1210
TH
1302 s = strbeg + symptr->strbeg;
1303#ifdef PERL_PACK_CAN_SHRIEKSIGN
1304 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1305#else /* PERL_PACK_CAN_SHRIEKSIGN */
1306 if (utf8)
1307#endif
1308 {
08ca2aa3
TH
1309 while (len > 0) {
1310 if (s >= strend)
1311 Perl_croak(aTHX_ "'@' outside of string in unpack");
1312 s += UTF8SKIP(s);
1313 len--;
1314 }
1315 if (s > strend)
1316 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1317 } else {
28be1210 1318 if (strend-s < len)
fc241834 1319 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1320 s += len;
08ca2aa3 1321 }
a6ec74c1 1322 break;
62f95557
IZ
1323 case 'X' | TYPE_IS_SHRIEKING:
1324 if (!len) /* Avoid division by 0 */
1325 len = 1;
08ca2aa3
TH
1326 if (utf8) {
1327 char *hop, *last;
f337b084
TH
1328 I32 l = len;
1329 hop = last = strbeg;
1330 while (hop < s) {
1331 hop += UTF8SKIP(hop);
1332 if (--l == 0) {
08ca2aa3 1333 last = hop;
f337b084
TH
1334 l = len;
1335 }
fc241834 1336 }
f337b084
TH
1337 if (last > s)
1338 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1339 s = last;
1340 break;
f337b084
TH
1341 }
1342 len = (s - strbeg) % len;
62f95557 1343 /* FALL THROUGH */
a6ec74c1 1344 case 'X':
08ca2aa3
TH
1345 if (utf8) {
1346 while (len > 0) {
1347 if (s <= strbeg)
1348 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1349 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1350 if (s <= strbeg)
1351 Perl_croak(aTHX_ "'X' outside of string in unpack");
1352 }
1353 len--;
1354 }
1355 } else {
fc241834
RGS
1356 if (len > s - strbeg)
1357 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1358 s -= len;
08ca2aa3 1359 }
a6ec74c1 1360 break;
9e27e96a
AL
1361 case 'x' | TYPE_IS_SHRIEKING: {
1362 I32 ai32;
62f95557
IZ
1363 if (!len) /* Avoid division by 0 */
1364 len = 1;
230e1fce
NC
1365 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1366 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1367 if (ai32 == 0) break;
1368 len -= ai32;
9e27e96a 1369 }
62f95557 1370 /* FALL THROUGH */
a6ec74c1 1371 case 'x':
08ca2aa3
TH
1372 if (utf8) {
1373 while (len>0) {
1374 if (s >= strend)
1375 Perl_croak(aTHX_ "'x' outside of string in unpack");
1376 s += UTF8SKIP(s);
1377 len--;
1378 }
1379 } else {
fc241834
RGS
1380 if (len > strend - s)
1381 Perl_croak(aTHX_ "'x' outside of string in unpack");
1382 s += len;
f337b084 1383 }
a6ec74c1
JH
1384 break;
1385 case '/':
49704364
LW
1386 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1387 break;
a6ec74c1
JH
1388 case 'A':
1389 case 'Z':
1390 case 'a':
08ca2aa3
TH
1391 if (checksum) {
1392 /* Preliminary length estimate is assumed done in 'W' */
1393 if (len > strend - s) len = strend - s;
1394 goto W_checksum;
1395 }
1396 if (utf8) {
1397 I32 l;
1398 char *hop;
1399 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1400 if (hop >= strend) {
1401 if (hop > strend)
1402 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1403 break;
fc241834 1404 }
a6ec74c1 1405 }
08ca2aa3
TH
1406 if (hop > strend)
1407 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1408 len = hop - s;
1409 } else if (len > strend - s)
1410 len = strend - s;
1411
1412 if (datumtype == 'Z') {
1413 /* 'Z' strips stuff after first null */
f337b084
TH
1414 char *ptr, *end;
1415 end = s + len;
1416 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1417 sv = newSVpvn(s, ptr-s);
1418 if (howlen == e_star) /* exact for 'Z*' */
1419 len = ptr-s + (ptr != strend ? 1 : 0);
1420 } else if (datumtype == 'A') {
1421 /* 'A' strips both nulls and spaces */
1422 char *ptr;
18bdf90a
TH
1423 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1424 for (ptr = s+len-1; ptr >= s; ptr--)
1425 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1426 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1427 if (ptr >= s) ptr += UTF8SKIP(ptr);
1428 else ptr++;
28be1210 1429 if (ptr > s+len)
18bdf90a
TH
1430 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1431 } else {
1432 for (ptr = s+len-1; ptr >= s; ptr--)
1433 if (*ptr != 0 && !isSPACE(*ptr)) break;
1434 ptr++;
1435 }
08ca2aa3
TH
1436 sv = newSVpvn(s, ptr-s);
1437 } else sv = newSVpvn(s, len);
1438
1439 if (utf8) {
1440 SvUTF8_on(sv);
1441 /* Undo any upgrade done due to need_utf8() */
f337b084 1442 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1443 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1444 }
1445 XPUSHs(sv_2mortal(sv));
08ca2aa3 1446 s += len;
a6ec74c1
JH
1447 break;
1448 case 'B':
08ca2aa3
TH
1449 case 'b': {
1450 char *str;
49704364 1451 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1452 len = (strend - s) * 8;
1453 if (checksum) {
1454 if (!PL_bitcount) {
08ca2aa3 1455 int bits;
a6ec74c1
JH
1456 Newz(601, PL_bitcount, 256, char);
1457 for (bits = 1; bits < 256; bits++) {
1458 if (bits & 1) PL_bitcount[bits]++;
1459 if (bits & 2) PL_bitcount[bits]++;
1460 if (bits & 4) PL_bitcount[bits]++;
1461 if (bits & 8) PL_bitcount[bits]++;
1462 if (bits & 16) PL_bitcount[bits]++;
1463 if (bits & 32) PL_bitcount[bits]++;
1464 if (bits & 64) PL_bitcount[bits]++;
1465 if (bits & 128) PL_bitcount[bits]++;
1466 }
1467 }
f337b084 1468 if (utf8)
08ca2aa3 1469 while (len >= 8 && s < strend) {
f337b084 1470 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1471 len -= 8;
1472 }
f337b084 1473 else
fc241834 1474 while (len >= 8) {
08ca2aa3 1475 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1476 len -= 8;
1477 }
08ca2aa3
TH
1478 if (len && s < strend) {
1479 U8 bits;
f337b084
TH
1480 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1481 if (datumtype == 'b')
a6ec74c1 1482 while (len-- > 0) {
92d41999 1483 if (bits & 1) cuv++;
a6ec74c1
JH
1484 bits >>= 1;
1485 }
f337b084 1486 else
a6ec74c1 1487 while (len-- > 0) {
08ca2aa3 1488 if (bits & 0x80) cuv++;
a6ec74c1
JH
1489 bits <<= 1;
1490 }
fc241834 1491 }
a6ec74c1
JH
1492 break;
1493 }
08ca2aa3
TH
1494
1495 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1496 SvPOK_on(sv);
1497 str = SvPVX(sv);
1498 if (datumtype == 'b') {
f337b084 1499 U8 bits = 0;
9e27e96a 1500 I32 ai32 = len;
08ca2aa3
TH
1501 for (len = 0; len < ai32; len++) {
1502 if (len & 7) bits >>= 1;
1503 else if (utf8) {
1504 if (s >= strend) break;
f337b084 1505 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1506 } else bits = *(U8 *) s++;
1507 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1508 }
08ca2aa3 1509 } else {
f337b084 1510 U8 bits = 0;
9e27e96a 1511 I32 ai32 = len;
08ca2aa3
TH
1512 for (len = 0; len < ai32; len++) {
1513 if (len & 7) bits <<= 1;
1514 else if (utf8) {
1515 if (s >= strend) break;
f337b084 1516 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1517 } else bits = *(U8 *) s++;
1518 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1519 }
1520 }
1521 *str = '\0';
08ca2aa3
TH
1522 SvCUR_set(sv, str - SvPVX(sv));
1523 XPUSHs(sv);
a6ec74c1 1524 break;
08ca2aa3 1525 }
a6ec74c1 1526 case 'H':
08ca2aa3
TH
1527 case 'h': {
1528 char *str;
fc241834 1529 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1530 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1531 len = (strend - s) * 2;
fc241834 1532 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1533 SvPOK_on(sv);
1534 str = SvPVX(sv);
1535 if (datumtype == 'h') {
f337b084 1536 U8 bits = 0;
9e27e96a 1537 I32 ai32 = len;
fc241834
RGS
1538 for (len = 0; len < ai32; len++) {
1539 if (len & 1) bits >>= 4;
1540 else if (utf8) {
1541 if (s >= strend) break;
f337b084 1542 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1543 } else bits = * (U8 *) s++;
a6ec74c1
JH
1544 *str++ = PL_hexdigit[bits & 15];
1545 }
08ca2aa3 1546 } else {
f337b084 1547 U8 bits = 0;
9e27e96a 1548 I32 ai32 = len;
08ca2aa3
TH
1549 for (len = 0; len < ai32; len++) {
1550 if (len & 1) bits <<= 4;
1551 else if (utf8) {
1552 if (s >= strend) break;
f337b084 1553 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1554 } else bits = *(U8 *) s++;
a6ec74c1
JH
1555 *str++ = PL_hexdigit[(bits >> 4) & 15];
1556 }
1557 }
1558 *str = '\0';
08ca2aa3
TH
1559 SvCUR_set(sv, str - SvPVX(sv));
1560 XPUSHs(sv);
a6ec74c1 1561 break;
08ca2aa3 1562 }
a6ec74c1 1563 case 'c':
73cb7263 1564 while (len-- > 0) {
f337b084 1565 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1566 if (aint >= 128) /* fake up signed chars */
1567 aint -= 256;
08ca2aa3 1568 if (!checksum)
ac7f3b1b 1569 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1570 else if (checksum > bits_in_uv)
1571 cdouble += (NV)aint;
1572 else
1573 cuv += aint;
a6ec74c1
JH
1574 }
1575 break;
1576 case 'C':
08ca2aa3
TH
1577 case 'W':
1578 W_checksum:
35bcd338 1579 if (len == 0) {
fc241834 1580 if (explicit_length && datumtype == 'C')
08ca2aa3 1581 /* Switch to "character" mode */
f337b084 1582 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1583 break;
1584 }
fc241834 1585 if (datumtype == 'C' ?
f337b084
TH
1586 (symptr->flags & FLAG_DO_UTF8) &&
1587 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3
TH
1588 while (len-- > 0 && s < strend) {
1589 UV val;
1590 STRLEN retlen;
f337b084
TH
1591 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1592 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1593 if (retlen == (STRLEN) -1 || retlen == 0)
1594 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1595 s += retlen;
1596 if (!checksum)
1597 PUSHs(sv_2mortal(newSVuv((UV) val)));
1598 else if (checksum > bits_in_uv)
1599 cdouble += (NV) val;
d6d3e8bd 1600 else
08ca2aa3 1601 cuv += val;
fc241834 1602 }
08ca2aa3 1603 } else if (!checksum)
a6ec74c1 1604 while (len-- > 0) {
08ca2aa3
TH
1605 U8 ch = *(U8 *) s++;
1606 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1607 }
08ca2aa3
TH
1608 else if (checksum > bits_in_uv)
1609 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1610 else
1611 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1612 break;
1613 case 'U':
35bcd338 1614 if (len == 0) {
08ca2aa3
TH
1615 if (explicit_length) {
1616 /* Switch to "bytes in UTF-8" mode */
f337b084 1617 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1618 else
1619 /* Should be impossible due to the need_utf8() test */
1620 Perl_croak(aTHX_ "U0 mode on a byte string");
1621 }
35bcd338
JH
1622 break;
1623 }
08ca2aa3 1624 if (len > strend - s) len = strend - s;
fc241834 1625 if (!checksum) {
08ca2aa3
TH
1626 if (len && unpack_only_one) len = 1;
1627 EXTEND(SP, len);
1628 EXTEND_MORTAL(len);
fc241834 1629 }
08ca2aa3
TH
1630 while (len-- > 0 && s < strend) {
1631 STRLEN retlen;
1632 UV auv;
1633 if (utf8) {
1634 U8 result[UTF8_MAXLEN];
1635 char *ptr;
1636 STRLEN len;
1637 ptr = s;
1638 /* Bug: warns about bad utf8 even if we are short on bytes
1639 and will break out of the loop */
230e1fce
NC
1640 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1641 'U'))
08ca2aa3
TH
1642 break;
1643 len = UTF8SKIP(result);
fc241834 1644 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1645 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1646 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1647 s = ptr;
1648 } else {
1649 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1650 if (retlen == (STRLEN) -1 || retlen == 0)
1651 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1652 s += retlen;
1653 }
1654 if (!checksum)
1655 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1656 else if (checksum > bits_in_uv)
08ca2aa3 1657 cdouble += (NV) auv;
73cb7263 1658 else
08ca2aa3 1659 cuv += auv;
a6ec74c1
JH
1660 }
1661 break;
49704364
LW
1662 case 's' | TYPE_IS_SHRIEKING:
1663#if SHORTSIZE != SIZE16
73cb7263 1664 while (len-- > 0) {
08ca2aa3 1665 short ashort;
f337b084
TH
1666 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1667 DO_BO_UNPACK(ashort, s);
08ca2aa3 1668 if (!checksum)
ac7f3b1b 1669 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1670 else if (checksum > bits_in_uv)
1671 cdouble += (NV)ashort;
1672 else
1673 cuv += ashort;
49704364
LW
1674 }
1675 break;
1676#else
1677 /* Fallthrough! */
a6ec74c1 1678#endif
49704364 1679 case 's':
73cb7263 1680 while (len-- > 0) {
08ca2aa3
TH
1681 I16 ai16;
1682
1683#if U16SIZE > SIZE16
1684 ai16 = 0;
1685#endif
f337b084 1686 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1687 DO_BO_UNPACK(ai16, 16);
1109a392 1688#if U16SIZE > SIZE16
73cb7263
NC
1689 if (ai16 > 32767)
1690 ai16 -= 65536;
a6ec74c1 1691#endif
08ca2aa3 1692 if (!checksum)
ac7f3b1b 1693 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1694 else if (checksum > bits_in_uv)
1695 cdouble += (NV)ai16;
1696 else
1697 cuv += ai16;
a6ec74c1
JH
1698 }
1699 break;
49704364
LW
1700 case 'S' | TYPE_IS_SHRIEKING:
1701#if SHORTSIZE != SIZE16
73cb7263 1702 while (len-- > 0) {
08ca2aa3 1703 unsigned short aushort;
f337b084
TH
1704 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1705 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1706 if (!checksum)
1707 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1708 else if (checksum > bits_in_uv)
1709 cdouble += (NV)aushort;
1710 else
1711 cuv += aushort;
49704364
LW
1712 }
1713 break;
1714#else
1715 /* Fallhrough! */
1716#endif
a6ec74c1
JH
1717 case 'v':
1718 case 'n':
1719 case 'S':
73cb7263 1720 while (len-- > 0) {
08ca2aa3
TH
1721 U16 au16;
1722#if U16SIZE > SIZE16
1723 au16 = 0;
1724#endif
f337b084 1725 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1726 DO_BO_UNPACK(au16, 16);
a6ec74c1 1727#ifdef HAS_NTOHS
73cb7263
NC
1728 if (datumtype == 'n')
1729 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1730#endif
1731#ifdef HAS_VTOHS
73cb7263
NC
1732 if (datumtype == 'v')
1733 au16 = vtohs(au16);
a6ec74c1 1734#endif
08ca2aa3
TH
1735 if (!checksum)
1736 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1737 else if (checksum > bits_in_uv)
f337b084 1738 cdouble += (NV) au16;
73cb7263
NC
1739 else
1740 cuv += au16;
a6ec74c1
JH
1741 }
1742 break;
7212898e 1743#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1744 case 'v' | TYPE_IS_SHRIEKING:
1745 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1746 while (len-- > 0) {
08ca2aa3
TH
1747 I16 ai16;
1748# if U16SIZE > SIZE16
1749 ai16 = 0;
1750# endif
f337b084 1751 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1752# ifdef HAS_NTOHS
73cb7263 1753 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1754 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1755# endif /* HAS_NTOHS */
1756# ifdef HAS_VTOHS
73cb7263 1757 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1758 ai16 = (I16) vtohs((U16) ai16);
1759# endif /* HAS_VTOHS */
1760 if (!checksum)
ac7f3b1b 1761 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1762 else if (checksum > bits_in_uv)
08ca2aa3 1763 cdouble += (NV) ai16;
73cb7263
NC
1764 else
1765 cuv += ai16;
068bd2e7
MHM
1766 }
1767 break;
08ca2aa3 1768#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1769 case 'i':
49704364 1770 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1771 while (len-- > 0) {
08ca2aa3 1772 int aint;
f337b084
TH
1773 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1774 DO_BO_UNPACK(aint, i);
08ca2aa3 1775 if (!checksum)
ac7f3b1b 1776 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1777 else if (checksum > bits_in_uv)
1778 cdouble += (NV)aint;
1779 else
1780 cuv += aint;
a6ec74c1
JH
1781 }
1782 break;
1783 case 'I':
49704364 1784 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1785 while (len-- > 0) {
08ca2aa3 1786 unsigned int auint;
f337b084
TH
1787 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1788 DO_BO_UNPACK(auint, i);
08ca2aa3 1789 if (!checksum)
ac7f3b1b 1790 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1791 else if (checksum > bits_in_uv)
1792 cdouble += (NV)auint;
1793 else
1794 cuv += auint;
a6ec74c1
JH
1795 }
1796 break;
92d41999 1797 case 'j':
73cb7263 1798 while (len-- > 0) {
08ca2aa3 1799 IV aiv;
f337b084 1800 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1801#if IVSIZE == INTSIZE
f337b084 1802 DO_BO_UNPACK(aiv, i);
1109a392 1803#elif IVSIZE == LONGSIZE
f337b084 1804 DO_BO_UNPACK(aiv, l);
1109a392 1805#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1806 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1807#else
1808 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1809#endif
08ca2aa3 1810 if (!checksum)
ac7f3b1b 1811 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1812 else if (checksum > bits_in_uv)
1813 cdouble += (NV)aiv;
1814 else
1815 cuv += aiv;
92d41999
JH
1816 }
1817 break;
1818 case 'J':
73cb7263 1819 while (len-- > 0) {
08ca2aa3 1820 UV auv;
f337b084 1821 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1822#if IVSIZE == INTSIZE
f337b084 1823 DO_BO_UNPACK(auv, i);
08ca2aa3 1824#elif IVSIZE == LONGSIZE
f337b084 1825 DO_BO_UNPACK(auv, l);
08ca2aa3 1826#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1827 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1828#else
1829 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1830#endif
08ca2aa3 1831 if (!checksum)
ac7f3b1b 1832 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1833 else if (checksum > bits_in_uv)
1834 cdouble += (NV)auv;
1835 else
1836 cuv += auv;
92d41999
JH
1837 }
1838 break;
49704364
LW
1839 case 'l' | TYPE_IS_SHRIEKING:
1840#if LONGSIZE != SIZE32
73cb7263 1841 while (len-- > 0) {
08ca2aa3 1842 long along;
f337b084
TH
1843 SHIFT_VAR(utf8, s, strend, along, datumtype);
1844 DO_BO_UNPACK(along, l);
08ca2aa3 1845 if (!checksum)
ac7f3b1b 1846 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1847 else if (checksum > bits_in_uv)
1848 cdouble += (NV)along;
1849 else
1850 cuv += along;
49704364
LW
1851 }
1852 break;
1853#else
1854 /* Fallthrough! */
a6ec74c1 1855#endif
49704364 1856 case 'l':
73cb7263 1857 while (len-- > 0) {
08ca2aa3
TH
1858 I32 ai32;
1859#if U32SIZE > SIZE32
1860 ai32 = 0;
1861#endif
f337b084 1862 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1863 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1864#if U32SIZE > SIZE32
08ca2aa3 1865 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1866#endif
08ca2aa3 1867 if (!checksum)
ac7f3b1b 1868 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1869 else if (checksum > bits_in_uv)
1870 cdouble += (NV)ai32;
1871 else
1872 cuv += ai32;
a6ec74c1
JH
1873 }
1874 break;
49704364
LW
1875 case 'L' | TYPE_IS_SHRIEKING:
1876#if LONGSIZE != SIZE32
73cb7263 1877 while (len-- > 0) {
08ca2aa3 1878 unsigned long aulong;
f337b084
TH
1879 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1880 DO_BO_UNPACK(aulong, l);
08ca2aa3 1881 if (!checksum)
ac7f3b1b 1882 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1883 else if (checksum > bits_in_uv)
1884 cdouble += (NV)aulong;
1885 else
1886 cuv += aulong;
49704364
LW
1887 }
1888 break;
1889#else
1890 /* Fall through! */
1891#endif
a6ec74c1
JH
1892 case 'V':
1893 case 'N':
1894 case 'L':
73cb7263 1895 while (len-- > 0) {
08ca2aa3
TH
1896 U32 au32;
1897#if U32SIZE > SIZE32
1898 au32 = 0;
1899#endif
f337b084 1900 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1901 DO_BO_UNPACK(au32, 32);
a6ec74c1 1902#ifdef HAS_NTOHL
73cb7263
NC
1903 if (datumtype == 'N')
1904 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1905#endif
1906#ifdef HAS_VTOHL
73cb7263
NC
1907 if (datumtype == 'V')
1908 au32 = vtohl(au32);
a6ec74c1 1909#endif
08ca2aa3 1910 if (!checksum)
fc241834
RGS
1911 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1912 else if (checksum > bits_in_uv)
1913 cdouble += (NV)au32;
1914 else
1915 cuv += au32;
a6ec74c1
JH
1916 }
1917 break;
7212898e 1918#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1919 case 'V' | TYPE_IS_SHRIEKING:
1920 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1921 while (len-- > 0) {
08ca2aa3
TH
1922 I32 ai32;
1923# if U32SIZE > SIZE32
1924 ai32 = 0;
1925# endif
f337b084 1926 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1927# ifdef HAS_NTOHL
73cb7263
NC
1928 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1929 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1930# endif
1931# ifdef HAS_VTOHL
73cb7263
NC
1932 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1933 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1934# endif
1935 if (!checksum)
ac7f3b1b 1936 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1937 else if (checksum > bits_in_uv)
1938 cdouble += (NV)ai32;
1939 else
1940 cuv += ai32;
068bd2e7
MHM
1941 }
1942 break;
08ca2aa3 1943#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1944 case 'p':
a6ec74c1 1945 while (len-- > 0) {
08ca2aa3 1946 char *aptr;
f337b084 1947 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1948 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1949 /* newSVpv generates undef if aptr is NULL */
1950 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1951 }
1952 break;
1953 case 'w':
a6ec74c1
JH
1954 {
1955 UV auv = 0;
1956 U32 bytes = 0;
fc241834 1957
08ca2aa3
TH
1958 while (len > 0 && s < strend) {
1959 U8 ch;
f337b084 1960 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1961 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1962 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1963 if (ch < 0x80) {
a6ec74c1 1964 bytes = 0;
ac7f3b1b 1965 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1966 len--;
1967 auv = 0;
08ca2aa3 1968 continue;
a6ec74c1 1969 }
08ca2aa3 1970 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
1971 char *t;
1972 STRLEN n_a;
1973
1974 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1975 while (s < strend) {
f337b084 1976 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1977 sv = mul128(sv, (U8)(ch & 0x7f));
1978 if (!(ch & 0x80)) {
a6ec74c1
JH
1979 bytes = 0;
1980 break;
1981 }
1982 }
1983 t = SvPV(sv, n_a);
1984 while (*t == '0')
1985 t++;
1986 sv_chop(sv, t);
1987 PUSHs(sv_2mortal(sv));
1988 len--;
1989 auv = 0;
1990 }
1991 }
1992 if ((s >= strend) && bytes)
49704364 1993 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1994 }
1995 break;
1996 case 'P':
49704364
LW
1997 if (symptr->howlen == e_star)
1998 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1999 EXTEND(SP, 1);
08ca2aa3
TH
2000 if (sizeof(char*) <= strend - s) {
2001 char *aptr;
f337b084 2002 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2003 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2004 /* newSVpvn generates undef if aptr is NULL */
2005 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2006 }
a6ec74c1
JH
2007 break;
2008#ifdef HAS_QUAD
2009 case 'q':
73cb7263 2010 while (len-- > 0) {
08ca2aa3 2011 Quad_t aquad;
f337b084
TH
2012 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2013 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2014 if (!checksum)
2015 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2016 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2017 else if (checksum > bits_in_uv)
2018 cdouble += (NV)aquad;
2019 else
2020 cuv += aquad;
2021 }
a6ec74c1
JH
2022 break;
2023 case 'Q':
73cb7263 2024 while (len-- > 0) {
08ca2aa3 2025 Uquad_t auquad;
f337b084
TH
2026 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2027 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2028 if (!checksum)
2029 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2030 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2031 else if (checksum > bits_in_uv)
2032 cdouble += (NV)auquad;
2033 else
2034 cuv += auquad;
a6ec74c1
JH
2035 }
2036 break;
08ca2aa3 2037#endif /* HAS_QUAD */
a6ec74c1
JH
2038 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2039 case 'f':
73cb7263 2040 while (len-- > 0) {
08ca2aa3 2041 float afloat;
f337b084 2042 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2043 DO_BO_UNPACK_N(afloat, float);
2044 if (!checksum)
2045 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2046 else
73cb7263 2047 cdouble += afloat;
fc241834 2048 }
a6ec74c1
JH
2049 break;
2050 case 'd':
73cb7263 2051 while (len-- > 0) {
08ca2aa3 2052 double adouble;
f337b084 2053 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2054 DO_BO_UNPACK_N(adouble, double);
2055 if (!checksum)
2056 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2057 else
73cb7263 2058 cdouble += adouble;
fc241834 2059 }
a6ec74c1 2060 break;
92d41999 2061 case 'F':
73cb7263 2062 while (len-- > 0) {
08ca2aa3 2063 NV anv;
f337b084 2064 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2065 DO_BO_UNPACK_N(anv, NV);
2066 if (!checksum)
2067 PUSHs(sv_2mortal(newSVnv(anv)));
2068 else
73cb7263 2069 cdouble += anv;
fc241834 2070 }
92d41999
JH
2071 break;
2072#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2073 case 'D':
73cb7263 2074 while (len-- > 0) {
08ca2aa3 2075 long double aldouble;
f337b084 2076 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2077 DO_BO_UNPACK_N(aldouble, long double);
2078 if (!checksum)
2079 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2080 else
2081 cdouble += aldouble;
92d41999
JH
2082 }
2083 break;
2084#endif
a6ec74c1
JH
2085 case 'u':
2086 /* MKS:
2087 * Initialise the decode mapping. By using a table driven
2088 * algorithm, the code will be character-set independent
2089 * (and just as fast as doing character arithmetic)
2090 */
2091 if (PL_uudmap['M'] == 0) {
2092 int i;
2093
2094 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2095 PL_uudmap[(U8)PL_uuemap[i]] = i;
2096 /*
2097 * Because ' ' and '`' map to the same value,
2098 * we need to decode them both the same.
2099 */
2100 PL_uudmap[' '] = 0;
2101 }
08ca2aa3
TH
2102 {
2103 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2104 sv = sv_2mortal(NEWSV(42, l));
2105 if (l) SvPOK_on(sv);
2106 }
2107 if (utf8) {
2108 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2109 I32 a, b, c, d;
2110 char hunk[4];
2111
2112 hunk[3] = '\0';
2113 while (len > 0) {
2114 next_uni_uu(aTHX_ &s, strend, &a);
2115 next_uni_uu(aTHX_ &s, strend, &b);
2116 next_uni_uu(aTHX_ &s, strend, &c);
2117 next_uni_uu(aTHX_ &s, strend, &d);
2118 hunk[0] = (char)((a << 2) | (b >> 4));
2119 hunk[1] = (char)((b << 4) | (c >> 2));
2120 hunk[2] = (char)((c << 6) | d);
2121 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2122 len -= 3;
2123 }
2124 if (s < strend) {
2125 if (*s == '\n') s++;
2126 else {
2127 /* possible checksum byte */
2128 char *skip = s+UTF8SKIP(s);
2129 if (skip < strend && *skip == '\n') s = skip+1;
2130 }
2131 }
2132 }
2133 } else {
fc241834
RGS
2134 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2135 I32 a, b, c, d;
2136 char hunk[4];
a6ec74c1 2137
fc241834
RGS
2138 hunk[3] = '\0';
2139 len = PL_uudmap[*(U8*)s++] & 077;
2140 while (len > 0) {
2141 if (s < strend && ISUUCHAR(*s))
2142 a = PL_uudmap[*(U8*)s++] & 077;
2143 else
2144 a = 0;
2145 if (s < strend && ISUUCHAR(*s))
2146 b = PL_uudmap[*(U8*)s++] & 077;
2147 else
2148 b = 0;
2149 if (s < strend && ISUUCHAR(*s))
2150 c = PL_uudmap[*(U8*)s++] & 077;
2151 else
2152 c = 0;
2153 if (s < strend && ISUUCHAR(*s))
2154 d = PL_uudmap[*(U8*)s++] & 077;
2155 else
2156 d = 0;
2157 hunk[0] = (char)((a << 2) | (b >> 4));
2158 hunk[1] = (char)((b << 4) | (c >> 2));
2159 hunk[2] = (char)((c << 6) | d);
2160 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2161 len -= 3;
2162 }
2163 if (*s == '\n')
2164 s++;
2165 else /* possible checksum byte */
2166 if (s + 1 < strend && s[1] == '\n')
2167 s += 2;
a6ec74c1 2168 }
08ca2aa3
TH
2169 }
2170 XPUSHs(sv);
a6ec74c1
JH
2171 break;
2172 }
49704364 2173
a6ec74c1 2174 if (checksum) {
1109a392 2175 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2176 (checksum > bits_in_uv &&
08ca2aa3
TH
2177 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2178 NV trouble, anv;
a6ec74c1 2179
08ca2aa3 2180 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2181 while (checksum >= 16) {
2182 checksum -= 16;
08ca2aa3 2183 anv *= 65536.0;
a6ec74c1 2184 }
a6ec74c1 2185 while (cdouble < 0.0)
08ca2aa3
TH
2186 cdouble += anv;
2187 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2188 sv = newSVnv(cdouble);
a6ec74c1
JH
2189 }
2190 else {
fa8ec7c1
NC
2191 if (checksum < bits_in_uv) {
2192 UV mask = ((UV)1 << checksum) - 1;
92d41999 2193 cuv &= mask;
a6ec74c1 2194 }
c4c5f44a 2195 sv = newSVuv(cuv);
a6ec74c1
JH
2196 }
2197 XPUSHs(sv_2mortal(sv));
2198 checksum = 0;
2199 }
fc241834 2200
49704364
LW
2201 if (symptr->flags & FLAG_SLASH){
2202 if (SP - PL_stack_base - start_sp_offset <= 0)
2203 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2204 if( next_symbol(symptr) ){
2205 if( symptr->howlen == e_number )
2206 Perl_croak(aTHX_ "Count after length/code in unpack" );
2207 if( beyond ){
2208 /* ...end of char buffer then no decent length available */
2209 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2210 } else {
2211 /* take top of stack (hope it's numeric) */
2212 len = POPi;
2213 if( len < 0 )
2214 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2215 }
2216 } else {
2217 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2218 }
2219 datumtype = symptr->code;
21c16052 2220 explicit_length = FALSE;
49704364
LW
2221 goto redo_switch;
2222 }
a6ec74c1 2223 }
49704364 2224
18529408
IZ
2225 if (new_s)
2226 *new_s = s;
2227 PUTBACK;
2228 return SP - PL_stack_base - start_sp_offset;
2229}
2230
2231PP(pp_unpack)
2232{
2233 dSP;
bab9c0ac 2234 dPOPPOPssrl;
18529408
IZ
2235 I32 gimme = GIMME_V;
2236 STRLEN llen;
2237 STRLEN rlen;
08ca2aa3
TH
2238 char *pat = SvPV(left, llen);
2239 char *s = SvPV(right, rlen);
18529408 2240 char *strend = s + rlen;
08ca2aa3
TH
2241 char *patend = pat + llen;
2242 I32 cnt;
18529408
IZ
2243
2244 PUTBACK;
7accc089 2245 cnt = unpackstring(pat, patend, s, strend,
49704364 2246 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2247 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2248
18529408
IZ
2249 SPAGAIN;
2250 if ( !cnt && gimme == G_SCALAR )
2251 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2252 RETURN;
2253}
2254
f337b084
TH
2255STATIC U8 *
2256doencodes(U8 *h, char *s, I32 len)
a6ec74c1 2257{
f337b084 2258 *h++ = PL_uuemap[len];
a6ec74c1 2259 while (len > 2) {
f337b084
TH
2260 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2261 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2262 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2263 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2264 s += 3;
2265 len -= 3;
2266 }
2267 if (len > 0) {
2268 char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2269 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2270 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2271 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2272 *h++ = PL_uuemap[0];
a6ec74c1 2273 }
f337b084
TH
2274 *h++ = '\n';
2275 return h;
a6ec74c1
JH
2276}
2277
2278STATIC SV *
2279S_is_an_int(pTHX_ char *s, STRLEN l)
2280{
2281 STRLEN n_a;
2282 SV *result = newSVpvn(s, l);
2283 char *result_c = SvPV(result, n_a); /* convenience */
2284 char *out = result_c;
2285 bool skip = 1;
2286 bool ignore = 0;
2287
2288 while (*s) {
2289 switch (*s) {
2290 case ' ':
2291 break;
2292 case '+':
2293 if (!skip) {
2294 SvREFCNT_dec(result);
2295 return (NULL);
2296 }
2297 break;
2298 case '0':
2299 case '1':
2300 case '2':
2301 case '3':
2302 case '4':
2303 case '5':
2304 case '6':
2305 case '7':
2306 case '8':
2307 case '9':
2308 skip = 0;
2309 if (!ignore) {
2310 *(out++) = *s;
2311 }
2312 break;
2313 case '.':
2314 ignore = 1;
2315 break;
2316 default:
2317 SvREFCNT_dec(result);
2318 return (NULL);
2319 }
2320 s++;
2321 }
2322 *(out++) = '\0';
2323 SvCUR_set(result, out - result_c);
2324 return (result);
2325}
2326
2327/* pnum must be '\0' terminated */
2328STATIC int
2329S_div128(pTHX_ SV *pnum, bool *done)
2330{
2331 STRLEN len;
2332 char *s = SvPV(pnum, len);
2333 int m = 0;
2334 int r = 0;
2335 char *t = s;
2336
2337 *done = 1;
2338 while (*t) {
2339 int i;
2340
2341 i = m * 10 + (*t - '0');
2342 m = i & 0x7F;
2343 r = (i >> 7); /* r < 10 */
2344 if (r) {
2345 *done = 0;
2346 }
2347 *(t++) = '0' + r;
2348 }
2349 *(t++) = '\0';
2350 SvCUR_set(pnum, (STRLEN) (t - s));
2351 return (m);
2352}
2353
aadb217d
JH
2354#define TEMPSYM_INIT(symptr, p, e) \
2355 STMT_START { \
2356 (symptr)->patptr = p; \
2357 (symptr)->patend = e; \
2358 (symptr)->grpbeg = NULL; \
2359 (symptr)->grpend = NULL; \
2360 (symptr)->grpend = NULL; \
2361 (symptr)->code = 0; \
2362 (symptr)->length = 0; \
2363 (symptr)->howlen = 0; \
2364 (symptr)->level = 0; \
2365 (symptr)->flags = FLAG_PACK; \
2366 (symptr)->strbeg = 0; \
2367 (symptr)->previous = NULL; \
2368 } STMT_END
a6ec74c1 2369
18529408
IZ
2370/*
2371=for apidoc pack_cat
2372
7accc089
JH
2373The engine implementing pack() Perl function. Note: parameters next_in_list and
2374flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2375
2376=cut */
2377
49704364 2378
18529408
IZ
2379void
2380Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2381{
aadb217d 2382 tempsym_t sym;
9e27e96a
AL
2383 (void)next_in_list;
2384 (void)flags;
7accc089 2385
aadb217d
JH
2386 TEMPSYM_INIT(&sym, pat, patend);
2387
7accc089
JH
2388 (void)pack_rec( cat, &sym, beglist, endlist );
2389}
2390
2391
2392/*
2393=for apidoc packlist
2394
2395The engine implementing pack() Perl function.
2396
2397=cut */
2398
2399
2400void
2401Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2402{
f337b084 2403 STRLEN no_len;
aadb217d
JH
2404 tempsym_t sym;
2405
2406 TEMPSYM_INIT(&sym, pat, patend);
49704364 2407
f337b084
TH
2408 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2409 Also make sure any UTF8 flag is loaded */
2410 SvPV_force(cat, no_len);
2411 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2412
49704364
LW
2413 (void)pack_rec( cat, &sym, beglist, endlist );
2414}
2415
f337b084
TH
2416/* like sv_utf8_upgrade, but also repoint the group start markers */
2417STATIC void
2418marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2419 STRLEN len;
2420 tempsym_t *group;
2421 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2422
2423 if (SvUTF8(sv)) return;
2424
2425 from_start = SvPVX(sv);
2426 from_end = from_start + SvCUR(sv);
2427 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2428 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2429 if (from_ptr == from_end) {
2430 /* Simple case: no character needs to be changed */
2431 SvUTF8_on(sv);
2432 return;
2433 }
2434
3473cf63 2435 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
f337b084
TH
2436 New('U', to_start, len, char);
2437 Copy(from_start, to_start, from_ptr-from_start, char);
2438 to_ptr = to_start + (from_ptr-from_start);
2439
2440 New('U', marks, sym_ptr->level+2, char *);
2441 for (group=sym_ptr; group; group = group->previous)
2442 marks[group->level] = from_start + group->strbeg;
2443 marks[sym_ptr->level+1] = from_end+1;
2444 for (m = marks; *m < from_ptr; m++)
2445 *m = to_start + (*m-from_start);
2446
2447 for (;from_ptr < from_end; from_ptr++) {
2448 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2449 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2450 }
2451 *to_ptr = 0;
2452
2453 while (*m == from_ptr) *m++ = to_ptr;
2454 if (m != marks + sym_ptr->level+1) {
2455 Safefree(marks);
2456 Safefree(to_start);
2457 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2458 }
2459 for (group=sym_ptr; group; group = group->previous)
2460 group->strbeg = marks[group->level] - to_start;
2461 Safefree(marks);
2462
2463 if (SvOOK(sv)) {
2464 if (SvIVX(sv)) {
b162af07 2465 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2466 from_start -= SvIVX(sv);
2467 SvIV_set(sv, 0);
2468 }
2469 SvFLAGS(sv) &= ~SVf_OOK;
2470 }
2471 if (SvLEN(sv) != 0)
2472 Safefree(from_start);
f880fe2f 2473 SvPV_set(sv, to_start);
b162af07
SP
2474 SvCUR_set(sv, to_ptr - to_start);
2475 SvLEN_set(sv, len);
f337b084
TH
2476 SvUTF8_on(sv);
2477}
2478
2479/* Exponential string grower. Makes string extension effectively O(n)
2480 needed says how many extra bytes we need (not counting the final '\0')
2481 Only grows the string if there is an actual lack of space
2482*/
2483STATIC char *
2484sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2485 STRLEN cur = SvCUR(sv);
2486 STRLEN len = SvLEN(sv);
2487 STRLEN extend;
2488 if (len - cur > needed) return SvPVX(sv);
2489 extend = needed > len ? needed : len;
2490 return SvGROW(sv, len+extend+1);
2491}
49704364
LW
2492
2493STATIC
2494SV **
f337b084 2495S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2496{
49704364 2497 tempsym_t lookahead;
f337b084
TH
2498 I32 items = endlist - beglist;
2499 bool found = next_symbol(symptr);
2500 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2501
2502 if (symptr->level == 0 && found && symptr->code == 'U') {
2503 marked_upgrade(aTHX_ cat, symptr);
2504 symptr->flags |= FLAG_DO_UTF8;
2505 utf8 = 0;
49704364 2506 }
f337b084 2507 symptr->strbeg = SvCUR(cat);
49704364
LW
2508
2509 while (found) {
f337b084
TH
2510 SV *fromstr;
2511 STRLEN fromlen;
2512 I32 len;
a6ec74c1 2513 SV *lengthcode = Nullsv;
49704364 2514 I32 datumtype = symptr->code;
f337b084
TH
2515 howlen_t howlen = symptr->howlen;
2516 char *start = SvPVX(cat);
2517 char *cur = start + SvCUR(cat);
49704364 2518
f337b084
TH
2519#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2520
2521 switch (howlen) {
fc241834 2522 case e_star:
f337b084
TH
2523 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2524 0 : items;
2525 break;
2526 default:
2527 /* e_no_len and e_number */
2528 len = symptr->length;
49704364
LW
2529 break;
2530 }
2531
f337b084 2532 if (len) {
a7a3cfaa 2533 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2534
a7a3cfaa
TH
2535 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2536 /* We can process this letter. */
2537 STRLEN size = props & PACK_SIZE_MASK;
2538 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2539 }
f337b084
TH
2540 }
2541
49704364
LW
2542 /* Look ahead for next symbol. Do we have code/code? */
2543 lookahead = *symptr;
2544 found = next_symbol(&lookahead);
246f24af
TH
2545 if (symptr->flags & FLAG_SLASH) {
2546 IV count;
f337b084 2547 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2548 if (strchr("aAZ", lookahead.code)) {
2549 if (lookahead.howlen == e_number) count = lookahead.length;
2550 else {
2551 if (items > 0)
2552 count = DO_UTF8(*beglist) ?
2553 sv_len_utf8(*beglist) : sv_len(*beglist);
2554 else count = 0;
2555 if (lookahead.code == 'Z') count++;
2556 }
2557 } else {
2558 if (lookahead.howlen == e_number && lookahead.length < items)
2559 count = lookahead.length;
2560 else count = items;
2561 }
2562 lookahead.howlen = e_number;
2563 lookahead.length = count;
2564 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2565 }
49704364 2566
fc241834
RGS
2567 /* Code inside the switch must take care to properly update
2568 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2569 doesn't simply leave using break */
1109a392 2570 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2571 default:
f337b084
TH
2572 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2573 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2574 case '%':
49704364 2575 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2576 {
2577 char *from;
2578#ifdef PERL_PACK_CAN_SHRIEKSIGN
2579 case '.' | TYPE_IS_SHRIEKING:
2580#endif
2581 case '.':
2582 if (howlen == e_star) from = start;
2583 else if (len == 0) from = cur;
2584 else {
2585 tempsym_t *group = symptr;
2586
2587 while (--len && group) group = group->previous;
2588 from = group ? start + group->strbeg : start;
2589 }
2590 fromstr = NEXTFROM;
2591 len = SvIV(fromstr);
2592 goto resize;
2593#ifdef PERL_PACK_CAN_SHRIEKSIGN
2594 case '@' | TYPE_IS_SHRIEKING:
2595#endif
a6ec74c1 2596 case '@':
28be1210
TH
2597 from = start + symptr->strbeg;
2598 resize:
2599#ifdef PERL_PACK_CAN_SHRIEKSIGN
2600 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2601#else /* PERL_PACK_CAN_SHRIEKSIGN */
2602 if (utf8)
2603#endif
2604 if (len >= 0) {
2605 while (len && from < cur) {
2606 from += UTF8SKIP(from);
2607 len--;
2608 }
2609 if (from > cur)
2610 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2611 if (len) {
2612 /* Here we know from == cur */
2613 grow:
2614 GROWING(0, cat, start, cur, len);
2615 Zero(cur, len, char);
2616 cur += len;
2617 } else if (from < cur) {
2618 len = cur - from;
2619 goto shrink;
2620 } else goto no_change;
2621 } else {
2622 cur = from;
2623 len = -len;
2624 goto utf8_shrink;
f337b084 2625 }
28be1210
TH
2626 else {
2627 len -= cur - from;
f337b084 2628 if (len > 0) goto grow;
28be1210 2629 if (len == 0) goto no_change;
fc241834 2630 len = -len;
28be1210 2631 goto shrink;
f337b084 2632 }
a6ec74c1 2633 break;
28be1210 2634 }
fc241834 2635 case '(': {
49704364 2636 tempsym_t savsym = *symptr;
66c611c5
MHM
2637 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2638 symptr->flags |= group_modifiers;
49704364
LW
2639 symptr->patend = savsym.grpend;
2640 symptr->level++;
f337b084 2641 symptr->previous = &lookahead;
18529408 2642 while (len--) {
f337b084
TH
2643 U32 was_utf8;
2644 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2645 else symptr->flags &= ~FLAG_PARSE_UTF8;
2646 was_utf8 = SvUTF8(cat);
49704364 2647 symptr->patptr = savsym.grpbeg;
f337b084
TH
2648 beglist = pack_rec(cat, symptr, beglist, endlist);
2649 if (SvUTF8(cat) != was_utf8)
2650 /* This had better be an upgrade while in utf8==0 mode */
2651 utf8 = 1;
2652
49704364 2653 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2654 break; /* No way to continue */
2655 }
f337b084
TH
2656 lookahead.flags = symptr->flags & ~group_modifiers;
2657 goto no_change;
18529408 2658 }
62f95557
IZ
2659 case 'X' | TYPE_IS_SHRIEKING:
2660 if (!len) /* Avoid division by 0 */
2661 len = 1;
f337b084
TH
2662 if (utf8) {
2663 char *hop, *last;
2664 I32 l = len;
2665 hop = last = start;
2666 while (hop < cur) {
2667 hop += UTF8SKIP(hop);
2668 if (--l == 0) {
2669 last = hop;
2670 l = len;
2671 }
2672 }
2673 if (last > cur)
2674 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2675 cur = last;
2676 break;
2677 }
2678 len = (cur-start) % len;
62f95557 2679 /* FALL THROUGH */
a6ec74c1 2680 case 'X':
f337b084
TH
2681 if (utf8) {
2682 if (len < 1) goto no_change;
28be1210 2683 utf8_shrink:
f337b084
TH
2684 while (len > 0) {
2685 if (cur <= start)
28be1210
TH
2686 Perl_croak(aTHX_ "'%c' outside of string in pack",
2687 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2688 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2689 if (cur <= start)
28be1210
TH
2690 Perl_croak(aTHX_ "'%c' outside of string in pack",
2691 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2692 }
2693 len--;
2694 }
2695 } else {
fc241834 2696 shrink:
f337b084 2697 if (cur - start < len)
28be1210
TH
2698 Perl_croak(aTHX_ "'%c' outside of string in pack",
2699 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2700 cur -= len;
2701 }
2702 if (cur < start+symptr->strbeg) {
2703 /* Make sure group starts don't point into the void */
2704 tempsym_t *group;
9e27e96a 2705 const STRLEN length = cur-start;
f337b084
TH
2706 for (group = symptr;
2707 group && length < group->strbeg;
2708 group = group->previous) group->strbeg = length;
2709 lookahead.strbeg = length;
2710 }
a6ec74c1 2711 break;
fc241834
RGS
2712 case 'x' | TYPE_IS_SHRIEKING: {
2713 I32 ai32;
62f95557
IZ
2714 if (!len) /* Avoid division by 0 */
2715 len = 1;
230e1fce 2716 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2717 else ai32 = (cur - start) % len;
2718 if (ai32 == 0) goto no_change;
2719 len -= ai32;
2720 }
2721 /* FALL THROUGH */
a6ec74c1 2722 case 'x':
f337b084 2723 goto grow;
a6ec74c1
JH
2724 case 'A':
2725 case 'Z':
f337b084
TH
2726 case 'a': {
2727 char *aptr;
2728
a6ec74c1
JH
2729 fromstr = NEXTFROM;
2730 aptr = SvPV(fromstr, fromlen);
f337b084
TH
2731 if (DO_UTF8(fromstr)) {
2732 char *end, *s;
2733
2734 if (!utf8 && !SvUTF8(cat)) {
2735 marked_upgrade(aTHX_ cat, symptr);
2736 lookahead.flags |= FLAG_DO_UTF8;
2737 lookahead.strbeg = symptr->strbeg;
2738 utf8 = 1;
2739 start = SvPVX(cat);
2740 cur = start + SvCUR(cat);
2741 }
fc241834 2742 if (howlen == e_star) {
f337b084
TH
2743 if (utf8) goto string_copy;
2744 len = fromlen+1;
2745 }
2746 s = aptr;
2747 end = aptr + fromlen;
2748 fromlen = datumtype == 'Z' ? len-1 : len;
2749 while ((I32) fromlen > 0 && s < end) {
2750 s += UTF8SKIP(s);
2751 fromlen--;
2752 }
2753 if (s > end)
2754 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2755 if (utf8) {
fc241834 2756 len = fromlen;
f337b084
TH
2757 if (datumtype == 'Z') len++;
2758 fromlen = s-aptr;
2759 len += fromlen;
fc241834 2760
f337b084 2761 goto string_copy;
fc241834 2762 }
f337b084
TH
2763 fromlen = len - fromlen;
2764 if (datumtype == 'Z') fromlen--;
2765 if (howlen == e_star) {
2766 len = fromlen;
2767 if (datumtype == 'Z') len++;
fc241834 2768 }
f337b084 2769 GROWING(0, cat, start, cur, len);
fc241834 2770 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2771 datumtype | TYPE_IS_PACK))
2772 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2773 cur += fromlen;
a6ec74c1 2774 len -= fromlen;
f337b084
TH
2775 } else if (utf8) {
2776 if (howlen == e_star) {
2777 len = fromlen;
2778 if (datumtype == 'Z') len++;
a6ec74c1 2779 }
f337b084
TH
2780 if (len <= (I32) fromlen) {
2781 fromlen = len;
2782 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2783 }
fc241834 2784 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2785 upgrade, so:
2786 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2787 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2788 len -= fromlen;
2789 while (fromlen > 0) {
230e1fce 2790 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2791 aptr++;
2792 fromlen--;
fc241834 2793 }
f337b084
TH
2794 } else {
2795 string_copy:
2796 if (howlen == e_star) {
2797 len = fromlen;
2798 if (datumtype == 'Z') len++;
2799 }
2800 if (len <= (I32) fromlen) {
2801 fromlen = len;
2802 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2803 }
f337b084
TH
2804 GROWING(0, cat, start, cur, len);
2805 Copy(aptr, cur, fromlen, char);
2806 cur += fromlen;
2807 len -= fromlen;
a6ec74c1 2808 }
f337b084
TH
2809 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2810 cur += len;
a6ec74c1 2811 break;
f337b084 2812 }
a6ec74c1 2813 case 'B':
f337b084
TH
2814 case 'b': {
2815 char *str, *end;
2816 I32 l, field_len;
2817 U8 bits;
2818 bool utf8_source;
2819 U32 utf8_flags;
a6ec74c1 2820
fc241834
RGS
2821 fromstr = NEXTFROM;
2822 str = SvPV(fromstr, fromlen);
f337b084
TH
2823 end = str + fromlen;
2824 if (DO_UTF8(fromstr)) {
2825 utf8_source = TRUE;
2826 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2827 } else {
2828 utf8_source = FALSE;
2829 utf8_flags = 0; /* Unused, but keep compilers happy */
2830 }
2831 if (howlen == e_star) len = fromlen;
2832 field_len = (len+7)/8;
2833 GROWING(utf8, cat, start, cur, field_len);
2834 if (len > (I32)fromlen) len = fromlen;
2835 bits = 0;
2836 l = 0;
2837 if (datumtype == 'B')
2838 while (l++ < len) {
2839 if (utf8_source) {
2840 UV val;
2841 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2842 bits |= val & 1;
2843 } else bits |= *str++ & 1;
2844 if (l & 7) bits <<= 1;
fc241834 2845 else {
f337b084
TH
2846 PUSH_BYTE(utf8, cur, bits);
2847 bits = 0;
a6ec74c1
JH
2848 }
2849 }
f337b084
TH
2850 else
2851 /* datumtype == 'b' */
2852 while (l++ < len) {
2853 if (utf8_source) {
2854 UV val;
2855 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2856 if (val & 1) bits |= 0x80;
2857 } else if (*str++ & 1)
2858 bits |= 0x80;
2859 if (l & 7) bits >>= 1;
fc241834 2860 else {
f337b084
TH
2861 PUSH_BYTE(utf8, cur, bits);
2862 bits = 0;
a6ec74c1
JH
2863 }
2864 }
f337b084
TH
2865 l--;
2866 if (l & 7) {
fc241834 2867 if (datumtype == 'B')
f337b084 2868 bits <<= 7 - (l & 7);
fc241834 2869 else
f337b084
TH
2870 bits >>= 7 - (l & 7);
2871 PUSH_BYTE(utf8, cur, bits);
2872 l += 7;
a6ec74c1 2873 }
f337b084
TH
2874 /* Determine how many chars are left in the requested field */
2875 l /= 8;
2876 if (howlen == e_star) field_len = 0;
2877 else field_len -= l;
2878 Zero(cur, field_len, char);
2879 cur += field_len;
a6ec74c1 2880 break;
f337b084 2881 }
a6ec74c1 2882 case 'H':
f337b084
TH
2883 case 'h': {
2884 char *str, *end;
2885 I32 l, field_len;
2886 U8 bits;
2887 bool utf8_source;
2888 U32 utf8_flags;
a6ec74c1 2889
fc241834
RGS
2890 fromstr = NEXTFROM;
2891 str = SvPV(fromstr, fromlen);
f337b084
TH
2892 end = str + fromlen;
2893 if (DO_UTF8(fromstr)) {
2894 utf8_source = TRUE;
2895 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2896 } else {
2897 utf8_source = FALSE;
2898 utf8_flags = 0; /* Unused, but keep compilers happy */
2899 }
2900 if (howlen == e_star) len = fromlen;
2901 field_len = (len+1)/2;
2902 GROWING(utf8, cat, start, cur, field_len);
2903 if (!utf8 && len > (I32)fromlen) len = fromlen;
2904 bits = 0;
2905 l = 0;
2906 if (datumtype == 'H')
2907 while (l++ < len) {
2908 if (utf8_source) {
2909 UV val;
2910 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2911 if (val < 256 && isALPHA(val))
2912 bits |= (val + 9) & 0xf;
a6ec74c1 2913 else
f337b084
TH
2914 bits |= val & 0xf;
2915 } else if (isALPHA(*str))
2916 bits |= (*str++ + 9) & 0xf;
2917 else
2918 bits |= *str++ & 0xf;
2919 if (l & 1) bits <<= 4;
fc241834 2920 else {
f337b084
TH
2921 PUSH_BYTE(utf8, cur, bits);
2922 bits = 0;
a6ec74c1
JH
2923 }
2924 }
f337b084
TH
2925 else
2926 while (l++ < len) {
2927 if (utf8_source) {
2928 UV val;
2929 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2930 if (val < 256 && isALPHA(val))
2931 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2932 else
f337b084
TH
2933 bits |= (val & 0xf) << 4;
2934 } else if (isALPHA(*str))
2935 bits |= ((*str++ + 9) & 0xf) << 4;
2936 else
2937 bits |= (*str++ & 0xf) << 4;
2938 if (l & 1) bits >>= 4;
fc241834 2939 else {
f337b084
TH
2940 PUSH_BYTE(utf8, cur, bits);
2941 bits = 0;
a6ec74c1 2942 }
fc241834 2943 }
f337b084
TH
2944 l--;
2945 if (l & 1) {
2946 PUSH_BYTE(utf8, cur, bits);
2947 l++;
2948 }
2949 /* Determine how many chars are left in the requested field */
2950 l /= 2;
2951 if (howlen == e_star) field_len = 0;
2952 else field_len -= l;
2953 Zero(cur, field_len, char);
2954 cur += field_len;
2955 break;
fc241834
RGS
2956 }
2957 case 'c':
f337b084
TH
2958 while (len-- > 0) {
2959 IV aiv;
2960 fromstr = NEXTFROM;
2961 aiv = SvIV(fromstr);
2962 if ((-128 > aiv || aiv > 127) &&
2963 ckWARN(WARN_PACK))
2964 Perl_warner(aTHX_ packWARN(WARN_PACK),
2965 "Character in 'c' format wrapped in pack");
2966 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2967 }
2968 break;
2969 case 'C':
f337b084
TH
2970 if (len == 0) {
2971 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2972 break;
2973 }
2974 GROWING(0, cat, start, cur, len);
a6ec74c1 2975 while (len-- > 0) {
f337b084 2976 IV aiv;
a6ec74c1 2977 fromstr = NEXTFROM;
f337b084
TH
2978 aiv = SvIV(fromstr);
2979 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2980 ckWARN(WARN_PACK))
2981 Perl_warner(aTHX_ packWARN(WARN_PACK),
2982 "Character in 'C' format wrapped in pack");
f337b084
TH
2983 *cur++ = aiv & 0xff;
2984 }
fc241834
RGS
2985 break;
2986 case 'W': {
2987 char *end;
2988 U8 in_bytes = IN_BYTES;
2989
2990 end = start+SvLEN(cat)-1;
2991 if (utf8) end -= UTF8_MAXLEN-1;
2992 while (len-- > 0) {
2993 UV auv;
2994 fromstr = NEXTFROM;
2995 auv = SvUV(fromstr);
2996 if (in_bytes) auv = auv % 0x100;
2997 if (utf8) {
2998 W_utf8:
2999 if (cur > end) {
3000 *cur = '\0';
b162af07 3001 SvCUR_set(cat, cur - start);
fc241834
RGS
3002
3003 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3004 end = start+SvLEN(cat)-UTF8_MAXLEN;
3005 }
230e1fce
NC
3006 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3007 NATIVE_TO_UNI(auv),
3008 ckWARN(WARN_UTF8) ?
3009 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3010 } else {
3011 if (auv >= 0x100) {
3012 if (!SvUTF8(cat)) {
3013 *cur = '\0';
b162af07 3014 SvCUR_set(cat, cur - start);
fc241834
RGS
3015 marked_upgrade(aTHX_ cat, symptr);
3016 lookahead.flags |= FLAG_DO_UTF8;
3017 lookahead.strbeg = symptr->strbeg;
3018 utf8 = 1;
3019 start = SvPVX(cat);
3020 cur = start + SvCUR(cat);
3021 end = start+SvLEN(cat)-UTF8_MAXLEN;
3022 goto W_utf8;
3023 }
3024 if (ckWARN(WARN_PACK))
3025 Perl_warner(aTHX_ packWARN(WARN_PACK),
3026 "Character in 'W' format wrapped in pack");
3027 auv &= 0xff;
3028 }
3029 if (cur >= end) {
3030 *cur = '\0';
b162af07 3031 SvCUR_set(cat, cur - start);
fc241834
RGS
3032 GROWING(0, cat, start, cur, len+1);
3033 end = start+SvLEN(cat)-1;
3034 }
fe2774ed 3035 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3036 }
3037 }
3038 break;
fc241834
RGS
3039 }
3040 case 'U': {
3041 char *end;
3042
3043 if (len == 0) {
3044 if (!(symptr->flags & FLAG_DO_UTF8)) {
3045 marked_upgrade(aTHX_ cat, symptr);
3046 lookahead.flags |= FLAG_DO_UTF8;
3047 lookahead.strbeg = symptr->strbeg;
3048 }
3049 utf8 = 0;
3050 goto no_change;
3051 }
3052
3053 end = start+SvLEN(cat);
3054 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3055 while (len-- > 0) {
fc241834 3056 UV auv;
a6ec74c1 3057 fromstr = NEXTFROM;
fc241834
RGS
3058 auv = SvUV(fromstr);
3059 if (utf8) {
230e1fce 3060 U8 buffer[UTF8_MAXLEN], *endb;
fc241834
RGS
3061 endb = uvuni_to_utf8_flags(buffer, auv,
3062 ckWARN(WARN_UTF8) ?
3063 0 : UNICODE_ALLOW_ANY);
3064 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3065 *cur = '\0';
b162af07 3066 SvCUR_set(cat, cur - start);
fc241834
RGS
3067 GROWING(0, cat, start, cur,
3068 len+(endb-buffer)*UTF8_EXPAND);
3069 end = start+SvLEN(cat);
3070 }
3071 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3072 } else {
3073 if (cur >= end) {
3074 *cur = '\0';
b162af07 3075 SvCUR_set(cat, cur - start);
fc241834
RGS
3076 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3077 end = start+SvLEN(cat)-UTF8_MAXLEN;
3078 }
230e1fce
NC
3079 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3080 ckWARN(WARN_UTF8) ?
3081 0 : UNICODE_ALLOW_ANY);
fc241834 3082 }
a6ec74c1 3083 }
a6ec74c1 3084 break;
fc241834 3085 }
a6ec74c1
JH
3086 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3087 case 'f':
a6ec74c1 3088 while (len-- > 0) {
f337b084
TH
3089 float afloat;
3090 NV anv;
a6ec74c1 3091 fromstr = NEXTFROM;
f337b084 3092 anv = SvNV(fromstr);
5cdb9e01 3093#ifdef __VOS__
f337b084 3094 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3095 during conversion from double to float into infinity, so we
3096 do it by hand. This code should either be generalized for
3097 any OS that needs it, or removed if and when VOS implements
3098 posix-976 (suggestion to support mapping to infinity).
3099 Paul.Green@stratus.com 02-04-02. */
f337b084 3100 if (anv > FLT_MAX)
fc241834 3101 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3102 else if (anv < -FLT_MAX)
fc241834 3103 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3104 else afloat = (float) anv;
3105#else /* __VOS__ */
baf3cf9c 3106# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3107 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3108 * on Alpha; fake it if we don't have them.
3109 */
f337b084 3110 if (anv > FLT_MAX)
fc241834 3111 afloat = FLT_MAX;
f337b084 3112 else if (anv < -FLT_MAX)
fc241834 3113 afloat = -FLT_MAX;
f337b084 3114 else afloat = (float)anv;
baf3cf9c 3115# else
f337b084 3116 afloat = (float)anv;
baf3cf9c 3117# endif
f337b084 3118#endif /* __VOS__ */
1109a392 3119 DO_BO_PACK_N(afloat, float);
f337b084 3120 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3121 }
3122 break;
3123 case 'd':
a6ec74c1 3124 while (len-- > 0) {
f337b084
TH
3125 double adouble;
3126 NV anv;
a6ec74c1 3127 fromstr = NEXTFROM;
f337b084 3128 anv = SvNV(fromstr);
5cdb9e01 3129#ifdef __VOS__
f337b084 3130 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3131 during conversion from long double to double into infinity,
3132 so we do it by hand. This code should either be generalized
3133 for any OS that needs it, or removed if and when VOS
3134 implements posix-976 (suggestion to support mapping to
3135 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3136 if (anv > DBL_MAX)
fc241834 3137 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3138 else if (anv < -DBL_MAX)
fc241834 3139 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3140 else adouble = (double) anv;
3141#else /* __VOS__ */
baf3cf9c 3142# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3143 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3144 * on Alpha; fake it if we don't have them.
3145 */
f337b084 3146 if (anv > DBL_MAX)
fc241834 3147 adouble = DBL_MAX;
f337b084 3148 else if (anv < -DBL_MAX)
fc241834 3149 adouble = -DBL_MAX;
f337b084 3150 else adouble = (double)anv;
baf3cf9c 3151# else
f337b084 3152 adouble = (double)anv;
baf3cf9c 3153# endif
f337b084 3154#endif /* __VOS__ */
1109a392 3155 DO_BO_PACK_N(adouble, double);
f337b084 3156 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3157 }
3158 break;
fc241834
RGS
3159 case 'F': {
3160 NV anv;
1109a392 3161 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3162 while (len-- > 0) {
3163 fromstr = NEXTFROM;
3164 anv = SvNV(fromstr);
1109a392 3165 DO_BO_PACK_N(anv, NV);
fc241834 3166 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3167 }
3168 break;
fc241834 3169 }
92d41999 3170#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3171 case 'D': {
3172 long double aldouble;
1109a392
MHM
3173 /* long doubles can have unused bits, which may be nonzero */
3174 Zero(&aldouble, 1, long double);
92d41999
JH
3175 while (len-- > 0) {
3176 fromstr = NEXTFROM;
3177 aldouble = (long double)SvNV(fromstr);
1109a392 3178 DO_BO_PACK_N(aldouble, long double);
fc241834 3179 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3180 }
3181 break;
fc241834 3182 }
92d41999 3183#endif
7212898e 3184#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3185 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3186#endif
a6ec74c1
JH
3187 case 'n':
3188 while (len-- > 0) {
f337b084 3189 I16 ai16;
a6ec74c1 3190 fromstr = NEXTFROM;
ef108786 3191 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3192#ifdef HAS_HTONS
ef108786 3193 ai16 = PerlSock_htons(ai16);
a6ec74c1 3194#endif
f337b084 3195 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3196 }
3197 break;
7212898e 3198#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3199 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3200#endif
a6ec74c1
JH
3201 case 'v':
3202 while (len-- > 0) {
f337b084 3203 I16 ai16;
a6ec74c1 3204 fromstr = NEXTFROM;
ef108786 3205 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3206#ifdef HAS_HTOVS
ef108786 3207 ai16 = htovs(ai16);
a6ec74c1 3208#endif
f337b084 3209 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3210 }
3211 break;
49704364 3212 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3213#if SHORTSIZE != SIZE16
fc241834 3214 while (len-- > 0) {
f337b084 3215 unsigned short aushort;
fc241834
RGS
3216 fromstr = NEXTFROM;
3217 aushort = SvUV(fromstr);
3218 DO_BO_PACK(aushort, s);
f337b084 3219 PUSH_VAR(utf8, cur, aushort);
fc241834 3220 }
49704364
LW
3221 break;
3222#else
3223 /* Fall through! */
a6ec74c1 3224#endif
49704364 3225 case 'S':
fc241834 3226 while (len-- > 0) {
f337b084 3227 U16 au16;
fc241834
RGS
3228 fromstr = NEXTFROM;
3229 au16 = (U16)SvUV(fromstr);
3230 DO_BO_PACK(au16, 16);
f337b084 3231 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3232 }
3233 break;
49704364 3234 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3235#if SHORTSIZE != SIZE16
fc241834 3236 while (len-- > 0) {
f337b084 3237 short ashort;
fc241834
RGS
3238 fromstr = NEXTFROM;
3239 ashort = SvIV(fromstr);
3240 DO_BO_PACK(ashort, s);
f337b084 3241 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3242 }
49704364
LW
3243 break;
3244#else
3245 /* Fall through! */
a6ec74c1 3246#endif
49704364
LW
3247 case 's':
3248 while (len-- > 0) {
f337b084 3249 I16 ai16;
49704364 3250 fromstr = NEXTFROM;
ef108786
MHM
3251 ai16 = (I16)SvIV(fromstr);
3252 DO_BO_PACK(ai16, 16);
f337b084 3253 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3254 }
3255 break;
3256 case 'I':
49704364 3257 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3258 while (len-- > 0) {
f337b084 3259 unsigned int auint;
a6ec74c1
JH
3260 fromstr = NEXTFROM;
3261 auint = SvUV(fromstr);
1109a392 3262 DO_BO_PACK(auint, i);
f337b084 3263 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3264 }
3265 break;
92d41999
JH
3266 case 'j':
3267 while (len-- > 0) {
f337b084 3268 IV aiv;
92d41999
JH
3269 fromstr = NEXTFROM;
3270 aiv = SvIV(fromstr);
1109a392
MHM
3271#if IVSIZE == INTSIZE
3272 DO_BO_PACK(aiv, i);
3273#elif IVSIZE == LONGSIZE
3274 DO_BO_PACK(aiv, l);
3275#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3276 DO_BO_PACK(aiv, 64);
f337b084
TH
3277#else
3278 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3279#endif
f337b084 3280 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3281 }
3282 break;
3283 case 'J':
3284 while (len-- > 0) {
f337b084 3285 UV auv;
92d41999
JH
3286 fromstr = NEXTFROM;
3287 auv = SvUV(fromstr);
1109a392
MHM
3288#if UVSIZE == INTSIZE
3289 DO_BO_PACK(auv, i);
3290#elif UVSIZE == LONGSIZE
3291 DO_BO_PACK(auv, l);
3292#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3293 DO_BO_PACK(auv, 64);
f337b084
TH
3294#else
3295 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3296#endif
f337b084 3297 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3298 }
3299 break;
a6ec74c1
JH
3300 case 'w':
3301 while (len-- > 0) {
f337b084 3302 NV anv;
a6ec74c1 3303 fromstr = NEXTFROM;
15e9f109 3304 anv = SvNV(fromstr);
a6ec74c1 3305
f337b084
TH
3306 if (anv < 0) {
3307 *cur = '\0';
b162af07 3308 SvCUR_set(cat, cur - start);
49704364 3309 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3310 }
a6ec74c1 3311
196b62db
NC
3312 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3313 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3314 any negative IVs will have already been got by the croak()
3315 above. IOK is untrue for fractions, so we test them
3316 against UV_MAX_P1. */
f337b084
TH
3317 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3318 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3319 char *in = buf + sizeof(buf);
196b62db 3320 UV auv = SvUV(fromstr);
a6ec74c1
JH
3321
3322 do {
eb160463 3323 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3324 auv >>= 7;
3325 } while (auv);
3326 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3327 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3328 in, (buf + sizeof(buf)) - in);
3329 } else if (SvPOKp(fromstr))
3330 goto w_string;
a6ec74c1 3331 else if (SvNOKp(fromstr)) {
0258719b
NC
3332 /* 10**NV_MAX_10_EXP is the largest power of 10
3333 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3334 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3335 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3336 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3337 Some C compilers are strict about integral constant
3338 expressions so we conservatively divide by a slightly
3339 smaller integer instead of multiplying by the exact
3340 floating-point value.
0258719b
NC
3341 */
3342#ifdef NV_MAX_10_EXP
f337b084 3343 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3344 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3345#else
f337b084 3346 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3347 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3348#endif
a6ec74c1
JH
3349 char *in = buf + sizeof(buf);
3350
15e9f109 3351 anv = Perl_floor(anv);
a6ec74c1 3352 do {
15e9f109 3353 NV next = Perl_floor(anv / 128);
a6ec74c1 3354 if (in <= buf) /* this cannot happen ;-) */
49704364 3355 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3356 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3357 anv = next;
3358 } while (anv > 0);
a6ec74c1 3359 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3360 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3361 in, (buf + sizeof(buf)) - in);
3362 } else {
735b914b
JH
3363 char *from, *result, *in;
3364 SV *norm;
3365 STRLEN len;
3366 bool done;
3367
f337b084 3368 w_string:
735b914b
JH
3369 /* Copy string and check for compliance */
3370 from = SvPV(fromstr, len);
3371 if ((norm = is_an_int(from, len)) == NULL)
49704364 3372 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
3373
3374 New('w', result, len, char);
3375 in = result + len;
3376 done = FALSE;
f337b084 3377 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3378 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3379 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3380 in, (result + len) - in);
735b914b
JH
3381 Safefree(result);
3382 SvREFCNT_dec(norm); /* free norm */
fc241834 3383 }
a6ec74c1
JH
3384 }
3385 break;
3386 case 'i':
49704364 3387 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3388 while (len-- > 0) {
f337b084 3389 int aint;
a6ec74c1
JH
3390 fromstr = NEXTFROM;
3391 aint = SvIV(fromstr);
1109a392 3392 DO_BO_PACK(aint, i);
f337b084 3393 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3394 }
3395 break;
7212898e 3396#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3397 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3398#endif
a6ec74c1
JH
3399 case 'N':
3400 while (len-- > 0) {
f337b084 3401 U32 au32;
a6ec74c1 3402 fromstr = NEXTFROM;
ef108786 3403 au32 = SvUV(fromstr);
a6ec74c1 3404#ifdef HAS_HTONL
ef108786 3405 au32 = PerlSock_htonl(au32);
a6ec74c1 3406#endif
f337b084 3407 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3408 }
3409 break;
7212898e 3410#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3411 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3412#endif
a6ec74c1
JH
3413 case 'V':
3414 while (len-- > 0) {
f337b084 3415 U32 au32;
a6ec74c1 3416 fromstr = NEXTFROM;
ef108786 3417 au32 = SvUV(fromstr);
a6ec74c1 3418#ifdef HAS_HTOVL
ef108786 3419 au32 = htovl(au32);
a6ec74c1 3420#endif
f337b084 3421 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3422 }
3423 break;
49704364 3424 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3425#if LONGSIZE != SIZE32
fc241834 3426 while (len-- > 0) {
f337b084 3427 unsigned long aulong;
fc241834
RGS
3428 fromstr = NEXTFROM;
3429 aulong = SvUV(fromstr);
3430 DO_BO_PACK(aulong, l);
f337b084 3431 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3432 }
49704364
LW
3433 break;
3434#else
3435 /* Fall though! */
a6ec74c1 3436#endif
49704364 3437 case 'L':
fc241834 3438 while (len-- > 0) {
f337b084 3439 U32 au32;
fc241834
RGS
3440 fromstr = NEXTFROM;
3441 au32 = SvUV(fromstr);
3442 DO_BO_PACK(au32, 32);
f337b084 3443 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3444 }
3445 break;
49704364 3446 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3447#if LONGSIZE != SIZE32
fc241834 3448 while (len-- > 0) {
f337b084 3449 long along;
fc241834
RGS
3450 fromstr = NEXTFROM;
3451 along = SvIV(fromstr);
3452 DO_BO_PACK(along, l);
f337b084 3453 PUSH_VAR(utf8, cur, along);
a6ec74c1 3454 }
49704364
LW
3455 break;
3456#else
3457 /* Fall though! */
a6ec74c1 3458#endif
49704364
LW
3459 case 'l':
3460 while (len-- > 0) {
f337b084 3461 I32 ai32;
49704364 3462 fromstr = NEXTFROM;
ef108786
MHM
3463 ai32 = SvIV(fromstr);
3464 DO_BO_PACK(ai32, 32);
f337b084 3465 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3466 }
3467 break;
3468#ifdef HAS_QUAD
3469 case 'Q':
3470 while (len-- > 0) {
f337b084 3471 Uquad_t auquad;
a6ec74c1 3472 fromstr = NEXTFROM;
f337b084 3473 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3474 DO_BO_PACK(auquad, 64);
f337b084 3475 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3476 }
3477 break;
3478 case 'q':
3479 while (len-- > 0) {
f337b084 3480 Quad_t aquad;
a6ec74c1
JH
3481 fromstr = NEXTFROM;
3482 aquad = (Quad_t)SvIV(fromstr);
1109a392 3483 DO_BO_PACK(aquad, 64);
f337b084 3484 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3485 }
3486 break;
f337b084 3487#endif /* HAS_QUAD */
a6ec74c1
JH
3488 case 'P':
3489 len = 1; /* assume SV is correct length */
f337b084 3490 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3491 /* Fall through! */
a6ec74c1
JH
3492 case 'p':
3493 while (len-- > 0) {
f337b084
TH
3494 char *aptr;
3495
a6ec74c1 3496 fromstr = NEXTFROM;
28a4f200 3497 SvGETMAGIC(fromstr);
3498 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3499 else {
3500 STRLEN n_a;
3501 /* XXX better yet, could spirit away the string to
3502 * a safe spot and hang on to it until the result
3503 * of pack() (and all copies of the result) are
3504 * gone.
3505 */
f337b084
TH
3506 if (ckWARN(WARN_PACK) &&
3507 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3508 !SvREADONLY(fromstr)))) {
9014280d 3509 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3510 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3511 }
3512 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3513 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3514 else
28a4f200 3515 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3516 }
07409e01 3517 DO_BO_PACK_PC(aptr);
f337b084 3518 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3519 }
3520 break;
fc241834
RGS
3521 case 'u': {
3522 char *aptr, *aend;
3523 bool from_utf8;
f337b084 3524
a6ec74c1 3525 fromstr = NEXTFROM;
fc241834
RGS
3526 if (len <= 2) len = 45;
3527 else len = len / 3 * 3;
3528 if (len >= 64) {
3529 Perl_warner(aTHX_ packWARN(WARN_PACK),
3530 "Field too wide in 'u' format in pack");
3531 len = 63;
3532 }
a6ec74c1 3533 aptr = SvPV(fromstr, fromlen);
fc241834
RGS
3534 from_utf8 = DO_UTF8(fromstr);
3535 if (from_utf8) {
3536 aend = aptr + fromlen;
3537 fromlen = sv_len_utf8(fromstr);
3538 } else aend = NULL; /* Unused, but keep compilers happy */
3539 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3540 while (fromlen > 0) {
fc241834 3541 U8 *end;
a6ec74c1 3542 I32 todo;
fc241834 3543 U8 hunk[1+63/3*4+1];
a6ec74c1 3544
eb160463 3545 if ((I32)fromlen > len)
a6ec74c1
JH
3546 todo = len;
3547 else
3548 todo = fromlen;
fc241834
RGS
3549 if (from_utf8) {
3550 char buffer[64];
3551 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3552 'u' | TYPE_IS_PACK)) {
3553 *cur = '\0';
b162af07 3554 SvCUR_set(cat, cur - start);
fc241834
RGS
3555 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3556 }
3557 end = doencodes(hunk, buffer, todo);
3558 } else {
3559 end = doencodes(hunk, aptr, todo);
3560 aptr += todo;
3561 }
3562 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3563 fromlen -= todo;
3564 }
a6ec74c1
JH
3565 break;
3566 }
f337b084
TH
3567 }
3568 *cur = '\0';
b162af07 3569 SvCUR_set(cat, cur - start);
f337b084 3570 no_change:
49704364 3571 *symptr = lookahead;
a6ec74c1 3572 }
49704364 3573 return beglist;
18529408
IZ
3574}
3575#undef NEXTFROM
3576
3577
3578PP(pp_pack)
3579{
3580 dSP; dMARK; dORIGMARK; dTARGET;
3581 register SV *cat = TARG;
3582 STRLEN fromlen;
3583 register char *pat = SvPVx(*++MARK, fromlen);
3584 register char *patend = pat + fromlen;
3585
3586 MARK++;
3587 sv_setpvn(cat, "", 0);
f337b084 3588 SvUTF8_off(cat);
18529408 3589
7accc089 3590 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3591
a6ec74c1
JH
3592 SvSETMAGIC(cat);
3593 SP = ORIGMARK;
3594 PUSHs(cat);
3595 RETURN;
3596}
a6ec74c1 3597
73cb7263
NC
3598/*
3599 * Local variables:
3600 * c-indentation-style: bsd
3601 * c-basic-offset: 4
3602 * indent-tabs-mode: t
3603 * End:
3604 *
edf815fd 3605 * vim: shiftwidth=4:
73cb7263 3606*/