This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Refactoring to Sv*_set() macros - patch #5
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e 34#if PERL_VERSION >= 9
f337b084
TH
35# define PERL_PACK_CAN_BYTEORDER
36# define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
39#ifndef CHAR_BIT
40# define CHAR_BIT 8
7212898e 41#endif
3473cf63
RGS
42/* Maximum number of bytes to which a byte can grow due to upgrade */
43#define UTF8_EXPAND 2
7212898e 44
a6ec74c1 45/*
a6ec74c1
JH
46 * Offset for integer pack/unpack.
47 *
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
50 */
51
52/*
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
58 */
59/*
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
62 */
63#define SIZE16 2
64#define SIZE32 4
65
66/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
67 --jhi Feb 1999 */
68
1109a392
MHM
69#if U16SIZE > SIZE16 || U32SIZE > SIZE32
70# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
71# define OFF16(p) ((char*)(p))
72# define OFF32(p) ((char*)(p))
a6ec74c1 73# else
1109a392 74# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
75# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
77# else
08ca2aa3 78 ++++ bad cray byte order
a6ec74c1
JH
79# endif
80# endif
a6ec74c1 81#else
08ca2aa3
TH
82# define OFF16(p) ((char *) (p))
83# define OFF32(p) ((char *) (p))
a6ec74c1
JH
84#endif
85
f337b084
TH
86/* Only to be used inside a loop (see the break) */
87#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
88 if (utf8) { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
90 } else { \
91 Copy(s, OFF16(p), SIZE16, char); \
92 (s) += SIZE16; \
93 } \
94} STMT_END
95
96/* Only to be used inside a loop (see the break) */
97#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
98 if (utf8) { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
100 } else { \
101 Copy(s, OFF32(p), SIZE32, char); \
102 (s) += SIZE32; \
103 } \
104} STMT_END
105
106#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
108
109/* Only to be used inside a loop (see the break) */
f337b084 110#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
111STMT_START { \
112 if (utf8) { \
f337b084
TH
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
115 } else { \
116 Copy(s, (char *) &var, sizeof(var), char); \
117 s += sizeof(var); \
118 } \
08ca2aa3
TH
119} STMT_END
120
f337b084 121#define PUSH_VAR(utf8, aptr, var) \
230e1fce 122 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 123
49704364
LW
124/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125#define MAX_SUB_TEMPLATE_LEVEL 100
126
66c611c5 127/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
128#define FLAG_WAS_UTF8 0x40
129#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 130#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 131#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
132#define FLAG_SLASH 0x04
133#define FLAG_COMMA 0x02
134#define FLAG_PACK 0x01
135
a6ec74c1
JH
136STATIC SV *
137S_mul128(pTHX_ SV *sv, U8 m)
138{
139 STRLEN len;
140 char *s = SvPV(sv, len);
141 char *t;
142 U32 i = 0;
143
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
146
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
149 sv = tmpNew;
150 s = SvPV(sv, len);
151 }
152 t = s + len - 1;
153 while (!*t) /* trailing '\0'? */
154 t--;
155 while (t > s) {
156 i = ((*t - '0') << 7) + m;
eb160463
GS
157 *(t--) = '0' + (char)(i % 10);
158 m = (char)(i / 10);
a6ec74c1
JH
159 }
160 return (sv);
161}
162
163/* Explosives and implosives. */
164
165#if 'I' == 73 && 'J' == 74
166/* On an ASCII/ISO kind of system */
167#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
168#else
169/*
170 Some other sort of character set - use memchr() so we don't match
171 the null byte.
172 */
173#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
174#endif
175
66c611c5 176/* type modifiers */
62f95557 177#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
178#define TYPE_IS_BIG_ENDIAN 0x200
179#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 180#define TYPE_IS_PACK 0x800
1109a392 181#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 182#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
183#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
184
7212898e 185#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 186# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 187#else
28be1210 188# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
189#endif
190
191#ifndef PERL_PACK_CAN_BYTEORDER
192/* Put "can't" first because it is shorter */
193# define TYPE_ENDIANNESS(t) 0
194# define TYPE_NO_ENDIANNESS(t) (t)
195
196# define ENDIANNESS_ALLOWED_TYPES ""
197
198# define DO_BO_UNPACK(var, type)
199# define DO_BO_PACK(var, type)
07409e01
NC
200# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
201# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
202# define DO_BO_UNPACK_N(var, type)
203# define DO_BO_PACK_N(var, type)
204# define DO_BO_UNPACK_P(var)
205# define DO_BO_PACK_P(var)
66c611c5 206
f337b084 207#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
208
209# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
211
212# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
213
214# define DO_BO_UNPACK(var, type) \
1109a392 215 STMT_START { \
66c611c5 216 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
219 default: break; \
220 } \
221 } STMT_END
222
7212898e 223# define DO_BO_PACK(var, type) \
1109a392 224 STMT_START { \
66c611c5 225 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
228 default: break; \
229 } \
230 } STMT_END
231
07409e01 232# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 233 STMT_START { \
66c611c5 234 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 235 case TYPE_IS_BIG_ENDIAN: \
07409e01 236 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
237 break; \
238 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 239 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
240 break; \
241 default: \
242 break; \
243 } \
244 } STMT_END
245
07409e01 246# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 247 STMT_START { \
66c611c5 248 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 249 case TYPE_IS_BIG_ENDIAN: \
07409e01 250 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
251 break; \
252 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 253 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
254 break; \
255 default: \
256 break; \
257 } \
258 } STMT_END
259
7212898e 260# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
266 break; \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
270 break; \
271 default: \
272 break; \
273 } \
274 } STMT_END
275
7212898e 276# if PTRSIZE == INTSIZE
07409e01
NC
277# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
278# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
279# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
280# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 281# elif PTRSIZE == LONGSIZE
07409e01
NC
282# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
283# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
284# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
285# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
7212898e
NC
286# else
287# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
288# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
289# endif
1109a392 290
7212898e 291# if defined(my_htolen) && defined(my_letohn) && \
1109a392 292 defined(my_htoben) && defined(my_betohn)
7212898e 293# define DO_BO_UNPACK_N(var, type) \
1109a392 294 STMT_START { \
66c611c5 295 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
296 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
297 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
298 default: break; \
299 } \
300 } STMT_END
301
7212898e 302# define DO_BO_PACK_N(var, type) \
1109a392 303 STMT_START { \
66c611c5 304 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
305 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
306 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
307 default: break; \
308 } \
309 } STMT_END
7212898e
NC
310# else
311# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
312# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
313# endif
314
f337b084 315#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 316
78d46eaa 317#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 318#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
319#define PACK_SIZE_MASK 0x3F
320
78d46eaa
NC
321/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
322 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
323
324#if TYPE_IS_SHRIEKING != 0x100
325 ++++shriek offset should be 256
326#endif
327
328typedef U8 packprops_t;
78d46eaa
NC
329#if 'J'-'I' == 1
330/* ASCII */
a7a3cfaa
TH
331const packprops_t packprops[512] = {
332 /* normal */
333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
337 0, 0, 0,
338 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 339#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 340 /* D */ LONG_DOUBLESIZE,
80a13697 341#else
a7a3cfaa 342 0,
80a13697 343#endif
a7a3cfaa
TH
344 0,
345 /* F */ NVSIZE,
346 0, 0,
347 /* I */ sizeof(unsigned int),
348 /* J */ UVSIZE,
349 0,
350 /* L */ SIZE32,
351 0,
352 /* N */ SIZE32,
353 0, 0,
80a13697 354#if defined(HAS_QUAD)
a7a3cfaa 355 /* Q */ sizeof(Uquad_t),
80a13697 356#else
a7a3cfaa 357 0,
80a13697 358#endif
a7a3cfaa
TH
359 0,
360 /* S */ SIZE16,
361 0,
362 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
363 /* V */ SIZE32,
364 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
365 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
366 /* c */ sizeof(char),
367 /* d */ sizeof(double),
368 0,
369 /* f */ sizeof(float),
370 0, 0,
371 /* i */ sizeof(int),
372 /* j */ IVSIZE,
373 0,
374 /* l */ SIZE32,
375 0,
376 /* n */ SIZE16,
377 0,
378 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 379#if defined(HAS_QUAD)
a7a3cfaa 380 /* q */ sizeof(Quad_t),
80a13697 381#else
a7a3cfaa 382 0,
80a13697 383#endif
a7a3cfaa
TH
384 0,
385 /* s */ SIZE16,
386 0, 0,
387 /* v */ SIZE16,
388 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
394 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0,
398 /* shrieking */
399 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
401 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 0, 0, 0, 0, 0, 0, 0, 0, 0,
404 /* I */ sizeof(unsigned int),
405 0, 0,
406 /* L */ sizeof(unsigned long),
407 0,
7212898e 408#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 409 /* N */ SIZE32,
7212898e 410#else
a7a3cfaa 411 0,
7212898e 412#endif
a7a3cfaa
TH
413 0, 0, 0, 0,
414 /* S */ sizeof(unsigned short),
415 0, 0,
7212898e 416#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 417 /* V */ SIZE32,
7212898e 418#else
a7a3cfaa 419 0,
7212898e 420#endif
a7a3cfaa
TH
421 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
422 0, 0,
423 /* i */ sizeof(int),
424 0, 0,
425 /* l */ sizeof(long),
426 0,
7212898e 427#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 428 /* n */ SIZE16,
7212898e 429#else
a7a3cfaa 430 0,
7212898e 431#endif
a7a3cfaa
TH
432 0, 0, 0, 0,
433 /* s */ sizeof(short),
434 0, 0,
7212898e 435#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 436 /* v */ SIZE16,
7212898e 437#else
a7a3cfaa 438 0,
7212898e 439#endif
a7a3cfaa
TH
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
449};
450#else
451/* EBCDIC (or bust) */
a7a3cfaa
TH
452const packprops_t packprops[512] = {
453 /* normal */
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0,
463 /* c */ sizeof(char),
464 /* d */ sizeof(double),
465 0,
466 /* f */ sizeof(float),
467 0, 0,
468 /* i */ sizeof(int),
469 0, 0, 0, 0, 0, 0, 0,
470 /* j */ IVSIZE,
471 0,
472 /* l */ SIZE32,
473 0,
474 /* n */ SIZE16,
475 0,
476 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 477#if defined(HAS_QUAD)
a7a3cfaa 478 /* q */ sizeof(Quad_t),
80a13697 479#else
a7a3cfaa 480 0,
80a13697 481#endif
a7a3cfaa
TH
482 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 /* s */ SIZE16,
484 0, 0,
485 /* v */ SIZE16,
486 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 490#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 491 /* D */ LONG_DOUBLESIZE,
80a13697 492#else
a7a3cfaa 493 0,
80a13697 494#endif
a7a3cfaa
TH
495 0,
496 /* F */ NVSIZE,
497 0, 0,
498 /* I */ sizeof(unsigned int),
499 0, 0, 0, 0, 0, 0, 0,
500 /* J */ UVSIZE,
501 0,
502 /* L */ SIZE32,
503 0,
504 /* N */ SIZE32,
505 0, 0,
80a13697 506#if defined(HAS_QUAD)
a7a3cfaa 507 /* Q */ sizeof(Uquad_t),
80a13697 508#else
a7a3cfaa 509 0,
80a13697 510#endif
a7a3cfaa
TH
511 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 /* S */ SIZE16,
513 0,
514 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
515 /* V */ SIZE32,
516 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 /* shrieking */
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* i */ sizeof(int),
530 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 /* l */ sizeof(long),
532 0,
7212898e 533#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 534 /* n */ SIZE16,
7212898e 535#else
a7a3cfaa 536 0,
7212898e 537#endif
a7a3cfaa
TH
538 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
539 /* s */ sizeof(short),
540 0, 0,
7212898e 541#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 542 /* v */ SIZE16,
7212898e 543#else
a7a3cfaa 544 0,
7212898e 545#endif
a7a3cfaa
TH
546 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
548 0, 0, 0,
549 /* I */ sizeof(unsigned int),
550 0, 0, 0, 0, 0, 0, 0, 0, 0,
551 /* L */ sizeof(unsigned long),
552 0,
7212898e 553#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 554 /* N */ SIZE32,
7212898e 555#else
a7a3cfaa 556 0,
7212898e 557#endif
a7a3cfaa
TH
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 /* S */ sizeof(unsigned short),
560 0, 0,
7212898e 561#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 562 /* V */ SIZE32,
7212898e 563#else
a7a3cfaa 564 0,
7212898e 565#endif
a7a3cfaa
TH
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
568};
569#endif
570
08ca2aa3 571STATIC U8
f337b084 572uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
08ca2aa3
TH
573{
574 UV val;
575 STRLEN retlen;
f337b084
TH
576 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
577 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
578 /* We try to process malformed UTF-8 as much as possible (preferrably with
579 warnings), but these two mean we make no progress in the string and
580 might enter an infinite loop */
581 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
582 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
583 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 584 if (val >= 0x100) {
f337b084 585 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
586 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
587 "Character in '%c' format wrapped in unpack",
f337b084 588 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
589 val &= 0xff;
590 }
591 *s += retlen;
fe2774ed 592 return (U8)val;
08ca2aa3
TH
593}
594
f337b084
TH
595#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
596 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
597 *(U8 *)(s)++)
598
599STATIC bool
f337b084 600uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
601{
602 UV val;
603 STRLEN retlen;
604 char *from = *s;
605 int bad = 0;
606 U32 flags = ckWARN(WARN_UTF8) ?
607 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
608 for (;buf_len > 0; buf_len--) {
609 if (from >= end) return FALSE;
f337b084 610 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
611 if (retlen == (STRLEN) -1 || retlen == 0) {
612 from += UTF8SKIP(from);
613 bad |= 1;
614 } else from += retlen;
615 if (val >= 0x100) {
616 bad |= 2;
617 val &= 0xff;
618 }
fe2774ed 619 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
620 }
621 /* We have enough characters for the buffer. Did we have problems ? */
622 if (bad) {
623 if (bad & 1) {
624 /* Rewalk the string fragment while warning */
625 char *ptr;
626 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
627 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
628 if (ptr >= end) break;
f337b084 629 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
630 }
631 if (from > end) from = end;
632 }
633 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 634 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
635 WARN_PACK : WARN_UNPACK),
636 "Character(s) in '%c' format wrapped in %s",
fc241834 637 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 638 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
639 }
640 *s = from;
641 return TRUE;
642}
643
644STATIC bool
645next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
646{
647 UV val;
648 STRLEN retlen;
f337b084 649 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
650 if (val >= 0x100 || !ISUUCHAR(val) ||
651 retlen == (STRLEN) -1 || retlen == 0) {
652 *out = 0;
653 return FALSE;
654 }
655 *out = PL_uudmap[val] & 077;
f337b084 656 *s += retlen;
08ca2aa3
TH
657 return TRUE;
658}
78d46eaa 659
f337b084
TH
660STATIC void
661bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
662 U8 buffer[UTF8_MAXLEN];
663 U8 *end = start + len;
664 char *d = *dest;
665 while (start < end) {
666 int length =
667 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
668 switch(length) {
669 case 1:
670 *d++ = buffer[0];
671 break;
672 case 2:
673 *d++ = buffer[0];
674 *d++ = buffer[1];
675 break;
676 default:
677 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
678 *start, length);
679 }
680 start++;
681 }
682 *dest = d;
683}
684
230e1fce
NC
685#define PUSH_BYTES(utf8, cur, buf, len) \
686STMT_START { \
687 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
688 else { \
689 Copy(buf, cur, len, char); \
690 (cur) += (len); \
691 } \
f337b084
TH
692} STMT_END
693
694#define GROWING(utf8, cat, start, cur, in_len) \
695STMT_START { \
696 STRLEN glen = (in_len); \
3473cf63 697 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
698 if ((cur) + glen >= (start) + SvLEN(cat)) { \
699 (start) = sv_exp_grow(aTHX_ cat, glen); \
700 (cur) = (start) + SvCUR(cat); \
701 } \
702} STMT_END
703
704#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
705STMT_START { \
706 STRLEN glen = (in_len); \
707 STRLEN gl = glen; \
3473cf63 708 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
709 if ((cur) + gl >= (start) + SvLEN(cat)) { \
710 *cur = '\0'; \
b162af07 711 SvCUR_set((cat), (cur) - (start)); \
f337b084
TH
712 (start) = sv_exp_grow(aTHX_ cat, gl); \
713 (cur) = (start) + SvCUR(cat); \
714 } \
715 PUSH_BYTES(utf8, cur, buf, glen); \
716} STMT_END
717
718#define PUSH_BYTE(utf8, s, byte) \
719STMT_START { \
720 if (utf8) { \
721 U8 au8 = (byte); \
722 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
723 } else *(U8 *)(s)++ = (byte); \
724} STMT_END
725
726/* Only to be used inside a loop (see the break) */
727#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
728STMT_START { \
729 STRLEN retlen; \
730 if (str >= end) break; \
731 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
732 if (retlen == (STRLEN) -1 || retlen == 0) { \
733 *cur = '\0'; \
734 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
735 } \
736 str += retlen; \
737} STMT_END
738
206947d2 739/* Returns the sizeof() struct described by pat */
028d1f6d 740STATIC I32
f337b084 741S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 742{
f337b084 743 I32 total = 0;
206947d2 744
49704364 745 while (next_symbol(symptr)) {
f337b084
TH
746 I32 len;
747 int star, size;
f337b084
TH
748
749 switch (symptr->howlen) {
fc241834 750 case e_star:
49704364
LW
751 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
752 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
753 break;
f337b084
TH
754 default:
755 /* e_no_len and e_number */
756 len = symptr->length;
757 break;
49704364
LW
758 }
759
a7a3cfaa 760 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697
NC
761 if (!size) {
762 /* endianness doesn't influence the size of a type */
763 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
764 default:
765 Perl_croak(aTHX_ "Invalid type '%c' in %s",
766 (int)TYPE_NO_MODIFIERS(symptr->code),
767 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
28be1210
TH
768#ifdef PERL_PACK_CAN_SHRIEKSIGN
769 case '.' | TYPE_IS_SHRIEKING:
770 case '@' | TYPE_IS_SHRIEKING:
771#endif
80a13697 772 case '@':
28be1210 773 case '.':
80a13697
NC
774 case '/':
775 case 'U': /* XXXX Is it correct? */
776 case 'w':
777 case 'u':
778 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 779 (int) TYPE_NO_MODIFIERS(symptr->code),
80a13697
NC
780 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
781 case '%':
782 size = 0;
783 break;
784 case '(':
fc241834
RGS
785 {
786 tempsym_t savsym = *symptr;
787 symptr->patptr = savsym.grpbeg;
788 symptr->patend = savsym.grpend;
789 /* XXXX Theoretically, we need to measure many times at
790 different positions, since the subexpression may contain
791 alignment commands, but be not of aligned length.
792 Need to detect this and croak(). */
793 size = measure_struct(symptr);
794 *symptr = savsym;
795 break;
796 }
80a13697
NC
797 case 'X' | TYPE_IS_SHRIEKING:
798 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
799 */
800 if (!len) /* Avoid division by 0 */
801 len = 1;
802 len = total % len; /* Assumed: the start is aligned. */
803 /* FALL THROUGH */
804 case 'X':
805 size = -1;
806 if (total < len)
807 Perl_croak(aTHX_ "'X' outside of string in %s",
808 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
809 break;
810 case 'x' | TYPE_IS_SHRIEKING:
811 if (!len) /* Avoid division by 0 */
812 len = 1;
813 star = total % len; /* Assumed: the start is aligned. */
814 if (star) /* Other portable ways? */
815 len = len - star;
816 else
817 len = 0;
818 /* FALL THROUGH */
819 case 'x':
820 case 'A':
821 case 'Z':
822 case 'a':
80a13697
NC
823 size = 1;
824 break;
825 case 'B':
826 case 'b':
827 len = (len + 7)/8;
828 size = 1;
829 break;
830 case 'H':
831 case 'h':
832 len = (len + 1)/2;
833 size = 1;
834 break;
78d46eaa 835
80a13697
NC
836 case 'P':
837 len = 1;
838 size = sizeof(char*);
78d46eaa
NC
839 break;
840 }
206947d2
IZ
841 }
842 total += len * size;
843 }
844 return total;
845}
846
49704364
LW
847
848/* locate matching closing parenthesis or bracket
849 * returns char pointer to char after match, or NULL
850 */
851STATIC char *
852S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 853{
49704364
LW
854 while (patptr < patend) {
855 char c = *patptr++;
856
857 if (isSPACE(c))
858 continue;
859 else if (c == ender)
860 return patptr-1;
861 else if (c == '#') {
862 while (patptr < patend && *patptr != '\n')
863 patptr++;
864 continue;
865 } else if (c == '(')
866 patptr = group_end(patptr, patend, ')') + 1;
867 else if (c == '[')
868 patptr = group_end(patptr, patend, ']') + 1;
18529408 869 }
49704364
LW
870 Perl_croak(aTHX_ "No group ending character '%c' found in template",
871 ender);
872 return 0;
18529408
IZ
873}
874
49704364
LW
875
876/* Convert unsigned decimal number to binary.
877 * Expects a pointer to the first digit and address of length variable
878 * Advances char pointer to 1st non-digit char and returns number
fc241834 879 */
18529408 880STATIC char *
49704364
LW
881S_get_num(pTHX_ register char *patptr, I32 *lenptr )
882{
883 I32 len = *patptr++ - '0';
884 while (isDIGIT(*patptr)) {
885 if (len >= 0x7FFFFFFF/10)
886 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
887 len = (len * 10) + (*patptr++ - '0');
888 }
889 *lenptr = len;
890 return patptr;
891}
892
893/* The marvellous template parsing routine: Using state stored in *symptr,
894 * locates next template code and count
895 */
896STATIC bool
f337b084 897S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 898{
fc241834
RGS
899 char* patptr = symptr->patptr;
900 char* patend = symptr->patend;
f337b084 901 const char *allowed = "";
49704364
LW
902
903 symptr->flags &= ~FLAG_SLASH;
904
905 while (patptr < patend) {
906 if (isSPACE(*patptr))
907 patptr++;
908 else if (*patptr == '#') {
909 patptr++;
910 while (patptr < patend && *patptr != '\n')
911 patptr++;
912 if (patptr < patend)
913 patptr++;
914 } else {
fc241834 915 /* We should have found a template code */
49704364 916 I32 code = *patptr++ & 0xFF;
66c611c5 917 U32 inherited_modifiers = 0;
49704364
LW
918
919 if (code == ','){ /* grandfather in commas but with a warning */
920 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
921 symptr->flags |= FLAG_COMMA;
922 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
923 "Invalid type ',' in %s",
924 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
925 }
926 continue;
927 }
fc241834 928
49704364 929 /* for '(', skip to ')' */
fc241834 930 if (code == '(') {
49704364
LW
931 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
932 Perl_croak(aTHX_ "()-group starts with a count in %s",
933 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
934 symptr->grpbeg = patptr;
935 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
936 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
937 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
938 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
939 }
940
66c611c5
MHM
941 /* look for group modifiers to inherit */
942 if (TYPE_ENDIANNESS(symptr->flags)) {
943 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
944 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
945 }
946
1109a392
MHM
947 /* look for modifiers */
948 while (patptr < patend) {
1109a392
MHM
949 I32 modifier = 0;
950 switch (*patptr) {
951 case '!':
952 modifier = TYPE_IS_SHRIEKING;
7212898e 953 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 954 break;
7212898e 955#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
956 case '>':
957 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 958 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
959 break;
960 case '<':
961 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 962 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 963 break;
f337b084 964#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392
MHM
965 default:
966 break;
967 }
66c611c5 968
1109a392
MHM
969 if (modifier == 0)
970 break;
66c611c5 971
1109a392
MHM
972 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
973 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
974 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
975
976 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392
MHM
977 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
978 (int) TYPE_NO_MODIFIERS(code),
979 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
980 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
981 TYPE_ENDIANNESS_MASK)
982 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
983 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
984
1109a392
MHM
985 if (ckWARN(WARN_UNPACK)) {
986 if (code & modifier)
987 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
988 "Duplicate modifier '%c' after '%c' in %s",
989 *patptr, (int) TYPE_NO_MODIFIERS(code),
990 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
991 }
66c611c5 992
1109a392
MHM
993 code |= modifier;
994 patptr++;
49704364
LW
995 }
996
66c611c5
MHM
997 /* inherit modifiers */
998 code |= inherited_modifiers;
999
fc241834 1000 /* look for count and/or / */
49704364
LW
1001 if (patptr < patend) {
1002 if (isDIGIT(*patptr)) {
1003 patptr = get_num( patptr, &symptr->length );
1004 symptr->howlen = e_number;
1005
1006 } else if (*patptr == '*') {
1007 patptr++;
1008 symptr->howlen = e_star;
1009
1010 } else if (*patptr == '[') {
fc241834 1011 char* lenptr = ++patptr;
49704364
LW
1012 symptr->howlen = e_number;
1013 patptr = group_end( patptr, patend, ']' ) + 1;
1014 /* what kind of [] is it? */
1015 if (isDIGIT(*lenptr)) {
1016 lenptr = get_num( lenptr, &symptr->length );
1017 if( *lenptr != ']' )
1018 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1019 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1020 } else {
1021 tempsym_t savsym = *symptr;
1022 symptr->patend = patptr-1;
1023 symptr->patptr = lenptr;
1024 savsym.length = measure_struct(symptr);
1025 *symptr = savsym;
1026 }
1027 } else {
1028 symptr->howlen = e_no_len;
1029 symptr->length = 1;
1030 }
1031
1032 /* try to find / */
1033 while (patptr < patend) {
1034 if (isSPACE(*patptr))
1035 patptr++;
1036 else if (*patptr == '#') {
1037 patptr++;
1038 while (patptr < patend && *patptr != '\n')
1039 patptr++;
1040 if (patptr < patend)
1041 patptr++;
1042 } else {
66c611c5 1043 if (*patptr == '/') {
49704364
LW
1044 symptr->flags |= FLAG_SLASH;
1045 patptr++;
66c611c5
MHM
1046 if (patptr < patend &&
1047 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364
LW
1048 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1049 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1050 }
1051 break;
1052 }
18529408 1053 }
49704364
LW
1054 } else {
1055 /* at end - no count, no / */
1056 symptr->howlen = e_no_len;
1057 symptr->length = 1;
1058 }
1059
1060 symptr->code = code;
fc241834 1061 symptr->patptr = patptr;
49704364 1062 return TRUE;
18529408 1063 }
49704364 1064 }
fc241834 1065 symptr->patptr = patptr;
49704364 1066 return FALSE;
18529408
IZ
1067}
1068
18529408 1069/*
fc241834 1070 There is no way to cleanly handle the case where we should process the
08ca2aa3 1071 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1072 (e.g. estimates like strend-s as an upper bound for the number of
1073 characters left wouldn't work). So if we foresee the need of this
1074 (pattern starts with U or contains U0), we want to work on the encoded
1075 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1076 themselves if they need to do a lot of unpacks like this on it
1077*/
fc241834 1078STATIC bool
08ca2aa3
TH
1079need_utf8(const char *pat, const char *patend)
1080{
1081 bool first = TRUE;
1082 while (pat < patend) {
1083 if (pat[0] == '#') {
1084 pat++;
07409e01 1085 pat = (char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1086 if (!pat) return FALSE;
1087 } else if (pat[0] == 'U') {
1088 if (first || pat[1] == '0') return TRUE;
1089 } else first = FALSE;
1090 pat++;
1091 }
1092 return FALSE;
1093}
1094
1095STATIC char
1096first_symbol(const char *pat, const char *patend) {
1097 while (pat < patend) {
1098 if (pat[0] != '#') return pat[0];
1099 pat++;
07409e01 1100 pat = (char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1101 if (!pat) return 0;
1102 pat++;
1103 }
1104 return 0;
1105}
1106
1107/*
18529408
IZ
1108=for apidoc unpack_str
1109
7accc089
JH
1110The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1111and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1112
1113=cut */
1114
1115I32
f337b084 1116Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1117{
49704364 1118 tempsym_t sym = { 0 };
08ca2aa3 1119
f337b084 1120 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1121 else if (need_utf8(pat, patend)) {
1122 /* We probably should try to avoid this in case a scalar context call
1123 wouldn't get to the "U0" */
1124 STRLEN len = strend - s;
230e1fce 1125 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1126 SAVEFREEPV(s);
1127 strend = s + len;
f337b084 1128 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1129 }
1130
f337b084
TH
1131 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1132 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1133
49704364
LW
1134 sym.patptr = pat;
1135 sym.patend = patend;
1136 sym.flags = flags;
1137
1138 return unpack_rec(&sym, s, s, strend, NULL );
1139}
1140
7accc089
JH
1141/*
1142=for apidoc unpackstring
1143
608d3aed
LW
1144The engine implementing unpack() Perl function. C<unpackstring> puts the
1145extracted list items on the stack and returns the number of elements.
1146Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1147
1148=cut */
1149
1150I32
f337b084 1151Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
7accc089
JH
1152{
1153 tempsym_t sym = { 0 };
08ca2aa3 1154
f337b084 1155 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1156 else if (need_utf8(pat, patend)) {
1157 /* We probably should try to avoid this in case a scalar context call
1158 wouldn't get to the "U0" */
1159 STRLEN len = strend - s;
230e1fce 1160 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1161 SAVEFREEPV(s);
1162 strend = s + len;
f337b084 1163 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1164 }
1165
f337b084
TH
1166 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1167 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1168
7accc089
JH
1169 sym.patptr = pat;
1170 sym.patend = patend;
1171 sym.flags = flags;
1172
1173 return unpack_rec(&sym, s, s, strend, NULL );
1174}
1175
49704364
LW
1176STATIC
1177I32
08ca2aa3 1178S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
49704364 1179{
a6ec74c1 1180 dSP;
18529408
IZ
1181 SV *sv;
1182 I32 start_sp_offset = SP - PL_stack_base;
49704364 1183 howlen_t howlen;
a6ec74c1 1184
a6ec74c1 1185 I32 checksum = 0;
92d41999 1186 UV cuv = 0;
a6ec74c1 1187 NV cdouble = 0.0;
f337b084 1188 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1189 bool beyond = FALSE;
21c16052 1190 bool explicit_length;
49704364 1191 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1192 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1193 symptr->strbeg = s - strbeg;
49704364 1194
49704364 1195 while (next_symbol(symptr)) {
a7a3cfaa 1196 packprops_t props;
f337b084
TH
1197 I32 len, ai32;
1198 I32 datumtype = symptr->code;
206947d2 1199 /* do first one only unless in list context
08ca2aa3 1200 / is implemented by unpacking the count, then popping it from the
206947d2 1201 stack, so must check that we're not in the middle of a / */
49704364 1202 if ( unpack_only_one
206947d2 1203 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1204 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1205 break;
49704364 1206
f337b084 1207 switch (howlen = symptr->howlen) {
fc241834
RGS
1208 case e_star:
1209 len = strend - strbeg; /* long enough */
49704364 1210 break;
f337b084
TH
1211 default:
1212 /* e_no_len and e_number */
1213 len = symptr->length;
1214 break;
49704364 1215 }
18529408 1216
21c16052 1217 explicit_length = TRUE;
a6ec74c1 1218 redo_switch:
49704364 1219 beyond = s >= strend;
a7a3cfaa
TH
1220
1221 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1222 if (props) {
1223 /* props nonzero means we can process this letter. */
1224 long size = props & PACK_SIZE_MASK;
1225 long howmany = (strend - s) / size;
1226 if (len > howmany)
1227 len = howmany;
1228
1229 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1230 if (len && unpack_only_one) len = 1;
1231 EXTEND(SP, len);
1232 EXTEND_MORTAL(len);
78d46eaa
NC
1233 }
1234 }
a7a3cfaa 1235
1109a392 1236 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1237 default:
1109a392 1238 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1239
a6ec74c1 1240 case '%':
49704364 1241 if (howlen == e_no_len)
18529408 1242 len = 16; /* len is not specified */
a6ec74c1 1243 checksum = len;
92d41999 1244 cuv = 0;
a6ec74c1 1245 cdouble = 0;
18529408 1246 continue;
a6ec74c1 1247 break;
18529408
IZ
1248 case '(':
1249 {
49704364 1250 tempsym_t savsym = *symptr;
66c611c5
MHM
1251 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1252 symptr->flags |= group_modifiers;
49704364 1253 symptr->patend = savsym.grpend;
28be1210 1254 symptr->previous = &savsym;
49704364 1255 symptr->level++;
18529408
IZ
1256 PUTBACK;
1257 while (len--) {
49704364 1258 symptr->patptr = savsym.grpbeg;
f337b084
TH
1259 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1260 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1261 unpack_rec(symptr, s, strbeg, strend, &s);
1262 if (s == strend && savsym.howlen == e_star)
49704364 1263 break; /* No way to continue */
18529408
IZ
1264 }
1265 SPAGAIN;
28be1210 1266 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1267 *symptr = savsym;
18529408
IZ
1268 break;
1269 }
28be1210
TH
1270#ifdef PERL_PACK_CAN_SHRIEKSIGN
1271 case '.' | TYPE_IS_SHRIEKING:
1272#endif
1273 case '.': {
1274 char *from;
1275 SV *sv;
1276#ifdef PERL_PACK_CAN_SHRIEKSIGN
1277 bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1278#else /* PERL_PACK_CAN_SHRIEKSIGN */
1279 bool u8 = utf8;
1280#endif
1281 if (howlen == e_star) from = strbeg;
1282 else if (len <= 0) from = s;
1283 else {
1284 tempsym_t *group = symptr;
1285
1286 while (--len && group) group = group->previous;
1287 from = group ? strbeg + group->strbeg : strbeg;
1288 }
1289 sv = from <= s ?
00646304
CB
1290 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1291 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1292 XPUSHs(sv_2mortal(sv));
1293 break;
1294 }
1295#ifdef PERL_PACK_CAN_SHRIEKSIGN
1296 case '@' | TYPE_IS_SHRIEKING:
1297#endif
a6ec74c1 1298 case '@':
28be1210
TH
1299 s = strbeg + symptr->strbeg;
1300#ifdef PERL_PACK_CAN_SHRIEKSIGN
1301 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1302#else /* PERL_PACK_CAN_SHRIEKSIGN */
1303 if (utf8)
1304#endif
1305 {
08ca2aa3
TH
1306 while (len > 0) {
1307 if (s >= strend)
1308 Perl_croak(aTHX_ "'@' outside of string in unpack");
1309 s += UTF8SKIP(s);
1310 len--;
1311 }
1312 if (s > strend)
1313 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1314 } else {
28be1210 1315 if (strend-s < len)
fc241834 1316 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1317 s += len;
08ca2aa3 1318 }
a6ec74c1 1319 break;
62f95557
IZ
1320 case 'X' | TYPE_IS_SHRIEKING:
1321 if (!len) /* Avoid division by 0 */
1322 len = 1;
08ca2aa3
TH
1323 if (utf8) {
1324 char *hop, *last;
f337b084
TH
1325 I32 l = len;
1326 hop = last = strbeg;
1327 while (hop < s) {
1328 hop += UTF8SKIP(hop);
1329 if (--l == 0) {
08ca2aa3 1330 last = hop;
f337b084
TH
1331 l = len;
1332 }
fc241834 1333 }
f337b084
TH
1334 if (last > s)
1335 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1336 s = last;
1337 break;
f337b084
TH
1338 }
1339 len = (s - strbeg) % len;
62f95557 1340 /* FALL THROUGH */
a6ec74c1 1341 case 'X':
08ca2aa3
TH
1342 if (utf8) {
1343 while (len > 0) {
1344 if (s <= strbeg)
1345 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1346 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1347 if (s <= strbeg)
1348 Perl_croak(aTHX_ "'X' outside of string in unpack");
1349 }
1350 len--;
1351 }
1352 } else {
fc241834
RGS
1353 if (len > s - strbeg)
1354 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1355 s -= len;
08ca2aa3 1356 }
a6ec74c1 1357 break;
62f95557
IZ
1358 case 'x' | TYPE_IS_SHRIEKING:
1359 if (!len) /* Avoid division by 0 */
1360 len = 1;
230e1fce
NC
1361 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1362 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1363 if (ai32 == 0) break;
1364 len -= ai32;
62f95557 1365 /* FALL THROUGH */
a6ec74c1 1366 case 'x':
08ca2aa3
TH
1367 if (utf8) {
1368 while (len>0) {
1369 if (s >= strend)
1370 Perl_croak(aTHX_ "'x' outside of string in unpack");
1371 s += UTF8SKIP(s);
1372 len--;
1373 }
1374 } else {
fc241834
RGS
1375 if (len > strend - s)
1376 Perl_croak(aTHX_ "'x' outside of string in unpack");
1377 s += len;
f337b084 1378 }
a6ec74c1
JH
1379 break;
1380 case '/':
49704364
LW
1381 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1382 break;
a6ec74c1
JH
1383 case 'A':
1384 case 'Z':
1385 case 'a':
08ca2aa3
TH
1386 if (checksum) {
1387 /* Preliminary length estimate is assumed done in 'W' */
1388 if (len > strend - s) len = strend - s;
1389 goto W_checksum;
1390 }
1391 if (utf8) {
1392 I32 l;
1393 char *hop;
1394 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1395 if (hop >= strend) {
1396 if (hop > strend)
1397 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1398 break;
fc241834 1399 }
a6ec74c1 1400 }
08ca2aa3
TH
1401 if (hop > strend)
1402 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1403 len = hop - s;
1404 } else if (len > strend - s)
1405 len = strend - s;
1406
1407 if (datumtype == 'Z') {
1408 /* 'Z' strips stuff after first null */
f337b084
TH
1409 char *ptr, *end;
1410 end = s + len;
1411 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1412 sv = newSVpvn(s, ptr-s);
1413 if (howlen == e_star) /* exact for 'Z*' */
1414 len = ptr-s + (ptr != strend ? 1 : 0);
1415 } else if (datumtype == 'A') {
1416 /* 'A' strips both nulls and spaces */
1417 char *ptr;
18bdf90a
TH
1418 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1419 for (ptr = s+len-1; ptr >= s; ptr--)
1420 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1421 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1422 if (ptr >= s) ptr += UTF8SKIP(ptr);
1423 else ptr++;
28be1210 1424 if (ptr > s+len)
18bdf90a
TH
1425 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1426 } else {
1427 for (ptr = s+len-1; ptr >= s; ptr--)
1428 if (*ptr != 0 && !isSPACE(*ptr)) break;
1429 ptr++;
1430 }
08ca2aa3
TH
1431 sv = newSVpvn(s, ptr-s);
1432 } else sv = newSVpvn(s, len);
1433
1434 if (utf8) {
1435 SvUTF8_on(sv);
1436 /* Undo any upgrade done due to need_utf8() */
f337b084 1437 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1438 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1439 }
1440 XPUSHs(sv_2mortal(sv));
08ca2aa3 1441 s += len;
a6ec74c1
JH
1442 break;
1443 case 'B':
08ca2aa3
TH
1444 case 'b': {
1445 char *str;
49704364 1446 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1447 len = (strend - s) * 8;
1448 if (checksum) {
1449 if (!PL_bitcount) {
08ca2aa3 1450 int bits;
a6ec74c1
JH
1451 Newz(601, PL_bitcount, 256, char);
1452 for (bits = 1; bits < 256; bits++) {
1453 if (bits & 1) PL_bitcount[bits]++;
1454 if (bits & 2) PL_bitcount[bits]++;
1455 if (bits & 4) PL_bitcount[bits]++;
1456 if (bits & 8) PL_bitcount[bits]++;
1457 if (bits & 16) PL_bitcount[bits]++;
1458 if (bits & 32) PL_bitcount[bits]++;
1459 if (bits & 64) PL_bitcount[bits]++;
1460 if (bits & 128) PL_bitcount[bits]++;
1461 }
1462 }
f337b084 1463 if (utf8)
08ca2aa3 1464 while (len >= 8 && s < strend) {
f337b084 1465 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1466 len -= 8;
1467 }
f337b084 1468 else
fc241834 1469 while (len >= 8) {
08ca2aa3 1470 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1471 len -= 8;
1472 }
08ca2aa3
TH
1473 if (len && s < strend) {
1474 U8 bits;
f337b084
TH
1475 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1476 if (datumtype == 'b')
a6ec74c1 1477 while (len-- > 0) {
92d41999 1478 if (bits & 1) cuv++;
a6ec74c1
JH
1479 bits >>= 1;
1480 }
f337b084 1481 else
a6ec74c1 1482 while (len-- > 0) {
08ca2aa3 1483 if (bits & 0x80) cuv++;
a6ec74c1
JH
1484 bits <<= 1;
1485 }
fc241834 1486 }
a6ec74c1
JH
1487 break;
1488 }
08ca2aa3
TH
1489
1490 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1491 SvPOK_on(sv);
1492 str = SvPVX(sv);
1493 if (datumtype == 'b') {
f337b084 1494 U8 bits = 0;
08ca2aa3
TH
1495 ai32 = len;
1496 for (len = 0; len < ai32; len++) {
1497 if (len & 7) bits >>= 1;
1498 else if (utf8) {
1499 if (s >= strend) break;
f337b084 1500 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1501 } else bits = *(U8 *) s++;
1502 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1503 }
08ca2aa3 1504 } else {
f337b084 1505 U8 bits = 0;
08ca2aa3
TH
1506 ai32 = len;
1507 for (len = 0; len < ai32; len++) {
1508 if (len & 7) bits <<= 1;
1509 else if (utf8) {
1510 if (s >= strend) break;
f337b084 1511 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1512 } else bits = *(U8 *) s++;
1513 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1514 }
1515 }
1516 *str = '\0';
08ca2aa3
TH
1517 SvCUR_set(sv, str - SvPVX(sv));
1518 XPUSHs(sv);
a6ec74c1 1519 break;
08ca2aa3 1520 }
a6ec74c1 1521 case 'H':
08ca2aa3
TH
1522 case 'h': {
1523 char *str;
fc241834 1524 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1525 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1526 len = (strend - s) * 2;
fc241834 1527 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1528 SvPOK_on(sv);
1529 str = SvPVX(sv);
1530 if (datumtype == 'h') {
f337b084 1531 U8 bits = 0;
fc241834
RGS
1532 ai32 = len;
1533 for (len = 0; len < ai32; len++) {
1534 if (len & 1) bits >>= 4;
1535 else if (utf8) {
1536 if (s >= strend) break;
f337b084 1537 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1538 } else bits = * (U8 *) s++;
a6ec74c1
JH
1539 *str++ = PL_hexdigit[bits & 15];
1540 }
08ca2aa3 1541 } else {
f337b084 1542 U8 bits = 0;
08ca2aa3
TH
1543 ai32 = len;
1544 for (len = 0; len < ai32; len++) {
1545 if (len & 1) bits <<= 4;
1546 else if (utf8) {
1547 if (s >= strend) break;
f337b084 1548 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1549 } else bits = *(U8 *) s++;
a6ec74c1
JH
1550 *str++ = PL_hexdigit[(bits >> 4) & 15];
1551 }
1552 }
1553 *str = '\0';
08ca2aa3
TH
1554 SvCUR_set(sv, str - SvPVX(sv));
1555 XPUSHs(sv);
a6ec74c1 1556 break;
08ca2aa3 1557 }
a6ec74c1 1558 case 'c':
73cb7263 1559 while (len-- > 0) {
f337b084 1560 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1561 if (aint >= 128) /* fake up signed chars */
1562 aint -= 256;
08ca2aa3 1563 if (!checksum)
ac7f3b1b 1564 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1565 else if (checksum > bits_in_uv)
1566 cdouble += (NV)aint;
1567 else
1568 cuv += aint;
a6ec74c1
JH
1569 }
1570 break;
1571 case 'C':
08ca2aa3
TH
1572 case 'W':
1573 W_checksum:
35bcd338 1574 if (len == 0) {
fc241834 1575 if (explicit_length && datumtype == 'C')
08ca2aa3 1576 /* Switch to "character" mode */
f337b084 1577 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1578 break;
1579 }
fc241834 1580 if (datumtype == 'C' ?
f337b084
TH
1581 (symptr->flags & FLAG_DO_UTF8) &&
1582 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3
TH
1583 while (len-- > 0 && s < strend) {
1584 UV val;
1585 STRLEN retlen;
f337b084
TH
1586 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1587 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1588 if (retlen == (STRLEN) -1 || retlen == 0)
1589 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1590 s += retlen;
1591 if (!checksum)
1592 PUSHs(sv_2mortal(newSVuv((UV) val)));
1593 else if (checksum > bits_in_uv)
1594 cdouble += (NV) val;
d6d3e8bd 1595 else
08ca2aa3 1596 cuv += val;
fc241834 1597 }
08ca2aa3 1598 } else if (!checksum)
a6ec74c1 1599 while (len-- > 0) {
08ca2aa3
TH
1600 U8 ch = *(U8 *) s++;
1601 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1602 }
08ca2aa3
TH
1603 else if (checksum > bits_in_uv)
1604 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1605 else
1606 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1607 break;
1608 case 'U':
35bcd338 1609 if (len == 0) {
08ca2aa3
TH
1610 if (explicit_length) {
1611 /* Switch to "bytes in UTF-8" mode */
f337b084 1612 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1613 else
1614 /* Should be impossible due to the need_utf8() test */
1615 Perl_croak(aTHX_ "U0 mode on a byte string");
1616 }
35bcd338
JH
1617 break;
1618 }
08ca2aa3 1619 if (len > strend - s) len = strend - s;
fc241834 1620 if (!checksum) {
08ca2aa3
TH
1621 if (len && unpack_only_one) len = 1;
1622 EXTEND(SP, len);
1623 EXTEND_MORTAL(len);
fc241834 1624 }
08ca2aa3
TH
1625 while (len-- > 0 && s < strend) {
1626 STRLEN retlen;
1627 UV auv;
1628 if (utf8) {
1629 U8 result[UTF8_MAXLEN];
1630 char *ptr;
1631 STRLEN len;
1632 ptr = s;
1633 /* Bug: warns about bad utf8 even if we are short on bytes
1634 and will break out of the loop */
230e1fce
NC
1635 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1636 'U'))
08ca2aa3
TH
1637 break;
1638 len = UTF8SKIP(result);
fc241834 1639 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1640 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1641 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1642 s = ptr;
1643 } else {
1644 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1645 if (retlen == (STRLEN) -1 || retlen == 0)
1646 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1647 s += retlen;
1648 }
1649 if (!checksum)
1650 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1651 else if (checksum > bits_in_uv)
08ca2aa3 1652 cdouble += (NV) auv;
73cb7263 1653 else
08ca2aa3 1654 cuv += auv;
a6ec74c1
JH
1655 }
1656 break;
49704364
LW
1657 case 's' | TYPE_IS_SHRIEKING:
1658#if SHORTSIZE != SIZE16
73cb7263 1659 while (len-- > 0) {
08ca2aa3 1660 short ashort;
f337b084
TH
1661 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1662 DO_BO_UNPACK(ashort, s);
08ca2aa3 1663 if (!checksum)
ac7f3b1b 1664 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1665 else if (checksum > bits_in_uv)
1666 cdouble += (NV)ashort;
1667 else
1668 cuv += ashort;
49704364
LW
1669 }
1670 break;
1671#else
1672 /* Fallthrough! */
a6ec74c1 1673#endif
49704364 1674 case 's':
73cb7263 1675 while (len-- > 0) {
08ca2aa3
TH
1676 I16 ai16;
1677
1678#if U16SIZE > SIZE16
1679 ai16 = 0;
1680#endif
f337b084 1681 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1682 DO_BO_UNPACK(ai16, 16);
1109a392 1683#if U16SIZE > SIZE16
73cb7263
NC
1684 if (ai16 > 32767)
1685 ai16 -= 65536;
a6ec74c1 1686#endif
08ca2aa3 1687 if (!checksum)
ac7f3b1b 1688 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1689 else if (checksum > bits_in_uv)
1690 cdouble += (NV)ai16;
1691 else
1692 cuv += ai16;
a6ec74c1
JH
1693 }
1694 break;
49704364
LW
1695 case 'S' | TYPE_IS_SHRIEKING:
1696#if SHORTSIZE != SIZE16
73cb7263 1697 while (len-- > 0) {
08ca2aa3 1698 unsigned short aushort;
f337b084
TH
1699 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1700 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1701 if (!checksum)
1702 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1703 else if (checksum > bits_in_uv)
1704 cdouble += (NV)aushort;
1705 else
1706 cuv += aushort;
49704364
LW
1707 }
1708 break;
1709#else
1710 /* Fallhrough! */
1711#endif
a6ec74c1
JH
1712 case 'v':
1713 case 'n':
1714 case 'S':
73cb7263 1715 while (len-- > 0) {
08ca2aa3
TH
1716 U16 au16;
1717#if U16SIZE > SIZE16
1718 au16 = 0;
1719#endif
f337b084 1720 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1721 DO_BO_UNPACK(au16, 16);
a6ec74c1 1722#ifdef HAS_NTOHS
73cb7263
NC
1723 if (datumtype == 'n')
1724 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1725#endif
1726#ifdef HAS_VTOHS
73cb7263
NC
1727 if (datumtype == 'v')
1728 au16 = vtohs(au16);
a6ec74c1 1729#endif
08ca2aa3
TH
1730 if (!checksum)
1731 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1732 else if (checksum > bits_in_uv)
f337b084 1733 cdouble += (NV) au16;
73cb7263
NC
1734 else
1735 cuv += au16;
a6ec74c1
JH
1736 }
1737 break;
7212898e 1738#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1739 case 'v' | TYPE_IS_SHRIEKING:
1740 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1741 while (len-- > 0) {
08ca2aa3
TH
1742 I16 ai16;
1743# if U16SIZE > SIZE16
1744 ai16 = 0;
1745# endif
f337b084 1746 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1747# ifdef HAS_NTOHS
73cb7263 1748 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1749 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1750# endif /* HAS_NTOHS */
1751# ifdef HAS_VTOHS
73cb7263 1752 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1753 ai16 = (I16) vtohs((U16) ai16);
1754# endif /* HAS_VTOHS */
1755 if (!checksum)
ac7f3b1b 1756 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1757 else if (checksum > bits_in_uv)
08ca2aa3 1758 cdouble += (NV) ai16;
73cb7263
NC
1759 else
1760 cuv += ai16;
068bd2e7
MHM
1761 }
1762 break;
08ca2aa3 1763#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1764 case 'i':
49704364 1765 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1766 while (len-- > 0) {
08ca2aa3 1767 int aint;
f337b084
TH
1768 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1769 DO_BO_UNPACK(aint, i);
08ca2aa3 1770 if (!checksum)
ac7f3b1b 1771 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1772 else if (checksum > bits_in_uv)
1773 cdouble += (NV)aint;
1774 else
1775 cuv += aint;
a6ec74c1
JH
1776 }
1777 break;
1778 case 'I':
49704364 1779 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1780 while (len-- > 0) {
08ca2aa3 1781 unsigned int auint;
f337b084
TH
1782 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1783 DO_BO_UNPACK(auint, i);
08ca2aa3 1784 if (!checksum)
ac7f3b1b 1785 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1786 else if (checksum > bits_in_uv)
1787 cdouble += (NV)auint;
1788 else
1789 cuv += auint;
a6ec74c1
JH
1790 }
1791 break;
92d41999 1792 case 'j':
73cb7263 1793 while (len-- > 0) {
08ca2aa3 1794 IV aiv;
f337b084 1795 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1796#if IVSIZE == INTSIZE
f337b084 1797 DO_BO_UNPACK(aiv, i);
1109a392 1798#elif IVSIZE == LONGSIZE
f337b084 1799 DO_BO_UNPACK(aiv, l);
1109a392 1800#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1801 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1802#else
1803 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1804#endif
08ca2aa3 1805 if (!checksum)
ac7f3b1b 1806 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1807 else if (checksum > bits_in_uv)
1808 cdouble += (NV)aiv;
1809 else
1810 cuv += aiv;
92d41999
JH
1811 }
1812 break;
1813 case 'J':
73cb7263 1814 while (len-- > 0) {
08ca2aa3 1815 UV auv;
f337b084 1816 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1817#if IVSIZE == INTSIZE
f337b084 1818 DO_BO_UNPACK(auv, i);
08ca2aa3 1819#elif IVSIZE == LONGSIZE
f337b084 1820 DO_BO_UNPACK(auv, l);
08ca2aa3 1821#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1822 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1823#else
1824 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1825#endif
08ca2aa3 1826 if (!checksum)
ac7f3b1b 1827 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1828 else if (checksum > bits_in_uv)
1829 cdouble += (NV)auv;
1830 else
1831 cuv += auv;
92d41999
JH
1832 }
1833 break;
49704364
LW
1834 case 'l' | TYPE_IS_SHRIEKING:
1835#if LONGSIZE != SIZE32
73cb7263 1836 while (len-- > 0) {
08ca2aa3 1837 long along;
f337b084
TH
1838 SHIFT_VAR(utf8, s, strend, along, datumtype);
1839 DO_BO_UNPACK(along, l);
08ca2aa3 1840 if (!checksum)
ac7f3b1b 1841 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1842 else if (checksum > bits_in_uv)
1843 cdouble += (NV)along;
1844 else
1845 cuv += along;
49704364
LW
1846 }
1847 break;
1848#else
1849 /* Fallthrough! */
a6ec74c1 1850#endif
49704364 1851 case 'l':
73cb7263 1852 while (len-- > 0) {
08ca2aa3
TH
1853 I32 ai32;
1854#if U32SIZE > SIZE32
1855 ai32 = 0;
1856#endif
f337b084 1857 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1858 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1859#if U32SIZE > SIZE32
08ca2aa3 1860 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1861#endif
08ca2aa3 1862 if (!checksum)
ac7f3b1b 1863 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1864 else if (checksum > bits_in_uv)
1865 cdouble += (NV)ai32;
1866 else
1867 cuv += ai32;
a6ec74c1
JH
1868 }
1869 break;
49704364
LW
1870 case 'L' | TYPE_IS_SHRIEKING:
1871#if LONGSIZE != SIZE32
73cb7263 1872 while (len-- > 0) {
08ca2aa3 1873 unsigned long aulong;
f337b084
TH
1874 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1875 DO_BO_UNPACK(aulong, l);
08ca2aa3 1876 if (!checksum)
ac7f3b1b 1877 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1878 else if (checksum > bits_in_uv)
1879 cdouble += (NV)aulong;
1880 else
1881 cuv += aulong;
49704364
LW
1882 }
1883 break;
1884#else
1885 /* Fall through! */
1886#endif
a6ec74c1
JH
1887 case 'V':
1888 case 'N':
1889 case 'L':
73cb7263 1890 while (len-- > 0) {
08ca2aa3
TH
1891 U32 au32;
1892#if U32SIZE > SIZE32
1893 au32 = 0;
1894#endif
f337b084 1895 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1896 DO_BO_UNPACK(au32, 32);
a6ec74c1 1897#ifdef HAS_NTOHL
73cb7263
NC
1898 if (datumtype == 'N')
1899 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1900#endif
1901#ifdef HAS_VTOHL
73cb7263
NC
1902 if (datumtype == 'V')
1903 au32 = vtohl(au32);
a6ec74c1 1904#endif
08ca2aa3 1905 if (!checksum)
fc241834
RGS
1906 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1907 else if (checksum > bits_in_uv)
1908 cdouble += (NV)au32;
1909 else
1910 cuv += au32;
a6ec74c1
JH
1911 }
1912 break;
7212898e 1913#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1914 case 'V' | TYPE_IS_SHRIEKING:
1915 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1916 while (len-- > 0) {
08ca2aa3
TH
1917 I32 ai32;
1918# if U32SIZE > SIZE32
1919 ai32 = 0;
1920# endif
f337b084 1921 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1922# ifdef HAS_NTOHL
73cb7263
NC
1923 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1924 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1925# endif
1926# ifdef HAS_VTOHL
73cb7263
NC
1927 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1928 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1929# endif
1930 if (!checksum)
ac7f3b1b 1931 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1932 else if (checksum > bits_in_uv)
1933 cdouble += (NV)ai32;
1934 else
1935 cuv += ai32;
068bd2e7
MHM
1936 }
1937 break;
08ca2aa3 1938#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1939 case 'p':
a6ec74c1 1940 while (len-- > 0) {
08ca2aa3 1941 char *aptr;
f337b084 1942 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1943 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1944 /* newSVpv generates undef if aptr is NULL */
1945 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1946 }
1947 break;
1948 case 'w':
a6ec74c1
JH
1949 {
1950 UV auv = 0;
1951 U32 bytes = 0;
fc241834 1952
08ca2aa3
TH
1953 while (len > 0 && s < strend) {
1954 U8 ch;
f337b084 1955 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1956 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1957 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1958 if (ch < 0x80) {
a6ec74c1 1959 bytes = 0;
ac7f3b1b 1960 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1961 len--;
1962 auv = 0;
08ca2aa3 1963 continue;
a6ec74c1 1964 }
08ca2aa3 1965 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
1966 char *t;
1967 STRLEN n_a;
1968
1969 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1970 while (s < strend) {
f337b084 1971 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1972 sv = mul128(sv, (U8)(ch & 0x7f));
1973 if (!(ch & 0x80)) {
a6ec74c1
JH
1974 bytes = 0;
1975 break;
1976 }
1977 }
1978 t = SvPV(sv, n_a);
1979 while (*t == '0')
1980 t++;
1981 sv_chop(sv, t);
1982 PUSHs(sv_2mortal(sv));
1983 len--;
1984 auv = 0;
1985 }
1986 }
1987 if ((s >= strend) && bytes)
49704364 1988 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1989 }
1990 break;
1991 case 'P':
49704364
LW
1992 if (symptr->howlen == e_star)
1993 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1994 EXTEND(SP, 1);
08ca2aa3
TH
1995 if (sizeof(char*) <= strend - s) {
1996 char *aptr;
f337b084 1997 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1998 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
1999 /* newSVpvn generates undef if aptr is NULL */
2000 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2001 }
a6ec74c1
JH
2002 break;
2003#ifdef HAS_QUAD
2004 case 'q':
73cb7263 2005 while (len-- > 0) {
08ca2aa3 2006 Quad_t aquad;
f337b084
TH
2007 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2008 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2009 if (!checksum)
2010 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2011 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2012 else if (checksum > bits_in_uv)
2013 cdouble += (NV)aquad;
2014 else
2015 cuv += aquad;
2016 }
a6ec74c1
JH
2017 break;
2018 case 'Q':
73cb7263 2019 while (len-- > 0) {
08ca2aa3 2020 Uquad_t auquad;
f337b084
TH
2021 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2022 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2023 if (!checksum)
2024 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2025 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2026 else if (checksum > bits_in_uv)
2027 cdouble += (NV)auquad;
2028 else
2029 cuv += auquad;
a6ec74c1
JH
2030 }
2031 break;
08ca2aa3 2032#endif /* HAS_QUAD */
a6ec74c1
JH
2033 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2034 case 'f':
73cb7263 2035 while (len-- > 0) {
08ca2aa3 2036 float afloat;
f337b084 2037 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2038 DO_BO_UNPACK_N(afloat, float);
2039 if (!checksum)
2040 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2041 else
73cb7263 2042 cdouble += afloat;
fc241834 2043 }
a6ec74c1
JH
2044 break;
2045 case 'd':
73cb7263 2046 while (len-- > 0) {
08ca2aa3 2047 double adouble;
f337b084 2048 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2049 DO_BO_UNPACK_N(adouble, double);
2050 if (!checksum)
2051 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2052 else
73cb7263 2053 cdouble += adouble;
fc241834 2054 }
a6ec74c1 2055 break;
92d41999 2056 case 'F':
73cb7263 2057 while (len-- > 0) {
08ca2aa3 2058 NV anv;
f337b084 2059 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2060 DO_BO_UNPACK_N(anv, NV);
2061 if (!checksum)
2062 PUSHs(sv_2mortal(newSVnv(anv)));
2063 else
73cb7263 2064 cdouble += anv;
fc241834 2065 }
92d41999
JH
2066 break;
2067#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2068 case 'D':
73cb7263 2069 while (len-- > 0) {
08ca2aa3 2070 long double aldouble;
f337b084 2071 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2072 DO_BO_UNPACK_N(aldouble, long double);
2073 if (!checksum)
2074 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2075 else
2076 cdouble += aldouble;
92d41999
JH
2077 }
2078 break;
2079#endif
a6ec74c1
JH
2080 case 'u':
2081 /* MKS:
2082 * Initialise the decode mapping. By using a table driven
2083 * algorithm, the code will be character-set independent
2084 * (and just as fast as doing character arithmetic)
2085 */
2086 if (PL_uudmap['M'] == 0) {
2087 int i;
2088
2089 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2090 PL_uudmap[(U8)PL_uuemap[i]] = i;
2091 /*
2092 * Because ' ' and '`' map to the same value,
2093 * we need to decode them both the same.
2094 */
2095 PL_uudmap[' '] = 0;
2096 }
08ca2aa3
TH
2097 {
2098 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2099 sv = sv_2mortal(NEWSV(42, l));
2100 if (l) SvPOK_on(sv);
2101 }
2102 if (utf8) {
2103 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2104 I32 a, b, c, d;
2105 char hunk[4];
2106
2107 hunk[3] = '\0';
2108 while (len > 0) {
2109 next_uni_uu(aTHX_ &s, strend, &a);
2110 next_uni_uu(aTHX_ &s, strend, &b);
2111 next_uni_uu(aTHX_ &s, strend, &c);
2112 next_uni_uu(aTHX_ &s, strend, &d);
2113 hunk[0] = (char)((a << 2) | (b >> 4));
2114 hunk[1] = (char)((b << 4) | (c >> 2));
2115 hunk[2] = (char)((c << 6) | d);
2116 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2117 len -= 3;
2118 }
2119 if (s < strend) {
2120 if (*s == '\n') s++;
2121 else {
2122 /* possible checksum byte */
2123 char *skip = s+UTF8SKIP(s);
2124 if (skip < strend && *skip == '\n') s = skip+1;
2125 }
2126 }
2127 }
2128 } else {
fc241834
RGS
2129 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2130 I32 a, b, c, d;
2131 char hunk[4];
a6ec74c1 2132
fc241834
RGS
2133 hunk[3] = '\0';
2134 len = PL_uudmap[*(U8*)s++] & 077;
2135 while (len > 0) {
2136 if (s < strend && ISUUCHAR(*s))
2137 a = PL_uudmap[*(U8*)s++] & 077;
2138 else
2139 a = 0;
2140 if (s < strend && ISUUCHAR(*s))
2141 b = PL_uudmap[*(U8*)s++] & 077;
2142 else
2143 b = 0;
2144 if (s < strend && ISUUCHAR(*s))
2145 c = PL_uudmap[*(U8*)s++] & 077;
2146 else
2147 c = 0;
2148 if (s < strend && ISUUCHAR(*s))
2149 d = PL_uudmap[*(U8*)s++] & 077;
2150 else
2151 d = 0;
2152 hunk[0] = (char)((a << 2) | (b >> 4));
2153 hunk[1] = (char)((b << 4) | (c >> 2));
2154 hunk[2] = (char)((c << 6) | d);
2155 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2156 len -= 3;
2157 }
2158 if (*s == '\n')
2159 s++;
2160 else /* possible checksum byte */
2161 if (s + 1 < strend && s[1] == '\n')
2162 s += 2;
a6ec74c1 2163 }
08ca2aa3
TH
2164 }
2165 XPUSHs(sv);
a6ec74c1
JH
2166 break;
2167 }
49704364 2168
a6ec74c1 2169 if (checksum) {
1109a392 2170 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2171 (checksum > bits_in_uv &&
08ca2aa3
TH
2172 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2173 NV trouble, anv;
a6ec74c1 2174
08ca2aa3 2175 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2176 while (checksum >= 16) {
2177 checksum -= 16;
08ca2aa3 2178 anv *= 65536.0;
a6ec74c1 2179 }
a6ec74c1 2180 while (cdouble < 0.0)
08ca2aa3
TH
2181 cdouble += anv;
2182 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2183 sv = newSVnv(cdouble);
a6ec74c1
JH
2184 }
2185 else {
fa8ec7c1
NC
2186 if (checksum < bits_in_uv) {
2187 UV mask = ((UV)1 << checksum) - 1;
92d41999 2188 cuv &= mask;
a6ec74c1 2189 }
c4c5f44a 2190 sv = newSVuv(cuv);
a6ec74c1
JH
2191 }
2192 XPUSHs(sv_2mortal(sv));
2193 checksum = 0;
2194 }
fc241834 2195
49704364
LW
2196 if (symptr->flags & FLAG_SLASH){
2197 if (SP - PL_stack_base - start_sp_offset <= 0)
2198 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2199 if( next_symbol(symptr) ){
2200 if( symptr->howlen == e_number )
2201 Perl_croak(aTHX_ "Count after length/code in unpack" );
2202 if( beyond ){
2203 /* ...end of char buffer then no decent length available */
2204 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2205 } else {
2206 /* take top of stack (hope it's numeric) */
2207 len = POPi;
2208 if( len < 0 )
2209 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2210 }
2211 } else {
2212 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2213 }
2214 datumtype = symptr->code;
21c16052 2215 explicit_length = FALSE;
49704364
LW
2216 goto redo_switch;
2217 }
a6ec74c1 2218 }
49704364 2219
18529408
IZ
2220 if (new_s)
2221 *new_s = s;
2222 PUTBACK;
2223 return SP - PL_stack_base - start_sp_offset;
2224}
2225
2226PP(pp_unpack)
2227{
2228 dSP;
bab9c0ac 2229 dPOPPOPssrl;
18529408
IZ
2230 I32 gimme = GIMME_V;
2231 STRLEN llen;
2232 STRLEN rlen;
08ca2aa3
TH
2233 char *pat = SvPV(left, llen);
2234 char *s = SvPV(right, rlen);
18529408 2235 char *strend = s + rlen;
08ca2aa3
TH
2236 char *patend = pat + llen;
2237 I32 cnt;
18529408
IZ
2238
2239 PUTBACK;
7accc089 2240 cnt = unpackstring(pat, patend, s, strend,
49704364 2241 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2242 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2243
18529408
IZ
2244 SPAGAIN;
2245 if ( !cnt && gimme == G_SCALAR )
2246 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2247 RETURN;
2248}
2249
f337b084
TH
2250STATIC U8 *
2251doencodes(U8 *h, char *s, I32 len)
a6ec74c1 2252{
f337b084 2253 *h++ = PL_uuemap[len];
a6ec74c1 2254 while (len > 2) {
f337b084
TH
2255 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2256 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2257 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2258 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2259 s += 3;
2260 len -= 3;
2261 }
2262 if (len > 0) {
2263 char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2264 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2265 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2266 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2267 *h++ = PL_uuemap[0];
a6ec74c1 2268 }
f337b084
TH
2269 *h++ = '\n';
2270 return h;
a6ec74c1
JH
2271}
2272
2273STATIC SV *
2274S_is_an_int(pTHX_ char *s, STRLEN l)
2275{
2276 STRLEN n_a;
2277 SV *result = newSVpvn(s, l);
2278 char *result_c = SvPV(result, n_a); /* convenience */
2279 char *out = result_c;
2280 bool skip = 1;
2281 bool ignore = 0;
2282
2283 while (*s) {
2284 switch (*s) {
2285 case ' ':
2286 break;
2287 case '+':
2288 if (!skip) {
2289 SvREFCNT_dec(result);
2290 return (NULL);
2291 }
2292 break;
2293 case '0':
2294 case '1':
2295 case '2':
2296 case '3':
2297 case '4':
2298 case '5':
2299 case '6':
2300 case '7':
2301 case '8':
2302 case '9':
2303 skip = 0;
2304 if (!ignore) {
2305 *(out++) = *s;
2306 }
2307 break;
2308 case '.':
2309 ignore = 1;
2310 break;
2311 default:
2312 SvREFCNT_dec(result);
2313 return (NULL);
2314 }
2315 s++;
2316 }
2317 *(out++) = '\0';
2318 SvCUR_set(result, out - result_c);
2319 return (result);
2320}
2321
2322/* pnum must be '\0' terminated */
2323STATIC int
2324S_div128(pTHX_ SV *pnum, bool *done)
2325{
2326 STRLEN len;
2327 char *s = SvPV(pnum, len);
2328 int m = 0;
2329 int r = 0;
2330 char *t = s;
2331
2332 *done = 1;
2333 while (*t) {
2334 int i;
2335
2336 i = m * 10 + (*t - '0');
2337 m = i & 0x7F;
2338 r = (i >> 7); /* r < 10 */
2339 if (r) {
2340 *done = 0;
2341 }
2342 *(t++) = '0' + r;
2343 }
2344 *(t++) = '\0';
2345 SvCUR_set(pnum, (STRLEN) (t - s));
2346 return (m);
2347}
2348
49704364 2349
a6ec74c1 2350
18529408
IZ
2351/*
2352=for apidoc pack_cat
2353
7accc089
JH
2354The engine implementing pack() Perl function. Note: parameters next_in_list and
2355flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2356
2357=cut */
2358
49704364 2359
18529408
IZ
2360void
2361Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2362{
49704364
LW
2363 tempsym_t sym = { 0 };
2364 sym.patptr = pat;
2365 sym.patend = patend;
7accc089
JH
2366 sym.flags = FLAG_PACK;
2367
2368 (void)pack_rec( cat, &sym, beglist, endlist );
2369}
2370
2371
2372/*
2373=for apidoc packlist
2374
2375The engine implementing pack() Perl function.
2376
2377=cut */
2378
2379
2380void
2381Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2382{
f337b084 2383 STRLEN no_len;
7accc089 2384 tempsym_t sym = { 0 };
f337b084 2385
7accc089
JH
2386 sym.patptr = pat;
2387 sym.patend = patend;
2388 sym.flags = FLAG_PACK;
49704364 2389
f337b084
TH
2390 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2391 Also make sure any UTF8 flag is loaded */
2392 SvPV_force(cat, no_len);
2393 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2394
49704364
LW
2395 (void)pack_rec( cat, &sym, beglist, endlist );
2396}
2397
f337b084
TH
2398/* like sv_utf8_upgrade, but also repoint the group start markers */
2399STATIC void
2400marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2401 STRLEN len;
2402 tempsym_t *group;
2403 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2404
2405 if (SvUTF8(sv)) return;
2406
2407 from_start = SvPVX(sv);
2408 from_end = from_start + SvCUR(sv);
2409 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2410 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2411 if (from_ptr == from_end) {
2412 /* Simple case: no character needs to be changed */
2413 SvUTF8_on(sv);
2414 return;
2415 }
2416
3473cf63 2417 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
f337b084
TH
2418 New('U', to_start, len, char);
2419 Copy(from_start, to_start, from_ptr-from_start, char);
2420 to_ptr = to_start + (from_ptr-from_start);
2421
2422 New('U', marks, sym_ptr->level+2, char *);
2423 for (group=sym_ptr; group; group = group->previous)
2424 marks[group->level] = from_start + group->strbeg;
2425 marks[sym_ptr->level+1] = from_end+1;
2426 for (m = marks; *m < from_ptr; m++)
2427 *m = to_start + (*m-from_start);
2428
2429 for (;from_ptr < from_end; from_ptr++) {
2430 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2431 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2432 }
2433 *to_ptr = 0;
2434
2435 while (*m == from_ptr) *m++ = to_ptr;
2436 if (m != marks + sym_ptr->level+1) {
2437 Safefree(marks);
2438 Safefree(to_start);
2439 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2440 }
2441 for (group=sym_ptr; group; group = group->previous)
2442 group->strbeg = marks[group->level] - to_start;
2443 Safefree(marks);
2444
2445 if (SvOOK(sv)) {
2446 if (SvIVX(sv)) {
b162af07 2447 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2448 from_start -= SvIVX(sv);
2449 SvIV_set(sv, 0);
2450 }
2451 SvFLAGS(sv) &= ~SVf_OOK;
2452 }
2453 if (SvLEN(sv) != 0)
2454 Safefree(from_start);
f880fe2f 2455 SvPV_set(sv, to_start);
b162af07
SP
2456 SvCUR_set(sv, to_ptr - to_start);
2457 SvLEN_set(sv, len);
f337b084
TH
2458 SvUTF8_on(sv);
2459}
2460
2461/* Exponential string grower. Makes string extension effectively O(n)
2462 needed says how many extra bytes we need (not counting the final '\0')
2463 Only grows the string if there is an actual lack of space
2464*/
2465STATIC char *
2466sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2467 STRLEN cur = SvCUR(sv);
2468 STRLEN len = SvLEN(sv);
2469 STRLEN extend;
2470 if (len - cur > needed) return SvPVX(sv);
2471 extend = needed > len ? needed : len;
2472 return SvGROW(sv, len+extend+1);
2473}
49704364
LW
2474
2475STATIC
2476SV **
f337b084 2477S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2478{
49704364 2479 tempsym_t lookahead;
f337b084
TH
2480 I32 items = endlist - beglist;
2481 bool found = next_symbol(symptr);
2482 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2483
2484 if (symptr->level == 0 && found && symptr->code == 'U') {
2485 marked_upgrade(aTHX_ cat, symptr);
2486 symptr->flags |= FLAG_DO_UTF8;
2487 utf8 = 0;
49704364 2488 }
f337b084 2489 symptr->strbeg = SvCUR(cat);
49704364
LW
2490
2491 while (found) {
f337b084
TH
2492 SV *fromstr;
2493 STRLEN fromlen;
2494 I32 len;
a6ec74c1 2495 SV *lengthcode = Nullsv;
49704364 2496 I32 datumtype = symptr->code;
f337b084
TH
2497 howlen_t howlen = symptr->howlen;
2498 char *start = SvPVX(cat);
2499 char *cur = start + SvCUR(cat);
49704364 2500
f337b084
TH
2501#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2502
2503 switch (howlen) {
fc241834 2504 case e_star:
f337b084
TH
2505 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2506 0 : items;
2507 break;
2508 default:
2509 /* e_no_len and e_number */
2510 len = symptr->length;
49704364
LW
2511 break;
2512 }
2513
f337b084 2514 if (len) {
a7a3cfaa 2515 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2516
a7a3cfaa
TH
2517 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2518 /* We can process this letter. */
2519 STRLEN size = props & PACK_SIZE_MASK;
2520 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2521 }
f337b084
TH
2522 }
2523
49704364
LW
2524 /* Look ahead for next symbol. Do we have code/code? */
2525 lookahead = *symptr;
2526 found = next_symbol(&lookahead);
246f24af
TH
2527 if (symptr->flags & FLAG_SLASH) {
2528 IV count;
f337b084 2529 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2530 if (strchr("aAZ", lookahead.code)) {
2531 if (lookahead.howlen == e_number) count = lookahead.length;
2532 else {
2533 if (items > 0)
2534 count = DO_UTF8(*beglist) ?
2535 sv_len_utf8(*beglist) : sv_len(*beglist);
2536 else count = 0;
2537 if (lookahead.code == 'Z') count++;
2538 }
2539 } else {
2540 if (lookahead.howlen == e_number && lookahead.length < items)
2541 count = lookahead.length;
2542 else count = items;
2543 }
2544 lookahead.howlen = e_number;
2545 lookahead.length = count;
2546 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2547 }
49704364 2548
fc241834
RGS
2549 /* Code inside the switch must take care to properly update
2550 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2551 doesn't simply leave using break */
1109a392 2552 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2553 default:
f337b084
TH
2554 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2555 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2556 case '%':
49704364 2557 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2558 {
2559 char *from;
2560#ifdef PERL_PACK_CAN_SHRIEKSIGN
2561 case '.' | TYPE_IS_SHRIEKING:
2562#endif
2563 case '.':
2564 if (howlen == e_star) from = start;
2565 else if (len == 0) from = cur;
2566 else {
2567 tempsym_t *group = symptr;
2568
2569 while (--len && group) group = group->previous;
2570 from = group ? start + group->strbeg : start;
2571 }
2572 fromstr = NEXTFROM;
2573 len = SvIV(fromstr);
2574 goto resize;
2575#ifdef PERL_PACK_CAN_SHRIEKSIGN
2576 case '@' | TYPE_IS_SHRIEKING:
2577#endif
a6ec74c1 2578 case '@':
28be1210
TH
2579 from = start + symptr->strbeg;
2580 resize:
2581#ifdef PERL_PACK_CAN_SHRIEKSIGN
2582 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2583#else /* PERL_PACK_CAN_SHRIEKSIGN */
2584 if (utf8)
2585#endif
2586 if (len >= 0) {
2587 while (len && from < cur) {
2588 from += UTF8SKIP(from);
2589 len--;
2590 }
2591 if (from > cur)
2592 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2593 if (len) {
2594 /* Here we know from == cur */
2595 grow:
2596 GROWING(0, cat, start, cur, len);
2597 Zero(cur, len, char);
2598 cur += len;
2599 } else if (from < cur) {
2600 len = cur - from;
2601 goto shrink;
2602 } else goto no_change;
2603 } else {
2604 cur = from;
2605 len = -len;
2606 goto utf8_shrink;
f337b084 2607 }
28be1210
TH
2608 else {
2609 len -= cur - from;
f337b084 2610 if (len > 0) goto grow;
28be1210 2611 if (len == 0) goto no_change;
fc241834 2612 len = -len;
28be1210 2613 goto shrink;
f337b084 2614 }
a6ec74c1 2615 break;
28be1210 2616 }
fc241834 2617 case '(': {
49704364 2618 tempsym_t savsym = *symptr;
66c611c5
MHM
2619 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2620 symptr->flags |= group_modifiers;
49704364
LW
2621 symptr->patend = savsym.grpend;
2622 symptr->level++;
f337b084 2623 symptr->previous = &lookahead;
18529408 2624 while (len--) {
f337b084
TH
2625 U32 was_utf8;
2626 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2627 else symptr->flags &= ~FLAG_PARSE_UTF8;
2628 was_utf8 = SvUTF8(cat);
49704364 2629 symptr->patptr = savsym.grpbeg;
f337b084
TH
2630 beglist = pack_rec(cat, symptr, beglist, endlist);
2631 if (SvUTF8(cat) != was_utf8)
2632 /* This had better be an upgrade while in utf8==0 mode */
2633 utf8 = 1;
2634
49704364 2635 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2636 break; /* No way to continue */
2637 }
f337b084
TH
2638 lookahead.flags = symptr->flags & ~group_modifiers;
2639 goto no_change;
18529408 2640 }
62f95557
IZ
2641 case 'X' | TYPE_IS_SHRIEKING:
2642 if (!len) /* Avoid division by 0 */
2643 len = 1;
f337b084
TH
2644 if (utf8) {
2645 char *hop, *last;
2646 I32 l = len;
2647 hop = last = start;
2648 while (hop < cur) {
2649 hop += UTF8SKIP(hop);
2650 if (--l == 0) {
2651 last = hop;
2652 l = len;
2653 }
2654 }
2655 if (last > cur)
2656 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2657 cur = last;
2658 break;
2659 }
2660 len = (cur-start) % len;
62f95557 2661 /* FALL THROUGH */
a6ec74c1 2662 case 'X':
f337b084
TH
2663 if (utf8) {
2664 if (len < 1) goto no_change;
28be1210 2665 utf8_shrink:
f337b084
TH
2666 while (len > 0) {
2667 if (cur <= start)
28be1210
TH
2668 Perl_croak(aTHX_ "'%c' outside of string in pack",
2669 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2670 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2671 if (cur <= start)
28be1210
TH
2672 Perl_croak(aTHX_ "'%c' outside of string in pack",
2673 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2674 }
2675 len--;
2676 }
2677 } else {
fc241834 2678 shrink:
f337b084 2679 if (cur - start < len)
28be1210
TH
2680 Perl_croak(aTHX_ "'%c' outside of string in pack",
2681 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2682 cur -= len;
2683 }
2684 if (cur < start+symptr->strbeg) {
2685 /* Make sure group starts don't point into the void */
2686 tempsym_t *group;
2687 STRLEN length = cur-start;
2688 for (group = symptr;
2689 group && length < group->strbeg;
2690 group = group->previous) group->strbeg = length;
2691 lookahead.strbeg = length;
2692 }
a6ec74c1 2693 break;
fc241834
RGS
2694 case 'x' | TYPE_IS_SHRIEKING: {
2695 I32 ai32;
62f95557
IZ
2696 if (!len) /* Avoid division by 0 */
2697 len = 1;
230e1fce 2698 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2699 else ai32 = (cur - start) % len;
2700 if (ai32 == 0) goto no_change;
2701 len -= ai32;
2702 }
2703 /* FALL THROUGH */
a6ec74c1 2704 case 'x':
f337b084 2705 goto grow;
a6ec74c1
JH
2706 case 'A':
2707 case 'Z':
f337b084
TH
2708 case 'a': {
2709 char *aptr;
2710
a6ec74c1
JH
2711 fromstr = NEXTFROM;
2712 aptr = SvPV(fromstr, fromlen);
f337b084
TH
2713 if (DO_UTF8(fromstr)) {
2714 char *end, *s;
2715
2716 if (!utf8 && !SvUTF8(cat)) {
2717 marked_upgrade(aTHX_ cat, symptr);
2718 lookahead.flags |= FLAG_DO_UTF8;
2719 lookahead.strbeg = symptr->strbeg;
2720 utf8 = 1;
2721 start = SvPVX(cat);
2722 cur = start + SvCUR(cat);
2723 }
fc241834 2724 if (howlen == e_star) {
f337b084
TH
2725 if (utf8) goto string_copy;
2726 len = fromlen+1;
2727 }
2728 s = aptr;
2729 end = aptr + fromlen;
2730 fromlen = datumtype == 'Z' ? len-1 : len;
2731 while ((I32) fromlen > 0 && s < end) {
2732 s += UTF8SKIP(s);
2733 fromlen--;
2734 }
2735 if (s > end)
2736 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2737 if (utf8) {
fc241834 2738 len = fromlen;
f337b084
TH
2739 if (datumtype == 'Z') len++;
2740 fromlen = s-aptr;
2741 len += fromlen;
fc241834 2742
f337b084 2743 goto string_copy;
fc241834 2744 }
f337b084
TH
2745 fromlen = len - fromlen;
2746 if (datumtype == 'Z') fromlen--;
2747 if (howlen == e_star) {
2748 len = fromlen;
2749 if (datumtype == 'Z') len++;
fc241834 2750 }
f337b084 2751 GROWING(0, cat, start, cur, len);
fc241834 2752 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2753 datumtype | TYPE_IS_PACK))
2754 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2755 cur += fromlen;
a6ec74c1 2756 len -= fromlen;
f337b084
TH
2757 } else if (utf8) {
2758 if (howlen == e_star) {
2759 len = fromlen;
2760 if (datumtype == 'Z') len++;
a6ec74c1 2761 }
f337b084
TH
2762 if (len <= (I32) fromlen) {
2763 fromlen = len;
2764 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2765 }
fc241834 2766 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2767 upgrade, so:
2768 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2769 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2770 len -= fromlen;
2771 while (fromlen > 0) {
230e1fce 2772 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2773 aptr++;
2774 fromlen--;
fc241834 2775 }
f337b084
TH
2776 } else {
2777 string_copy:
2778 if (howlen == e_star) {
2779 len = fromlen;
2780 if (datumtype == 'Z') len++;
2781 }
2782 if (len <= (I32) fromlen) {
2783 fromlen = len;
2784 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2785 }
f337b084
TH
2786 GROWING(0, cat, start, cur, len);
2787 Copy(aptr, cur, fromlen, char);
2788 cur += fromlen;
2789 len -= fromlen;
a6ec74c1 2790 }
f337b084
TH
2791 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2792 cur += len;
a6ec74c1 2793 break;
f337b084 2794 }
a6ec74c1 2795 case 'B':
f337b084
TH
2796 case 'b': {
2797 char *str, *end;
2798 I32 l, field_len;
2799 U8 bits;
2800 bool utf8_source;
2801 U32 utf8_flags;
a6ec74c1 2802
fc241834
RGS
2803 fromstr = NEXTFROM;
2804 str = SvPV(fromstr, fromlen);
f337b084
TH
2805 end = str + fromlen;
2806 if (DO_UTF8(fromstr)) {
2807 utf8_source = TRUE;
2808 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2809 } else {
2810 utf8_source = FALSE;
2811 utf8_flags = 0; /* Unused, but keep compilers happy */
2812 }
2813 if (howlen == e_star) len = fromlen;
2814 field_len = (len+7)/8;
2815 GROWING(utf8, cat, start, cur, field_len);
2816 if (len > (I32)fromlen) len = fromlen;
2817 bits = 0;
2818 l = 0;
2819 if (datumtype == 'B')
2820 while (l++ < len) {
2821 if (utf8_source) {
2822 UV val;
2823 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2824 bits |= val & 1;
2825 } else bits |= *str++ & 1;
2826 if (l & 7) bits <<= 1;
fc241834 2827 else {
f337b084
TH
2828 PUSH_BYTE(utf8, cur, bits);
2829 bits = 0;
a6ec74c1
JH
2830 }
2831 }
f337b084
TH
2832 else
2833 /* datumtype == 'b' */
2834 while (l++ < len) {
2835 if (utf8_source) {
2836 UV val;
2837 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2838 if (val & 1) bits |= 0x80;
2839 } else if (*str++ & 1)
2840 bits |= 0x80;
2841 if (l & 7) bits >>= 1;
fc241834 2842 else {
f337b084
TH
2843 PUSH_BYTE(utf8, cur, bits);
2844 bits = 0;
a6ec74c1
JH
2845 }
2846 }
f337b084
TH
2847 l--;
2848 if (l & 7) {
fc241834 2849 if (datumtype == 'B')
f337b084 2850 bits <<= 7 - (l & 7);
fc241834 2851 else
f337b084
TH
2852 bits >>= 7 - (l & 7);
2853 PUSH_BYTE(utf8, cur, bits);
2854 l += 7;
a6ec74c1 2855 }
f337b084
TH
2856 /* Determine how many chars are left in the requested field */
2857 l /= 8;
2858 if (howlen == e_star) field_len = 0;
2859 else field_len -= l;
2860 Zero(cur, field_len, char);
2861 cur += field_len;
a6ec74c1 2862 break;
f337b084 2863 }
a6ec74c1 2864 case 'H':
f337b084
TH
2865 case 'h': {
2866 char *str, *end;
2867 I32 l, field_len;
2868 U8 bits;
2869 bool utf8_source;
2870 U32 utf8_flags;
a6ec74c1 2871
fc241834
RGS
2872 fromstr = NEXTFROM;
2873 str = SvPV(fromstr, fromlen);
f337b084
TH
2874 end = str + fromlen;
2875 if (DO_UTF8(fromstr)) {
2876 utf8_source = TRUE;
2877 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2878 } else {
2879 utf8_source = FALSE;
2880 utf8_flags = 0; /* Unused, but keep compilers happy */
2881 }
2882 if (howlen == e_star) len = fromlen;
2883 field_len = (len+1)/2;
2884 GROWING(utf8, cat, start, cur, field_len);
2885 if (!utf8 && len > (I32)fromlen) len = fromlen;
2886 bits = 0;
2887 l = 0;
2888 if (datumtype == 'H')
2889 while (l++ < len) {
2890 if (utf8_source) {
2891 UV val;
2892 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2893 if (val < 256 && isALPHA(val))
2894 bits |= (val + 9) & 0xf;
a6ec74c1 2895 else
f337b084
TH
2896 bits |= val & 0xf;
2897 } else if (isALPHA(*str))
2898 bits |= (*str++ + 9) & 0xf;
2899 else
2900 bits |= *str++ & 0xf;
2901 if (l & 1) bits <<= 4;
fc241834 2902 else {
f337b084
TH
2903 PUSH_BYTE(utf8, cur, bits);
2904 bits = 0;
a6ec74c1
JH
2905 }
2906 }
f337b084
TH
2907 else
2908 while (l++ < len) {
2909 if (utf8_source) {
2910 UV val;
2911 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2912 if (val < 256 && isALPHA(val))
2913 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2914 else
f337b084
TH
2915 bits |= (val & 0xf) << 4;
2916 } else if (isALPHA(*str))
2917 bits |= ((*str++ + 9) & 0xf) << 4;
2918 else
2919 bits |= (*str++ & 0xf) << 4;
2920 if (l & 1) bits >>= 4;
fc241834 2921 else {
f337b084
TH
2922 PUSH_BYTE(utf8, cur, bits);
2923 bits = 0;
a6ec74c1 2924 }
fc241834 2925 }
f337b084
TH
2926 l--;
2927 if (l & 1) {
2928 PUSH_BYTE(utf8, cur, bits);
2929 l++;
2930 }
2931 /* Determine how many chars are left in the requested field */
2932 l /= 2;
2933 if (howlen == e_star) field_len = 0;
2934 else field_len -= l;
2935 Zero(cur, field_len, char);
2936 cur += field_len;
2937 break;
fc241834
RGS
2938 }
2939 case 'c':
f337b084
TH
2940 while (len-- > 0) {
2941 IV aiv;
2942 fromstr = NEXTFROM;
2943 aiv = SvIV(fromstr);
2944 if ((-128 > aiv || aiv > 127) &&
2945 ckWARN(WARN_PACK))
2946 Perl_warner(aTHX_ packWARN(WARN_PACK),
2947 "Character in 'c' format wrapped in pack");
2948 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2949 }
2950 break;
2951 case 'C':
f337b084
TH
2952 if (len == 0) {
2953 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2954 break;
2955 }
2956 GROWING(0, cat, start, cur, len);
a6ec74c1 2957 while (len-- > 0) {
f337b084 2958 IV aiv;
a6ec74c1 2959 fromstr = NEXTFROM;
f337b084
TH
2960 aiv = SvIV(fromstr);
2961 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2962 ckWARN(WARN_PACK))
2963 Perl_warner(aTHX_ packWARN(WARN_PACK),
2964 "Character in 'C' format wrapped in pack");
f337b084
TH
2965 *cur++ = aiv & 0xff;
2966 }
fc241834
RGS
2967 break;
2968 case 'W': {
2969 char *end;
2970 U8 in_bytes = IN_BYTES;
2971
2972 end = start+SvLEN(cat)-1;
2973 if (utf8) end -= UTF8_MAXLEN-1;
2974 while (len-- > 0) {
2975 UV auv;
2976 fromstr = NEXTFROM;
2977 auv = SvUV(fromstr);
2978 if (in_bytes) auv = auv % 0x100;
2979 if (utf8) {
2980 W_utf8:
2981 if (cur > end) {
2982 *cur = '\0';
b162af07 2983 SvCUR_set(cat, cur - start);
fc241834
RGS
2984
2985 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2986 end = start+SvLEN(cat)-UTF8_MAXLEN;
2987 }
230e1fce
NC
2988 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2989 NATIVE_TO_UNI(auv),
2990 ckWARN(WARN_UTF8) ?
2991 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2992 } else {
2993 if (auv >= 0x100) {
2994 if (!SvUTF8(cat)) {
2995 *cur = '\0';
b162af07 2996 SvCUR_set(cat, cur - start);
fc241834
RGS
2997 marked_upgrade(aTHX_ cat, symptr);
2998 lookahead.flags |= FLAG_DO_UTF8;
2999 lookahead.strbeg = symptr->strbeg;
3000 utf8 = 1;
3001 start = SvPVX(cat);
3002 cur = start + SvCUR(cat);
3003 end = start+SvLEN(cat)-UTF8_MAXLEN;
3004 goto W_utf8;
3005 }
3006 if (ckWARN(WARN_PACK))
3007 Perl_warner(aTHX_ packWARN(WARN_PACK),
3008 "Character in 'W' format wrapped in pack");
3009 auv &= 0xff;
3010 }
3011 if (cur >= end) {
3012 *cur = '\0';
b162af07 3013 SvCUR_set(cat, cur - start);
fc241834
RGS
3014 GROWING(0, cat, start, cur, len+1);
3015 end = start+SvLEN(cat)-1;
3016 }
fe2774ed 3017 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3018 }
3019 }
3020 break;
fc241834
RGS
3021 }
3022 case 'U': {
3023 char *end;
3024
3025 if (len == 0) {
3026 if (!(symptr->flags & FLAG_DO_UTF8)) {
3027 marked_upgrade(aTHX_ cat, symptr);
3028 lookahead.flags |= FLAG_DO_UTF8;
3029 lookahead.strbeg = symptr->strbeg;
3030 }
3031 utf8 = 0;
3032 goto no_change;
3033 }
3034
3035 end = start+SvLEN(cat);
3036 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3037 while (len-- > 0) {
fc241834 3038 UV auv;
a6ec74c1 3039 fromstr = NEXTFROM;
fc241834
RGS
3040 auv = SvUV(fromstr);
3041 if (utf8) {
230e1fce 3042 U8 buffer[UTF8_MAXLEN], *endb;
fc241834
RGS
3043 endb = uvuni_to_utf8_flags(buffer, auv,
3044 ckWARN(WARN_UTF8) ?
3045 0 : UNICODE_ALLOW_ANY);
3046 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3047 *cur = '\0';
b162af07 3048 SvCUR_set(cat, cur - start);
fc241834
RGS
3049 GROWING(0, cat, start, cur,
3050 len+(endb-buffer)*UTF8_EXPAND);
3051 end = start+SvLEN(cat);
3052 }
3053 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3054 } else {
3055 if (cur >= end) {
3056 *cur = '\0';
b162af07 3057 SvCUR_set(cat, cur - start);
fc241834
RGS
3058 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3059 end = start+SvLEN(cat)-UTF8_MAXLEN;
3060 }
230e1fce
NC
3061 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3062 ckWARN(WARN_UTF8) ?
3063 0 : UNICODE_ALLOW_ANY);
fc241834 3064 }
a6ec74c1 3065 }
a6ec74c1 3066 break;
fc241834 3067 }
a6ec74c1
JH
3068 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3069 case 'f':
a6ec74c1 3070 while (len-- > 0) {
f337b084
TH
3071 float afloat;
3072 NV anv;
a6ec74c1 3073 fromstr = NEXTFROM;
f337b084 3074 anv = SvNV(fromstr);
5cdb9e01 3075#ifdef __VOS__
f337b084 3076 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3077 during conversion from double to float into infinity, so we
3078 do it by hand. This code should either be generalized for
3079 any OS that needs it, or removed if and when VOS implements
3080 posix-976 (suggestion to support mapping to infinity).
3081 Paul.Green@stratus.com 02-04-02. */
f337b084 3082 if (anv > FLT_MAX)
fc241834 3083 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3084 else if (anv < -FLT_MAX)
fc241834 3085 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3086 else afloat = (float) anv;
3087#else /* __VOS__ */
baf3cf9c 3088# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3089 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3090 * on Alpha; fake it if we don't have them.
3091 */
f337b084 3092 if (anv > FLT_MAX)
fc241834 3093 afloat = FLT_MAX;
f337b084 3094 else if (anv < -FLT_MAX)
fc241834 3095 afloat = -FLT_MAX;
f337b084 3096 else afloat = (float)anv;
baf3cf9c 3097# else
f337b084 3098 afloat = (float)anv;
baf3cf9c 3099# endif
f337b084 3100#endif /* __VOS__ */
1109a392 3101 DO_BO_PACK_N(afloat, float);
f337b084 3102 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3103 }
3104 break;
3105 case 'd':
a6ec74c1 3106 while (len-- > 0) {
f337b084
TH
3107 double adouble;
3108 NV anv;
a6ec74c1 3109 fromstr = NEXTFROM;
f337b084 3110 anv = SvNV(fromstr);
5cdb9e01 3111#ifdef __VOS__
f337b084 3112 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3113 during conversion from long double to double into infinity,
3114 so we do it by hand. This code should either be generalized
3115 for any OS that needs it, or removed if and when VOS
3116 implements posix-976 (suggestion to support mapping to
3117 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3118 if (anv > DBL_MAX)
fc241834 3119 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3120 else if (anv < -DBL_MAX)
fc241834 3121 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3122 else adouble = (double) anv;
3123#else /* __VOS__ */
baf3cf9c 3124# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3125 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3126 * on Alpha; fake it if we don't have them.
3127 */
f337b084 3128 if (anv > DBL_MAX)
fc241834 3129 adouble = DBL_MAX;
f337b084 3130 else if (anv < -DBL_MAX)
fc241834 3131 adouble = -DBL_MAX;
f337b084 3132 else adouble = (double)anv;
baf3cf9c 3133# else
f337b084 3134 adouble = (double)anv;
baf3cf9c 3135# endif
f337b084 3136#endif /* __VOS__ */
1109a392 3137 DO_BO_PACK_N(adouble, double);
f337b084 3138 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3139 }
3140 break;
fc241834
RGS
3141 case 'F': {
3142 NV anv;
1109a392 3143 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3144 while (len-- > 0) {
3145 fromstr = NEXTFROM;
3146 anv = SvNV(fromstr);
1109a392 3147 DO_BO_PACK_N(anv, NV);
fc241834 3148 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3149 }
3150 break;
fc241834 3151 }
92d41999 3152#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3153 case 'D': {
3154 long double aldouble;
1109a392
MHM
3155 /* long doubles can have unused bits, which may be nonzero */
3156 Zero(&aldouble, 1, long double);
92d41999
JH
3157 while (len-- > 0) {
3158 fromstr = NEXTFROM;
3159 aldouble = (long double)SvNV(fromstr);
1109a392 3160 DO_BO_PACK_N(aldouble, long double);
fc241834 3161 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3162 }
3163 break;
fc241834 3164 }
92d41999 3165#endif
7212898e 3166#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3167 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3168#endif
a6ec74c1
JH
3169 case 'n':
3170 while (len-- > 0) {
f337b084 3171 I16 ai16;
a6ec74c1 3172 fromstr = NEXTFROM;
ef108786 3173 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3174#ifdef HAS_HTONS
ef108786 3175 ai16 = PerlSock_htons(ai16);
a6ec74c1 3176#endif
f337b084 3177 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3178 }
3179 break;
7212898e 3180#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3181 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3182#endif
a6ec74c1
JH
3183 case 'v':
3184 while (len-- > 0) {
f337b084 3185 I16 ai16;
a6ec74c1 3186 fromstr = NEXTFROM;
ef108786 3187 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3188#ifdef HAS_HTOVS
ef108786 3189 ai16 = htovs(ai16);
a6ec74c1 3190#endif
f337b084 3191 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3192 }
3193 break;
49704364 3194 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3195#if SHORTSIZE != SIZE16
fc241834 3196 while (len-- > 0) {
f337b084 3197 unsigned short aushort;
fc241834
RGS
3198 fromstr = NEXTFROM;
3199 aushort = SvUV(fromstr);
3200 DO_BO_PACK(aushort, s);
f337b084 3201 PUSH_VAR(utf8, cur, aushort);
fc241834 3202 }
49704364
LW
3203 break;
3204#else
3205 /* Fall through! */
a6ec74c1 3206#endif
49704364 3207 case 'S':
fc241834 3208 while (len-- > 0) {
f337b084 3209 U16 au16;
fc241834
RGS
3210 fromstr = NEXTFROM;
3211 au16 = (U16)SvUV(fromstr);
3212 DO_BO_PACK(au16, 16);
f337b084 3213 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3214 }
3215 break;
49704364 3216 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3217#if SHORTSIZE != SIZE16
fc241834 3218 while (len-- > 0) {
f337b084 3219 short ashort;
fc241834
RGS
3220 fromstr = NEXTFROM;
3221 ashort = SvIV(fromstr);
3222 DO_BO_PACK(ashort, s);
f337b084 3223 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3224 }
49704364
LW
3225 break;
3226#else
3227 /* Fall through! */
a6ec74c1 3228#endif
49704364
LW
3229 case 's':
3230 while (len-- > 0) {
f337b084 3231 I16 ai16;
49704364 3232 fromstr = NEXTFROM;
ef108786
MHM
3233 ai16 = (I16)SvIV(fromstr);
3234 DO_BO_PACK(ai16, 16);
f337b084 3235 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3236 }
3237 break;
3238 case 'I':
49704364 3239 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3240 while (len-- > 0) {
f337b084 3241 unsigned int auint;
a6ec74c1
JH
3242 fromstr = NEXTFROM;
3243 auint = SvUV(fromstr);
1109a392 3244 DO_BO_PACK(auint, i);
f337b084 3245 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3246 }
3247 break;
92d41999
JH
3248 case 'j':
3249 while (len-- > 0) {
f337b084 3250 IV aiv;
92d41999
JH
3251 fromstr = NEXTFROM;
3252 aiv = SvIV(fromstr);
1109a392
MHM
3253#if IVSIZE == INTSIZE
3254 DO_BO_PACK(aiv, i);
3255#elif IVSIZE == LONGSIZE
3256 DO_BO_PACK(aiv, l);
3257#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3258 DO_BO_PACK(aiv, 64);
f337b084
TH
3259#else
3260 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3261#endif
f337b084 3262 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3263 }
3264 break;
3265 case 'J':
3266 while (len-- > 0) {
f337b084 3267 UV auv;
92d41999
JH
3268 fromstr = NEXTFROM;
3269 auv = SvUV(fromstr);
1109a392
MHM
3270#if UVSIZE == INTSIZE
3271 DO_BO_PACK(auv, i);
3272#elif UVSIZE == LONGSIZE
3273 DO_BO_PACK(auv, l);
3274#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3275 DO_BO_PACK(auv, 64);
f337b084
TH
3276#else
3277 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3278#endif
f337b084 3279 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3280 }
3281 break;
a6ec74c1
JH
3282 case 'w':
3283 while (len-- > 0) {
f337b084 3284 NV anv;
a6ec74c1 3285 fromstr = NEXTFROM;
15e9f109 3286 anv = SvNV(fromstr);
a6ec74c1 3287
f337b084
TH
3288 if (anv < 0) {
3289 *cur = '\0';
b162af07 3290 SvCUR_set(cat, cur - start);
49704364 3291 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3292 }
a6ec74c1 3293
196b62db
NC
3294 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3295 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3296 any negative IVs will have already been got by the croak()
3297 above. IOK is untrue for fractions, so we test them
3298 against UV_MAX_P1. */
f337b084
TH
3299 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3300 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3301 char *in = buf + sizeof(buf);
196b62db 3302 UV auv = SvUV(fromstr);
a6ec74c1
JH
3303
3304 do {
eb160463 3305 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3306 auv >>= 7;
3307 } while (auv);
3308 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3309 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3310 in, (buf + sizeof(buf)) - in);
3311 } else if (SvPOKp(fromstr))
3312 goto w_string;
a6ec74c1 3313 else if (SvNOKp(fromstr)) {
0258719b
NC
3314 /* 10**NV_MAX_10_EXP is the largest power of 10
3315 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3316 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3317 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3318 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3319 Some C compilers are strict about integral constant
3320 expressions so we conservatively divide by a slightly
3321 smaller integer instead of multiplying by the exact
3322 floating-point value.
0258719b
NC
3323 */
3324#ifdef NV_MAX_10_EXP
f337b084 3325 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3326 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3327#else
f337b084 3328 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3329 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3330#endif
a6ec74c1
JH
3331 char *in = buf + sizeof(buf);
3332
15e9f109 3333 anv = Perl_floor(anv);
a6ec74c1 3334 do {
15e9f109 3335 NV next = Perl_floor(anv / 128);
a6ec74c1 3336 if (in <= buf) /* this cannot happen ;-) */
49704364 3337 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3338 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3339 anv = next;
3340 } while (anv > 0);
a6ec74c1 3341 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3342 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3343 in, (buf + sizeof(buf)) - in);
3344 } else {
735b914b
JH
3345 char *from, *result, *in;
3346 SV *norm;
3347 STRLEN len;
3348 bool done;
3349
f337b084 3350 w_string:
735b914b
JH
3351 /* Copy string and check for compliance */
3352 from = SvPV(fromstr, len);
3353 if ((norm = is_an_int(from, len)) == NULL)
49704364 3354 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
3355
3356 New('w', result, len, char);
3357 in = result + len;
3358 done = FALSE;
f337b084 3359 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3360 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3361 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3362 in, (result + len) - in);
735b914b
JH
3363 Safefree(result);
3364 SvREFCNT_dec(norm); /* free norm */
fc241834 3365 }
a6ec74c1
JH
3366 }
3367 break;
3368 case 'i':
49704364 3369 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3370 while (len-- > 0) {
f337b084 3371 int aint;
a6ec74c1
JH
3372 fromstr = NEXTFROM;
3373 aint = SvIV(fromstr);
1109a392 3374 DO_BO_PACK(aint, i);
f337b084 3375 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3376 }
3377 break;
7212898e 3378#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3379 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3380#endif
a6ec74c1
JH
3381 case 'N':
3382 while (len-- > 0) {
f337b084 3383 U32 au32;
a6ec74c1 3384 fromstr = NEXTFROM;
ef108786 3385 au32 = SvUV(fromstr);
a6ec74c1 3386#ifdef HAS_HTONL
ef108786 3387 au32 = PerlSock_htonl(au32);
a6ec74c1 3388#endif
f337b084 3389 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3390 }
3391 break;
7212898e 3392#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3393 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3394#endif
a6ec74c1
JH
3395 case 'V':
3396 while (len-- > 0) {
f337b084 3397 U32 au32;
a6ec74c1 3398 fromstr = NEXTFROM;
ef108786 3399 au32 = SvUV(fromstr);
a6ec74c1 3400#ifdef HAS_HTOVL
ef108786 3401 au32 = htovl(au32);
a6ec74c1 3402#endif
f337b084 3403 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3404 }
3405 break;
49704364 3406 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3407#if LONGSIZE != SIZE32
fc241834 3408 while (len-- > 0) {
f337b084 3409 unsigned long aulong;
fc241834
RGS
3410 fromstr = NEXTFROM;
3411 aulong = SvUV(fromstr);
3412 DO_BO_PACK(aulong, l);
f337b084 3413 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3414 }
49704364
LW
3415 break;
3416#else
3417 /* Fall though! */
a6ec74c1 3418#endif
49704364 3419 case 'L':
fc241834 3420 while (len-- > 0) {
f337b084 3421 U32 au32;
fc241834
RGS
3422 fromstr = NEXTFROM;
3423 au32 = SvUV(fromstr);
3424 DO_BO_PACK(au32, 32);
f337b084 3425 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3426 }
3427 break;
49704364 3428 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3429#if LONGSIZE != SIZE32
fc241834 3430 while (len-- > 0) {
f337b084 3431 long along;
fc241834
RGS
3432 fromstr = NEXTFROM;
3433 along = SvIV(fromstr);
3434 DO_BO_PACK(along, l);
f337b084 3435 PUSH_VAR(utf8, cur, along);
a6ec74c1 3436 }
49704364
LW
3437 break;
3438#else
3439 /* Fall though! */
a6ec74c1 3440#endif
49704364
LW
3441 case 'l':
3442 while (len-- > 0) {
f337b084 3443 I32 ai32;
49704364 3444 fromstr = NEXTFROM;
ef108786
MHM
3445 ai32 = SvIV(fromstr);
3446 DO_BO_PACK(ai32, 32);
f337b084 3447 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3448 }
3449 break;
3450#ifdef HAS_QUAD
3451 case 'Q':
3452 while (len-- > 0) {
f337b084 3453 Uquad_t auquad;
a6ec74c1 3454 fromstr = NEXTFROM;
f337b084 3455 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3456 DO_BO_PACK(auquad, 64);
f337b084 3457 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3458 }
3459 break;
3460 case 'q':
3461 while (len-- > 0) {
f337b084 3462 Quad_t aquad;
a6ec74c1
JH
3463 fromstr = NEXTFROM;
3464 aquad = (Quad_t)SvIV(fromstr);
1109a392 3465 DO_BO_PACK(aquad, 64);
f337b084 3466 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3467 }
3468 break;
f337b084 3469#endif /* HAS_QUAD */
a6ec74c1
JH
3470 case 'P':
3471 len = 1; /* assume SV is correct length */
f337b084 3472 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3473 /* Fall through! */
a6ec74c1
JH
3474 case 'p':
3475 while (len-- > 0) {
f337b084
TH
3476 char *aptr;
3477
a6ec74c1 3478 fromstr = NEXTFROM;
28a4f200 3479 SvGETMAGIC(fromstr);
3480 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3481 else {
3482 STRLEN n_a;
3483 /* XXX better yet, could spirit away the string to
3484 * a safe spot and hang on to it until the result
3485 * of pack() (and all copies of the result) are
3486 * gone.
3487 */
f337b084
TH
3488 if (ckWARN(WARN_PACK) &&
3489 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3490 !SvREADONLY(fromstr)))) {
9014280d 3491 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3492 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3493 }
3494 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3495 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3496 else
28a4f200 3497 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3498 }
07409e01 3499 DO_BO_PACK_PC(aptr);
f337b084 3500 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3501 }
3502 break;
fc241834
RGS
3503 case 'u': {
3504 char *aptr, *aend;
3505 bool from_utf8;
f337b084 3506
a6ec74c1 3507 fromstr = NEXTFROM;
fc241834
RGS
3508 if (len <= 2) len = 45;
3509 else len = len / 3 * 3;
3510 if (len >= 64) {
3511 Perl_warner(aTHX_ packWARN(WARN_PACK),
3512 "Field too wide in 'u' format in pack");
3513 len = 63;
3514 }
a6ec74c1 3515 aptr = SvPV(fromstr, fromlen);
fc241834
RGS
3516 from_utf8 = DO_UTF8(fromstr);
3517 if (from_utf8) {
3518 aend = aptr + fromlen;
3519 fromlen = sv_len_utf8(fromstr);
3520 } else aend = NULL; /* Unused, but keep compilers happy */
3521 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3522 while (fromlen > 0) {
fc241834 3523 U8 *end;
a6ec74c1 3524 I32 todo;
fc241834 3525 U8 hunk[1+63/3*4+1];
a6ec74c1 3526
eb160463 3527 if ((I32)fromlen > len)
a6ec74c1
JH
3528 todo = len;
3529 else
3530 todo = fromlen;
fc241834
RGS
3531 if (from_utf8) {
3532 char buffer[64];
3533 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3534 'u' | TYPE_IS_PACK)) {
3535 *cur = '\0';
b162af07 3536 SvCUR_set(cat, cur - start);
fc241834
RGS
3537 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3538 }
3539 end = doencodes(hunk, buffer, todo);
3540 } else {
3541 end = doencodes(hunk, aptr, todo);
3542 aptr += todo;
3543 }
3544 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3545 fromlen -= todo;
3546 }
a6ec74c1
JH
3547 break;
3548 }
f337b084
TH
3549 }
3550 *cur = '\0';
b162af07 3551 SvCUR_set(cat, cur - start);
f337b084 3552 no_change:
49704364 3553 *symptr = lookahead;
a6ec74c1 3554 }
49704364 3555 return beglist;
18529408
IZ
3556}
3557#undef NEXTFROM
3558
3559
3560PP(pp_pack)
3561{
3562 dSP; dMARK; dORIGMARK; dTARGET;
3563 register SV *cat = TARG;
3564 STRLEN fromlen;
3565 register char *pat = SvPVx(*++MARK, fromlen);
3566 register char *patend = pat + fromlen;
3567
3568 MARK++;
3569 sv_setpvn(cat, "", 0);
f337b084 3570 SvUTF8_off(cat);
18529408 3571
7accc089 3572 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3573
a6ec74c1
JH
3574 SvSETMAGIC(cat);
3575 SP = ORIGMARK;
3576 PUSHs(cat);
3577 RETURN;
3578}
a6ec74c1 3579
73cb7263
NC
3580/*
3581 * Local variables:
3582 * c-indentation-style: bsd
3583 * c-basic-offset: 4
3584 * indent-tabs-mode: t
3585 * End:
3586 *
edf815fd 3587 * vim: shiftwidth=4:
73cb7263 3588*/