This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Tests need to skip if there is no Encode configured.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e 34#if PERL_VERSION >= 9
f337b084
TH
35# define PERL_PACK_CAN_BYTEORDER
36# define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
39#ifndef CHAR_BIT
40# define CHAR_BIT 8
7212898e 41#endif
3473cf63
RGS
42/* Maximum number of bytes to which a byte can grow due to upgrade */
43#define UTF8_EXPAND 2
7212898e 44
a6ec74c1 45/*
a6ec74c1
JH
46 * Offset for integer pack/unpack.
47 *
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
50 */
51
52/*
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
58 */
59/*
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
62 */
63#define SIZE16 2
64#define SIZE32 4
65
66/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
67 --jhi Feb 1999 */
68
1109a392
MHM
69#if U16SIZE > SIZE16 || U32SIZE > SIZE32
70# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
71# define OFF16(p) ((char*)(p))
72# define OFF32(p) ((char*)(p))
a6ec74c1 73# else
1109a392 74# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
75# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
77# else
08ca2aa3 78 ++++ bad cray byte order
a6ec74c1
JH
79# endif
80# endif
a6ec74c1 81#else
08ca2aa3
TH
82# define OFF16(p) ((char *) (p))
83# define OFF32(p) ((char *) (p))
a6ec74c1
JH
84#endif
85
f337b084
TH
86/* Only to be used inside a loop (see the break) */
87#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
88 if (utf8) { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
90 } else { \
91 Copy(s, OFF16(p), SIZE16, char); \
92 (s) += SIZE16; \
93 } \
94} STMT_END
95
96/* Only to be used inside a loop (see the break) */
97#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
98 if (utf8) { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
100 } else { \
101 Copy(s, OFF32(p), SIZE32, char); \
102 (s) += SIZE32; \
103 } \
104} STMT_END
105
106#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
108
109/* Only to be used inside a loop (see the break) */
f337b084 110#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
111STMT_START { \
112 if (utf8) { \
f337b084
TH
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
115 } else { \
116 Copy(s, (char *) &var, sizeof(var), char); \
117 s += sizeof(var); \
118 } \
08ca2aa3
TH
119} STMT_END
120
f337b084 121#define PUSH_VAR(utf8, aptr, var) \
230e1fce 122 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 123
49704364
LW
124/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125#define MAX_SUB_TEMPLATE_LEVEL 100
126
66c611c5 127/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
128#define FLAG_WAS_UTF8 0x40
129#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 130#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 131#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
132#define FLAG_SLASH 0x04
133#define FLAG_COMMA 0x02
134#define FLAG_PACK 0x01
135
a6ec74c1
JH
136STATIC SV *
137S_mul128(pTHX_ SV *sv, U8 m)
138{
139 STRLEN len;
140 char *s = SvPV(sv, len);
141 char *t;
142 U32 i = 0;
143
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
146
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
149 sv = tmpNew;
150 s = SvPV(sv, len);
151 }
152 t = s + len - 1;
153 while (!*t) /* trailing '\0'? */
154 t--;
155 while (t > s) {
156 i = ((*t - '0') << 7) + m;
eb160463
GS
157 *(t--) = '0' + (char)(i % 10);
158 m = (char)(i / 10);
a6ec74c1
JH
159 }
160 return (sv);
161}
162
163/* Explosives and implosives. */
164
165#if 'I' == 73 && 'J' == 74
166/* On an ASCII/ISO kind of system */
167#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
168#else
169/*
170 Some other sort of character set - use memchr() so we don't match
171 the null byte.
172 */
173#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
174#endif
175
66c611c5 176/* type modifiers */
62f95557 177#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
178#define TYPE_IS_BIG_ENDIAN 0x200
179#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 180#define TYPE_IS_PACK 0x800
1109a392 181#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 182#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
183#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
184
7212898e 185#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 186# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 187#else
28be1210 188# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
189#endif
190
191#ifndef PERL_PACK_CAN_BYTEORDER
192/* Put "can't" first because it is shorter */
193# define TYPE_ENDIANNESS(t) 0
194# define TYPE_NO_ENDIANNESS(t) (t)
195
196# define ENDIANNESS_ALLOWED_TYPES ""
197
198# define DO_BO_UNPACK(var, type)
199# define DO_BO_PACK(var, type)
200# define DO_BO_UNPACK_PTR(var, type, pre_cast)
201# define DO_BO_PACK_PTR(var, type, pre_cast)
202# define DO_BO_UNPACK_N(var, type)
203# define DO_BO_PACK_N(var, type)
204# define DO_BO_UNPACK_P(var)
205# define DO_BO_PACK_P(var)
66c611c5 206
f337b084 207#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
208
209# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
211
212# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
213
214# define DO_BO_UNPACK(var, type) \
1109a392 215 STMT_START { \
66c611c5 216 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
219 default: break; \
220 } \
221 } STMT_END
222
7212898e 223# define DO_BO_PACK(var, type) \
1109a392 224 STMT_START { \
66c611c5 225 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
228 default: break; \
229 } \
230 } STMT_END
231
7212898e 232# define DO_BO_UNPACK_PTR(var, type, pre_cast) \
1109a392 233 STMT_START { \
66c611c5 234 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (void *) my_betoh ## type ((pre_cast) var); \
237 break; \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (void *) my_letoh ## type ((pre_cast) var); \
240 break; \
241 default: \
242 break; \
243 } \
244 } STMT_END
245
7212898e 246# define DO_BO_PACK_PTR(var, type, pre_cast) \
1109a392 247 STMT_START { \
66c611c5 248 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (void *) my_htobe ## type ((pre_cast) var); \
251 break; \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (void *) my_htole ## type ((pre_cast) var); \
254 break; \
255 default: \
256 break; \
257 } \
258 } STMT_END
259
7212898e 260# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
266 break; \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
270 break; \
271 default: \
272 break; \
273 } \
274 } STMT_END
275
7212898e
NC
276# if PTRSIZE == INTSIZE
277# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
278# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
279# elif PTRSIZE == LONGSIZE
280# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
281# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
282# else
283# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
284# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
285# endif
1109a392 286
7212898e 287# if defined(my_htolen) && defined(my_letohn) && \
1109a392 288 defined(my_htoben) && defined(my_betohn)
7212898e 289# define DO_BO_UNPACK_N(var, type) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
292 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
293 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
294 default: break; \
295 } \
296 } STMT_END
297
7212898e 298# define DO_BO_PACK_N(var, type) \
1109a392 299 STMT_START { \
66c611c5 300 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
301 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
302 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
303 default: break; \
304 } \
305 } STMT_END
7212898e
NC
306# else
307# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
308# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
309# endif
310
f337b084 311#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 312
78d46eaa 313#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 314#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
315#define PACK_SIZE_MASK 0x3F
316
78d46eaa
NC
317/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
318 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
319
320#if TYPE_IS_SHRIEKING != 0x100
321 ++++shriek offset should be 256
322#endif
323
324typedef U8 packprops_t;
78d46eaa
NC
325#if 'J'-'I' == 1
326/* ASCII */
a7a3cfaa
TH
327const packprops_t packprops[512] = {
328 /* normal */
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
333 0, 0, 0,
334 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 335#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 336 /* D */ LONG_DOUBLESIZE,
80a13697 337#else
a7a3cfaa 338 0,
80a13697 339#endif
a7a3cfaa
TH
340 0,
341 /* F */ NVSIZE,
342 0, 0,
343 /* I */ sizeof(unsigned int),
344 /* J */ UVSIZE,
345 0,
346 /* L */ SIZE32,
347 0,
348 /* N */ SIZE32,
349 0, 0,
80a13697 350#if defined(HAS_QUAD)
a7a3cfaa 351 /* Q */ sizeof(Uquad_t),
80a13697 352#else
a7a3cfaa 353 0,
80a13697 354#endif
a7a3cfaa
TH
355 0,
356 /* S */ SIZE16,
357 0,
358 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
359 /* V */ SIZE32,
360 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 /* c */ sizeof(char),
363 /* d */ sizeof(double),
364 0,
365 /* f */ sizeof(float),
366 0, 0,
367 /* i */ sizeof(int),
368 /* j */ IVSIZE,
369 0,
370 /* l */ SIZE32,
371 0,
372 /* n */ SIZE16,
373 0,
374 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 375#if defined(HAS_QUAD)
a7a3cfaa 376 /* q */ sizeof(Quad_t),
80a13697 377#else
a7a3cfaa 378 0,
80a13697 379#endif
a7a3cfaa
TH
380 0,
381 /* s */ SIZE16,
382 0, 0,
383 /* v */ SIZE16,
384 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0,
394 /* shrieking */
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
398 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
399 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 /* I */ sizeof(unsigned int),
401 0, 0,
402 /* L */ sizeof(unsigned long),
403 0,
7212898e 404#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 405 /* N */ SIZE32,
7212898e 406#else
a7a3cfaa 407 0,
7212898e 408#endif
a7a3cfaa
TH
409 0, 0, 0, 0,
410 /* S */ sizeof(unsigned short),
411 0, 0,
7212898e 412#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 413 /* V */ SIZE32,
7212898e 414#else
a7a3cfaa 415 0,
7212898e 416#endif
a7a3cfaa
TH
417 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
418 0, 0,
419 /* i */ sizeof(int),
420 0, 0,
421 /* l */ sizeof(long),
422 0,
7212898e 423#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 424 /* n */ SIZE16,
7212898e 425#else
a7a3cfaa 426 0,
7212898e 427#endif
a7a3cfaa
TH
428 0, 0, 0, 0,
429 /* s */ sizeof(short),
430 0, 0,
7212898e 431#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 432 /* v */ SIZE16,
7212898e 433#else
a7a3cfaa 434 0,
7212898e 435#endif
a7a3cfaa
TH
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
445};
446#else
447/* EBCDIC (or bust) */
a7a3cfaa
TH
448const packprops_t packprops[512] = {
449 /* normal */
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0,
459 /* c */ sizeof(char),
460 /* d */ sizeof(double),
461 0,
462 /* f */ sizeof(float),
463 0, 0,
464 /* i */ sizeof(int),
465 0, 0, 0, 0, 0, 0, 0,
466 /* j */ IVSIZE,
467 0,
468 /* l */ SIZE32,
469 0,
470 /* n */ SIZE16,
471 0,
472 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 473#if defined(HAS_QUAD)
a7a3cfaa 474 /* q */ sizeof(Quad_t),
80a13697 475#else
a7a3cfaa 476 0,
80a13697 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 /* s */ SIZE16,
480 0, 0,
481 /* v */ SIZE16,
482 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 486#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 487 /* D */ LONG_DOUBLESIZE,
80a13697 488#else
a7a3cfaa 489 0,
80a13697 490#endif
a7a3cfaa
TH
491 0,
492 /* F */ NVSIZE,
493 0, 0,
494 /* I */ sizeof(unsigned int),
495 0, 0, 0, 0, 0, 0, 0,
496 /* J */ UVSIZE,
497 0,
498 /* L */ SIZE32,
499 0,
500 /* N */ SIZE32,
501 0, 0,
80a13697 502#if defined(HAS_QUAD)
a7a3cfaa 503 /* Q */ sizeof(Uquad_t),
80a13697 504#else
a7a3cfaa 505 0,
80a13697 506#endif
a7a3cfaa
TH
507 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 /* S */ SIZE16,
509 0,
510 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
511 /* V */ SIZE32,
512 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 /* shrieking */
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 /* i */ sizeof(int),
526 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 /* l */ sizeof(long),
528 0,
7212898e 529#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 530 /* n */ SIZE16,
7212898e 531#else
a7a3cfaa 532 0,
7212898e 533#endif
a7a3cfaa
TH
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* s */ sizeof(short),
536 0, 0,
7212898e 537#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 538 /* v */ SIZE16,
7212898e 539#else
a7a3cfaa 540 0,
7212898e 541#endif
a7a3cfaa
TH
542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544 0, 0, 0,
545 /* I */ sizeof(unsigned int),
546 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 /* L */ sizeof(unsigned long),
548 0,
7212898e 549#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 550 /* N */ SIZE32,
7212898e 551#else
a7a3cfaa 552 0,
7212898e 553#endif
a7a3cfaa
TH
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 /* S */ sizeof(unsigned short),
556 0, 0,
7212898e 557#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 558 /* V */ SIZE32,
7212898e 559#else
a7a3cfaa 560 0,
7212898e 561#endif
a7a3cfaa
TH
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
564};
565#endif
566
08ca2aa3 567STATIC U8
f337b084 568uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
08ca2aa3
TH
569{
570 UV val;
571 STRLEN retlen;
f337b084
TH
572 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
573 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
574 /* We try to process malformed UTF-8 as much as possible (preferrably with
575 warnings), but these two mean we make no progress in the string and
576 might enter an infinite loop */
577 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
578 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
579 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 580 if (val >= 0x100) {
f337b084 581 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
582 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
583 "Character in '%c' format wrapped in unpack",
f337b084 584 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
585 val &= 0xff;
586 }
587 *s += retlen;
fe2774ed 588 return (U8)val;
08ca2aa3
TH
589}
590
f337b084
TH
591#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
592 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
593 *(U8 *)(s)++)
594
595STATIC bool
f337b084 596uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
597{
598 UV val;
599 STRLEN retlen;
600 char *from = *s;
601 int bad = 0;
602 U32 flags = ckWARN(WARN_UTF8) ?
603 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
604 for (;buf_len > 0; buf_len--) {
605 if (from >= end) return FALSE;
f337b084 606 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
607 if (retlen == (STRLEN) -1 || retlen == 0) {
608 from += UTF8SKIP(from);
609 bad |= 1;
610 } else from += retlen;
611 if (val >= 0x100) {
612 bad |= 2;
613 val &= 0xff;
614 }
fe2774ed 615 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
616 }
617 /* We have enough characters for the buffer. Did we have problems ? */
618 if (bad) {
619 if (bad & 1) {
620 /* Rewalk the string fragment while warning */
621 char *ptr;
622 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
623 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
624 if (ptr >= end) break;
f337b084 625 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
626 }
627 if (from > end) from = end;
628 }
629 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 630 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
631 WARN_PACK : WARN_UNPACK),
632 "Character(s) in '%c' format wrapped in %s",
fc241834 633 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 634 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
635 }
636 *s = from;
637 return TRUE;
638}
639
640STATIC bool
641next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
642{
643 UV val;
644 STRLEN retlen;
f337b084 645 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
646 if (val >= 0x100 || !ISUUCHAR(val) ||
647 retlen == (STRLEN) -1 || retlen == 0) {
648 *out = 0;
649 return FALSE;
650 }
651 *out = PL_uudmap[val] & 077;
f337b084 652 *s += retlen;
08ca2aa3
TH
653 return TRUE;
654}
78d46eaa 655
f337b084
TH
656STATIC void
657bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
658 U8 buffer[UTF8_MAXLEN];
659 U8 *end = start + len;
660 char *d = *dest;
661 while (start < end) {
662 int length =
663 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
664 switch(length) {
665 case 1:
666 *d++ = buffer[0];
667 break;
668 case 2:
669 *d++ = buffer[0];
670 *d++ = buffer[1];
671 break;
672 default:
673 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
674 *start, length);
675 }
676 start++;
677 }
678 *dest = d;
679}
680
230e1fce
NC
681#define PUSH_BYTES(utf8, cur, buf, len) \
682STMT_START { \
683 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
684 else { \
685 Copy(buf, cur, len, char); \
686 (cur) += (len); \
687 } \
f337b084
TH
688} STMT_END
689
690#define GROWING(utf8, cat, start, cur, in_len) \
691STMT_START { \
692 STRLEN glen = (in_len); \
3473cf63 693 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
694 if ((cur) + glen >= (start) + SvLEN(cat)) { \
695 (start) = sv_exp_grow(aTHX_ cat, glen); \
696 (cur) = (start) + SvCUR(cat); \
697 } \
698} STMT_END
699
700#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
701STMT_START { \
702 STRLEN glen = (in_len); \
703 STRLEN gl = glen; \
3473cf63 704 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
705 if ((cur) + gl >= (start) + SvLEN(cat)) { \
706 *cur = '\0'; \
707 SvCUR(cat) = (cur) - (start); \
708 (start) = sv_exp_grow(aTHX_ cat, gl); \
709 (cur) = (start) + SvCUR(cat); \
710 } \
711 PUSH_BYTES(utf8, cur, buf, glen); \
712} STMT_END
713
714#define PUSH_BYTE(utf8, s, byte) \
715STMT_START { \
716 if (utf8) { \
717 U8 au8 = (byte); \
718 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
719 } else *(U8 *)(s)++ = (byte); \
720} STMT_END
721
722/* Only to be used inside a loop (see the break) */
723#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
724STMT_START { \
725 STRLEN retlen; \
726 if (str >= end) break; \
727 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
728 if (retlen == (STRLEN) -1 || retlen == 0) { \
729 *cur = '\0'; \
730 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
731 } \
732 str += retlen; \
733} STMT_END
734
206947d2 735/* Returns the sizeof() struct described by pat */
028d1f6d 736STATIC I32
f337b084 737S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 738{
f337b084 739 I32 total = 0;
206947d2 740
49704364 741 while (next_symbol(symptr)) {
f337b084
TH
742 I32 len;
743 int star, size;
f337b084
TH
744
745 switch (symptr->howlen) {
fc241834 746 case e_star:
49704364
LW
747 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
748 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
749 break;
f337b084
TH
750 default:
751 /* e_no_len and e_number */
752 len = symptr->length;
753 break;
49704364
LW
754 }
755
a7a3cfaa 756 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697
NC
757 if (!size) {
758 /* endianness doesn't influence the size of a type */
759 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
760 default:
761 Perl_croak(aTHX_ "Invalid type '%c' in %s",
762 (int)TYPE_NO_MODIFIERS(symptr->code),
763 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
28be1210
TH
764#ifdef PERL_PACK_CAN_SHRIEKSIGN
765 case '.' | TYPE_IS_SHRIEKING:
766 case '@' | TYPE_IS_SHRIEKING:
767#endif
80a13697 768 case '@':
28be1210 769 case '.':
80a13697
NC
770 case '/':
771 case 'U': /* XXXX Is it correct? */
772 case 'w':
773 case 'u':
774 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 775 (int) TYPE_NO_MODIFIERS(symptr->code),
80a13697
NC
776 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
777 case '%':
778 size = 0;
779 break;
780 case '(':
fc241834
RGS
781 {
782 tempsym_t savsym = *symptr;
783 symptr->patptr = savsym.grpbeg;
784 symptr->patend = savsym.grpend;
785 /* XXXX Theoretically, we need to measure many times at
786 different positions, since the subexpression may contain
787 alignment commands, but be not of aligned length.
788 Need to detect this and croak(). */
789 size = measure_struct(symptr);
790 *symptr = savsym;
791 break;
792 }
80a13697
NC
793 case 'X' | TYPE_IS_SHRIEKING:
794 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
795 */
796 if (!len) /* Avoid division by 0 */
797 len = 1;
798 len = total % len; /* Assumed: the start is aligned. */
799 /* FALL THROUGH */
800 case 'X':
801 size = -1;
802 if (total < len)
803 Perl_croak(aTHX_ "'X' outside of string in %s",
804 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
805 break;
806 case 'x' | TYPE_IS_SHRIEKING:
807 if (!len) /* Avoid division by 0 */
808 len = 1;
809 star = total % len; /* Assumed: the start is aligned. */
810 if (star) /* Other portable ways? */
811 len = len - star;
812 else
813 len = 0;
814 /* FALL THROUGH */
815 case 'x':
816 case 'A':
817 case 'Z':
818 case 'a':
80a13697
NC
819 size = 1;
820 break;
821 case 'B':
822 case 'b':
823 len = (len + 7)/8;
824 size = 1;
825 break;
826 case 'H':
827 case 'h':
828 len = (len + 1)/2;
829 size = 1;
830 break;
78d46eaa 831
80a13697
NC
832 case 'P':
833 len = 1;
834 size = sizeof(char*);
78d46eaa
NC
835 break;
836 }
206947d2
IZ
837 }
838 total += len * size;
839 }
840 return total;
841}
842
49704364
LW
843
844/* locate matching closing parenthesis or bracket
845 * returns char pointer to char after match, or NULL
846 */
847STATIC char *
848S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 849{
49704364
LW
850 while (patptr < patend) {
851 char c = *patptr++;
852
853 if (isSPACE(c))
854 continue;
855 else if (c == ender)
856 return patptr-1;
857 else if (c == '#') {
858 while (patptr < patend && *patptr != '\n')
859 patptr++;
860 continue;
861 } else if (c == '(')
862 patptr = group_end(patptr, patend, ')') + 1;
863 else if (c == '[')
864 patptr = group_end(patptr, patend, ']') + 1;
18529408 865 }
49704364
LW
866 Perl_croak(aTHX_ "No group ending character '%c' found in template",
867 ender);
868 return 0;
18529408
IZ
869}
870
49704364
LW
871
872/* Convert unsigned decimal number to binary.
873 * Expects a pointer to the first digit and address of length variable
874 * Advances char pointer to 1st non-digit char and returns number
fc241834 875 */
18529408 876STATIC char *
49704364
LW
877S_get_num(pTHX_ register char *patptr, I32 *lenptr )
878{
879 I32 len = *patptr++ - '0';
880 while (isDIGIT(*patptr)) {
881 if (len >= 0x7FFFFFFF/10)
882 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
883 len = (len * 10) + (*patptr++ - '0');
884 }
885 *lenptr = len;
886 return patptr;
887}
888
889/* The marvellous template parsing routine: Using state stored in *symptr,
890 * locates next template code and count
891 */
892STATIC bool
f337b084 893S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 894{
fc241834
RGS
895 char* patptr = symptr->patptr;
896 char* patend = symptr->patend;
f337b084 897 const char *allowed = "";
49704364
LW
898
899 symptr->flags &= ~FLAG_SLASH;
900
901 while (patptr < patend) {
902 if (isSPACE(*patptr))
903 patptr++;
904 else if (*patptr == '#') {
905 patptr++;
906 while (patptr < patend && *patptr != '\n')
907 patptr++;
908 if (patptr < patend)
909 patptr++;
910 } else {
fc241834 911 /* We should have found a template code */
49704364 912 I32 code = *patptr++ & 0xFF;
66c611c5 913 U32 inherited_modifiers = 0;
49704364
LW
914
915 if (code == ','){ /* grandfather in commas but with a warning */
916 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
917 symptr->flags |= FLAG_COMMA;
918 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
919 "Invalid type ',' in %s",
920 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
921 }
922 continue;
923 }
fc241834 924
49704364 925 /* for '(', skip to ')' */
fc241834 926 if (code == '(') {
49704364
LW
927 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
928 Perl_croak(aTHX_ "()-group starts with a count in %s",
929 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
930 symptr->grpbeg = patptr;
931 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
932 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
933 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
934 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
935 }
936
66c611c5
MHM
937 /* look for group modifiers to inherit */
938 if (TYPE_ENDIANNESS(symptr->flags)) {
939 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
940 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
941 }
942
1109a392
MHM
943 /* look for modifiers */
944 while (patptr < patend) {
1109a392
MHM
945 I32 modifier = 0;
946 switch (*patptr) {
947 case '!':
948 modifier = TYPE_IS_SHRIEKING;
7212898e 949 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 950 break;
7212898e 951#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
952 case '>':
953 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 954 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
955 break;
956 case '<':
957 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 958 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 959 break;
f337b084 960#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392
MHM
961 default:
962 break;
963 }
66c611c5 964
1109a392
MHM
965 if (modifier == 0)
966 break;
66c611c5 967
1109a392
MHM
968 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
969 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
970 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
971
972 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392
MHM
973 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
974 (int) TYPE_NO_MODIFIERS(code),
975 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
976 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
977 TYPE_ENDIANNESS_MASK)
978 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
979 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
980
1109a392
MHM
981 if (ckWARN(WARN_UNPACK)) {
982 if (code & modifier)
983 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
984 "Duplicate modifier '%c' after '%c' in %s",
985 *patptr, (int) TYPE_NO_MODIFIERS(code),
986 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
987 }
66c611c5 988
1109a392
MHM
989 code |= modifier;
990 patptr++;
49704364
LW
991 }
992
66c611c5
MHM
993 /* inherit modifiers */
994 code |= inherited_modifiers;
995
fc241834 996 /* look for count and/or / */
49704364
LW
997 if (patptr < patend) {
998 if (isDIGIT(*patptr)) {
999 patptr = get_num( patptr, &symptr->length );
1000 symptr->howlen = e_number;
1001
1002 } else if (*patptr == '*') {
1003 patptr++;
1004 symptr->howlen = e_star;
1005
1006 } else if (*patptr == '[') {
fc241834 1007 char* lenptr = ++patptr;
49704364
LW
1008 symptr->howlen = e_number;
1009 patptr = group_end( patptr, patend, ']' ) + 1;
1010 /* what kind of [] is it? */
1011 if (isDIGIT(*lenptr)) {
1012 lenptr = get_num( lenptr, &symptr->length );
1013 if( *lenptr != ']' )
1014 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1015 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1016 } else {
1017 tempsym_t savsym = *symptr;
1018 symptr->patend = patptr-1;
1019 symptr->patptr = lenptr;
1020 savsym.length = measure_struct(symptr);
1021 *symptr = savsym;
1022 }
1023 } else {
1024 symptr->howlen = e_no_len;
1025 symptr->length = 1;
1026 }
1027
1028 /* try to find / */
1029 while (patptr < patend) {
1030 if (isSPACE(*patptr))
1031 patptr++;
1032 else if (*patptr == '#') {
1033 patptr++;
1034 while (patptr < patend && *patptr != '\n')
1035 patptr++;
1036 if (patptr < patend)
1037 patptr++;
1038 } else {
66c611c5 1039 if (*patptr == '/') {
49704364
LW
1040 symptr->flags |= FLAG_SLASH;
1041 patptr++;
66c611c5
MHM
1042 if (patptr < patend &&
1043 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364
LW
1044 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1045 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1046 }
1047 break;
1048 }
18529408 1049 }
49704364
LW
1050 } else {
1051 /* at end - no count, no / */
1052 symptr->howlen = e_no_len;
1053 symptr->length = 1;
1054 }
1055
1056 symptr->code = code;
fc241834 1057 symptr->patptr = patptr;
49704364 1058 return TRUE;
18529408 1059 }
49704364 1060 }
fc241834 1061 symptr->patptr = patptr;
49704364 1062 return FALSE;
18529408
IZ
1063}
1064
18529408 1065/*
fc241834 1066 There is no way to cleanly handle the case where we should process the
08ca2aa3 1067 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1068 (e.g. estimates like strend-s as an upper bound for the number of
1069 characters left wouldn't work). So if we foresee the need of this
1070 (pattern starts with U or contains U0), we want to work on the encoded
1071 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1072 themselves if they need to do a lot of unpacks like this on it
1073*/
fc241834 1074STATIC bool
08ca2aa3
TH
1075need_utf8(const char *pat, const char *patend)
1076{
1077 bool first = TRUE;
1078 while (pat < patend) {
1079 if (pat[0] == '#') {
1080 pat++;
1081 pat = memchr(pat, '\n', patend-pat);
1082 if (!pat) return FALSE;
1083 } else if (pat[0] == 'U') {
1084 if (first || pat[1] == '0') return TRUE;
1085 } else first = FALSE;
1086 pat++;
1087 }
1088 return FALSE;
1089}
1090
1091STATIC char
1092first_symbol(const char *pat, const char *patend) {
1093 while (pat < patend) {
1094 if (pat[0] != '#') return pat[0];
1095 pat++;
1096 pat = memchr(pat, '\n', patend-pat);
1097 if (!pat) return 0;
1098 pat++;
1099 }
1100 return 0;
1101}
1102
1103/*
18529408
IZ
1104=for apidoc unpack_str
1105
7accc089
JH
1106The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1107and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1108
1109=cut */
1110
1111I32
f337b084 1112Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1113{
49704364 1114 tempsym_t sym = { 0 };
08ca2aa3 1115
f337b084 1116 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1117 else if (need_utf8(pat, patend)) {
1118 /* We probably should try to avoid this in case a scalar context call
1119 wouldn't get to the "U0" */
1120 STRLEN len = strend - s;
230e1fce 1121 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1122 SAVEFREEPV(s);
1123 strend = s + len;
f337b084 1124 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1125 }
1126
f337b084
TH
1127 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1128 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1129
49704364
LW
1130 sym.patptr = pat;
1131 sym.patend = patend;
1132 sym.flags = flags;
1133
1134 return unpack_rec(&sym, s, s, strend, NULL );
1135}
1136
7accc089
JH
1137/*
1138=for apidoc unpackstring
1139
608d3aed
LW
1140The engine implementing unpack() Perl function. C<unpackstring> puts the
1141extracted list items on the stack and returns the number of elements.
1142Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1143
1144=cut */
1145
1146I32
f337b084 1147Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
7accc089
JH
1148{
1149 tempsym_t sym = { 0 };
08ca2aa3 1150
f337b084 1151 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1152 else if (need_utf8(pat, patend)) {
1153 /* We probably should try to avoid this in case a scalar context call
1154 wouldn't get to the "U0" */
1155 STRLEN len = strend - s;
230e1fce 1156 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1157 SAVEFREEPV(s);
1158 strend = s + len;
f337b084 1159 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1160 }
1161
f337b084
TH
1162 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1163 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1164
7accc089
JH
1165 sym.patptr = pat;
1166 sym.patend = patend;
1167 sym.flags = flags;
1168
1169 return unpack_rec(&sym, s, s, strend, NULL );
1170}
1171
49704364
LW
1172STATIC
1173I32
08ca2aa3 1174S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
49704364 1175{
a6ec74c1 1176 dSP;
18529408
IZ
1177 SV *sv;
1178 I32 start_sp_offset = SP - PL_stack_base;
49704364 1179 howlen_t howlen;
a6ec74c1 1180
a6ec74c1 1181 I32 checksum = 0;
92d41999 1182 UV cuv = 0;
a6ec74c1 1183 NV cdouble = 0.0;
f337b084 1184 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1185 bool beyond = FALSE;
21c16052 1186 bool explicit_length;
49704364 1187 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1188 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1189 symptr->strbeg = s - strbeg;
49704364 1190
49704364 1191 while (next_symbol(symptr)) {
a7a3cfaa 1192 packprops_t props;
f337b084
TH
1193 I32 len, ai32;
1194 I32 datumtype = symptr->code;
206947d2 1195 /* do first one only unless in list context
08ca2aa3 1196 / is implemented by unpacking the count, then popping it from the
206947d2 1197 stack, so must check that we're not in the middle of a / */
49704364 1198 if ( unpack_only_one
206947d2 1199 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1200 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1201 break;
49704364 1202
f337b084 1203 switch (howlen = symptr->howlen) {
fc241834
RGS
1204 case e_star:
1205 len = strend - strbeg; /* long enough */
49704364 1206 break;
f337b084
TH
1207 default:
1208 /* e_no_len and e_number */
1209 len = symptr->length;
1210 break;
49704364 1211 }
18529408 1212
21c16052 1213 explicit_length = TRUE;
a6ec74c1 1214 redo_switch:
49704364 1215 beyond = s >= strend;
a7a3cfaa
TH
1216
1217 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1218 if (props) {
1219 /* props nonzero means we can process this letter. */
1220 long size = props & PACK_SIZE_MASK;
1221 long howmany = (strend - s) / size;
1222 if (len > howmany)
1223 len = howmany;
1224
1225 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1226 if (len && unpack_only_one) len = 1;
1227 EXTEND(SP, len);
1228 EXTEND_MORTAL(len);
78d46eaa
NC
1229 }
1230 }
a7a3cfaa 1231
1109a392 1232 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1233 default:
1109a392 1234 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1235
a6ec74c1 1236 case '%':
49704364 1237 if (howlen == e_no_len)
18529408 1238 len = 16; /* len is not specified */
a6ec74c1 1239 checksum = len;
92d41999 1240 cuv = 0;
a6ec74c1 1241 cdouble = 0;
18529408 1242 continue;
a6ec74c1 1243 break;
18529408
IZ
1244 case '(':
1245 {
49704364 1246 tempsym_t savsym = *symptr;
66c611c5
MHM
1247 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1248 symptr->flags |= group_modifiers;
49704364 1249 symptr->patend = savsym.grpend;
28be1210 1250 symptr->previous = &savsym;
49704364 1251 symptr->level++;
18529408
IZ
1252 PUTBACK;
1253 while (len--) {
49704364 1254 symptr->patptr = savsym.grpbeg;
f337b084
TH
1255 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1256 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1257 unpack_rec(symptr, s, strbeg, strend, &s);
1258 if (s == strend && savsym.howlen == e_star)
49704364 1259 break; /* No way to continue */
18529408
IZ
1260 }
1261 SPAGAIN;
28be1210 1262 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1263 *symptr = savsym;
18529408
IZ
1264 break;
1265 }
28be1210
TH
1266#ifdef PERL_PACK_CAN_SHRIEKSIGN
1267 case '.' | TYPE_IS_SHRIEKING:
1268#endif
1269 case '.': {
1270 char *from;
1271 SV *sv;
1272#ifdef PERL_PACK_CAN_SHRIEKSIGN
1273 bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1274#else /* PERL_PACK_CAN_SHRIEKSIGN */
1275 bool u8 = utf8;
1276#endif
1277 if (howlen == e_star) from = strbeg;
1278 else if (len <= 0) from = s;
1279 else {
1280 tempsym_t *group = symptr;
1281
1282 while (--len && group) group = group->previous;
1283 from = group ? strbeg + group->strbeg : strbeg;
1284 }
1285 sv = from <= s ?
00646304
CB
1286 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1287 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1288 XPUSHs(sv_2mortal(sv));
1289 break;
1290 }
1291#ifdef PERL_PACK_CAN_SHRIEKSIGN
1292 case '@' | TYPE_IS_SHRIEKING:
1293#endif
a6ec74c1 1294 case '@':
28be1210
TH
1295 s = strbeg + symptr->strbeg;
1296#ifdef PERL_PACK_CAN_SHRIEKSIGN
1297 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1298#else /* PERL_PACK_CAN_SHRIEKSIGN */
1299 if (utf8)
1300#endif
1301 {
08ca2aa3
TH
1302 while (len > 0) {
1303 if (s >= strend)
1304 Perl_croak(aTHX_ "'@' outside of string in unpack");
1305 s += UTF8SKIP(s);
1306 len--;
1307 }
1308 if (s > strend)
1309 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1310 } else {
28be1210 1311 if (strend-s < len)
fc241834 1312 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1313 s += len;
08ca2aa3 1314 }
a6ec74c1 1315 break;
62f95557
IZ
1316 case 'X' | TYPE_IS_SHRIEKING:
1317 if (!len) /* Avoid division by 0 */
1318 len = 1;
08ca2aa3
TH
1319 if (utf8) {
1320 char *hop, *last;
f337b084
TH
1321 I32 l = len;
1322 hop = last = strbeg;
1323 while (hop < s) {
1324 hop += UTF8SKIP(hop);
1325 if (--l == 0) {
08ca2aa3 1326 last = hop;
f337b084
TH
1327 l = len;
1328 }
fc241834 1329 }
f337b084
TH
1330 if (last > s)
1331 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1332 s = last;
1333 break;
f337b084
TH
1334 }
1335 len = (s - strbeg) % len;
62f95557 1336 /* FALL THROUGH */
a6ec74c1 1337 case 'X':
08ca2aa3
TH
1338 if (utf8) {
1339 while (len > 0) {
1340 if (s <= strbeg)
1341 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1342 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1343 if (s <= strbeg)
1344 Perl_croak(aTHX_ "'X' outside of string in unpack");
1345 }
1346 len--;
1347 }
1348 } else {
fc241834
RGS
1349 if (len > s - strbeg)
1350 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1351 s -= len;
08ca2aa3 1352 }
a6ec74c1 1353 break;
62f95557
IZ
1354 case 'x' | TYPE_IS_SHRIEKING:
1355 if (!len) /* Avoid division by 0 */
1356 len = 1;
230e1fce
NC
1357 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1358 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1359 if (ai32 == 0) break;
1360 len -= ai32;
62f95557 1361 /* FALL THROUGH */
a6ec74c1 1362 case 'x':
08ca2aa3
TH
1363 if (utf8) {
1364 while (len>0) {
1365 if (s >= strend)
1366 Perl_croak(aTHX_ "'x' outside of string in unpack");
1367 s += UTF8SKIP(s);
1368 len--;
1369 }
1370 } else {
fc241834
RGS
1371 if (len > strend - s)
1372 Perl_croak(aTHX_ "'x' outside of string in unpack");
1373 s += len;
f337b084 1374 }
a6ec74c1
JH
1375 break;
1376 case '/':
49704364
LW
1377 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1378 break;
a6ec74c1
JH
1379 case 'A':
1380 case 'Z':
1381 case 'a':
08ca2aa3
TH
1382 if (checksum) {
1383 /* Preliminary length estimate is assumed done in 'W' */
1384 if (len > strend - s) len = strend - s;
1385 goto W_checksum;
1386 }
1387 if (utf8) {
1388 I32 l;
1389 char *hop;
1390 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1391 if (hop >= strend) {
1392 if (hop > strend)
1393 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1394 break;
fc241834 1395 }
a6ec74c1 1396 }
08ca2aa3
TH
1397 if (hop > strend)
1398 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1399 len = hop - s;
1400 } else if (len > strend - s)
1401 len = strend - s;
1402
1403 if (datumtype == 'Z') {
1404 /* 'Z' strips stuff after first null */
f337b084
TH
1405 char *ptr, *end;
1406 end = s + len;
1407 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1408 sv = newSVpvn(s, ptr-s);
1409 if (howlen == e_star) /* exact for 'Z*' */
1410 len = ptr-s + (ptr != strend ? 1 : 0);
1411 } else if (datumtype == 'A') {
1412 /* 'A' strips both nulls and spaces */
1413 char *ptr;
18bdf90a
TH
1414 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1415 for (ptr = s+len-1; ptr >= s; ptr--)
1416 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1417 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1418 if (ptr >= s) ptr += UTF8SKIP(ptr);
1419 else ptr++;
28be1210 1420 if (ptr > s+len)
18bdf90a
TH
1421 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1422 } else {
1423 for (ptr = s+len-1; ptr >= s; ptr--)
1424 if (*ptr != 0 && !isSPACE(*ptr)) break;
1425 ptr++;
1426 }
08ca2aa3
TH
1427 sv = newSVpvn(s, ptr-s);
1428 } else sv = newSVpvn(s, len);
1429
1430 if (utf8) {
1431 SvUTF8_on(sv);
1432 /* Undo any upgrade done due to need_utf8() */
f337b084 1433 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1434 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1435 }
1436 XPUSHs(sv_2mortal(sv));
08ca2aa3 1437 s += len;
a6ec74c1
JH
1438 break;
1439 case 'B':
08ca2aa3
TH
1440 case 'b': {
1441 char *str;
49704364 1442 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1443 len = (strend - s) * 8;
1444 if (checksum) {
1445 if (!PL_bitcount) {
08ca2aa3 1446 int bits;
a6ec74c1
JH
1447 Newz(601, PL_bitcount, 256, char);
1448 for (bits = 1; bits < 256; bits++) {
1449 if (bits & 1) PL_bitcount[bits]++;
1450 if (bits & 2) PL_bitcount[bits]++;
1451 if (bits & 4) PL_bitcount[bits]++;
1452 if (bits & 8) PL_bitcount[bits]++;
1453 if (bits & 16) PL_bitcount[bits]++;
1454 if (bits & 32) PL_bitcount[bits]++;
1455 if (bits & 64) PL_bitcount[bits]++;
1456 if (bits & 128) PL_bitcount[bits]++;
1457 }
1458 }
f337b084 1459 if (utf8)
08ca2aa3 1460 while (len >= 8 && s < strend) {
f337b084 1461 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1462 len -= 8;
1463 }
f337b084 1464 else
fc241834 1465 while (len >= 8) {
08ca2aa3 1466 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1467 len -= 8;
1468 }
08ca2aa3
TH
1469 if (len && s < strend) {
1470 U8 bits;
f337b084
TH
1471 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1472 if (datumtype == 'b')
a6ec74c1 1473 while (len-- > 0) {
92d41999 1474 if (bits & 1) cuv++;
a6ec74c1
JH
1475 bits >>= 1;
1476 }
f337b084 1477 else
a6ec74c1 1478 while (len-- > 0) {
08ca2aa3 1479 if (bits & 0x80) cuv++;
a6ec74c1
JH
1480 bits <<= 1;
1481 }
fc241834 1482 }
a6ec74c1
JH
1483 break;
1484 }
08ca2aa3
TH
1485
1486 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1487 SvPOK_on(sv);
1488 str = SvPVX(sv);
1489 if (datumtype == 'b') {
f337b084 1490 U8 bits = 0;
08ca2aa3
TH
1491 ai32 = len;
1492 for (len = 0; len < ai32; len++) {
1493 if (len & 7) bits >>= 1;
1494 else if (utf8) {
1495 if (s >= strend) break;
f337b084 1496 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1497 } else bits = *(U8 *) s++;
1498 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1499 }
08ca2aa3 1500 } else {
f337b084 1501 U8 bits = 0;
08ca2aa3
TH
1502 ai32 = len;
1503 for (len = 0; len < ai32; len++) {
1504 if (len & 7) bits <<= 1;
1505 else if (utf8) {
1506 if (s >= strend) break;
f337b084 1507 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1508 } else bits = *(U8 *) s++;
1509 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1510 }
1511 }
1512 *str = '\0';
08ca2aa3
TH
1513 SvCUR_set(sv, str - SvPVX(sv));
1514 XPUSHs(sv);
a6ec74c1 1515 break;
08ca2aa3 1516 }
a6ec74c1 1517 case 'H':
08ca2aa3
TH
1518 case 'h': {
1519 char *str;
fc241834 1520 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1521 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1522 len = (strend - s) * 2;
fc241834 1523 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1524 SvPOK_on(sv);
1525 str = SvPVX(sv);
1526 if (datumtype == 'h') {
f337b084 1527 U8 bits = 0;
fc241834
RGS
1528 ai32 = len;
1529 for (len = 0; len < ai32; len++) {
1530 if (len & 1) bits >>= 4;
1531 else if (utf8) {
1532 if (s >= strend) break;
f337b084 1533 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1534 } else bits = * (U8 *) s++;
a6ec74c1
JH
1535 *str++ = PL_hexdigit[bits & 15];
1536 }
08ca2aa3 1537 } else {
f337b084 1538 U8 bits = 0;
08ca2aa3
TH
1539 ai32 = len;
1540 for (len = 0; len < ai32; len++) {
1541 if (len & 1) bits <<= 4;
1542 else if (utf8) {
1543 if (s >= strend) break;
f337b084 1544 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1545 } else bits = *(U8 *) s++;
a6ec74c1
JH
1546 *str++ = PL_hexdigit[(bits >> 4) & 15];
1547 }
1548 }
1549 *str = '\0';
08ca2aa3
TH
1550 SvCUR_set(sv, str - SvPVX(sv));
1551 XPUSHs(sv);
a6ec74c1 1552 break;
08ca2aa3 1553 }
a6ec74c1 1554 case 'c':
73cb7263 1555 while (len-- > 0) {
f337b084 1556 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1557 if (aint >= 128) /* fake up signed chars */
1558 aint -= 256;
08ca2aa3 1559 if (!checksum)
ac7f3b1b 1560 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1561 else if (checksum > bits_in_uv)
1562 cdouble += (NV)aint;
1563 else
1564 cuv += aint;
a6ec74c1
JH
1565 }
1566 break;
1567 case 'C':
08ca2aa3
TH
1568 case 'W':
1569 W_checksum:
35bcd338 1570 if (len == 0) {
fc241834 1571 if (explicit_length && datumtype == 'C')
08ca2aa3 1572 /* Switch to "character" mode */
f337b084 1573 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1574 break;
1575 }
fc241834 1576 if (datumtype == 'C' ?
f337b084
TH
1577 (symptr->flags & FLAG_DO_UTF8) &&
1578 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3
TH
1579 while (len-- > 0 && s < strend) {
1580 UV val;
1581 STRLEN retlen;
f337b084
TH
1582 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1583 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1584 if (retlen == (STRLEN) -1 || retlen == 0)
1585 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1586 s += retlen;
1587 if (!checksum)
1588 PUSHs(sv_2mortal(newSVuv((UV) val)));
1589 else if (checksum > bits_in_uv)
1590 cdouble += (NV) val;
d6d3e8bd 1591 else
08ca2aa3 1592 cuv += val;
fc241834 1593 }
08ca2aa3 1594 } else if (!checksum)
a6ec74c1 1595 while (len-- > 0) {
08ca2aa3
TH
1596 U8 ch = *(U8 *) s++;
1597 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1598 }
08ca2aa3
TH
1599 else if (checksum > bits_in_uv)
1600 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1601 else
1602 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1603 break;
1604 case 'U':
35bcd338 1605 if (len == 0) {
08ca2aa3
TH
1606 if (explicit_length) {
1607 /* Switch to "bytes in UTF-8" mode */
f337b084 1608 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1609 else
1610 /* Should be impossible due to the need_utf8() test */
1611 Perl_croak(aTHX_ "U0 mode on a byte string");
1612 }
35bcd338
JH
1613 break;
1614 }
08ca2aa3 1615 if (len > strend - s) len = strend - s;
fc241834 1616 if (!checksum) {
08ca2aa3
TH
1617 if (len && unpack_only_one) len = 1;
1618 EXTEND(SP, len);
1619 EXTEND_MORTAL(len);
fc241834 1620 }
08ca2aa3
TH
1621 while (len-- > 0 && s < strend) {
1622 STRLEN retlen;
1623 UV auv;
1624 if (utf8) {
1625 U8 result[UTF8_MAXLEN];
1626 char *ptr;
1627 STRLEN len;
1628 ptr = s;
1629 /* Bug: warns about bad utf8 even if we are short on bytes
1630 and will break out of the loop */
230e1fce
NC
1631 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1632 'U'))
08ca2aa3
TH
1633 break;
1634 len = UTF8SKIP(result);
fc241834 1635 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1636 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1637 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1638 s = ptr;
1639 } else {
1640 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1641 if (retlen == (STRLEN) -1 || retlen == 0)
1642 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1643 s += retlen;
1644 }
1645 if (!checksum)
1646 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1647 else if (checksum > bits_in_uv)
08ca2aa3 1648 cdouble += (NV) auv;
73cb7263 1649 else
08ca2aa3 1650 cuv += auv;
a6ec74c1
JH
1651 }
1652 break;
49704364
LW
1653 case 's' | TYPE_IS_SHRIEKING:
1654#if SHORTSIZE != SIZE16
73cb7263 1655 while (len-- > 0) {
08ca2aa3 1656 short ashort;
f337b084
TH
1657 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1658 DO_BO_UNPACK(ashort, s);
08ca2aa3 1659 if (!checksum)
ac7f3b1b 1660 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1661 else if (checksum > bits_in_uv)
1662 cdouble += (NV)ashort;
1663 else
1664 cuv += ashort;
49704364
LW
1665 }
1666 break;
1667#else
1668 /* Fallthrough! */
a6ec74c1 1669#endif
49704364 1670 case 's':
73cb7263 1671 while (len-- > 0) {
08ca2aa3
TH
1672 I16 ai16;
1673
1674#if U16SIZE > SIZE16
1675 ai16 = 0;
1676#endif
f337b084 1677 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1678 DO_BO_UNPACK(ai16, 16);
1109a392 1679#if U16SIZE > SIZE16
73cb7263
NC
1680 if (ai16 > 32767)
1681 ai16 -= 65536;
a6ec74c1 1682#endif
08ca2aa3 1683 if (!checksum)
ac7f3b1b 1684 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1685 else if (checksum > bits_in_uv)
1686 cdouble += (NV)ai16;
1687 else
1688 cuv += ai16;
a6ec74c1
JH
1689 }
1690 break;
49704364
LW
1691 case 'S' | TYPE_IS_SHRIEKING:
1692#if SHORTSIZE != SIZE16
73cb7263 1693 while (len-- > 0) {
08ca2aa3 1694 unsigned short aushort;
f337b084
TH
1695 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1696 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1697 if (!checksum)
1698 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1699 else if (checksum > bits_in_uv)
1700 cdouble += (NV)aushort;
1701 else
1702 cuv += aushort;
49704364
LW
1703 }
1704 break;
1705#else
1706 /* Fallhrough! */
1707#endif
a6ec74c1
JH
1708 case 'v':
1709 case 'n':
1710 case 'S':
73cb7263 1711 while (len-- > 0) {
08ca2aa3
TH
1712 U16 au16;
1713#if U16SIZE > SIZE16
1714 au16 = 0;
1715#endif
f337b084 1716 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1717 DO_BO_UNPACK(au16, 16);
a6ec74c1 1718#ifdef HAS_NTOHS
73cb7263
NC
1719 if (datumtype == 'n')
1720 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1721#endif
1722#ifdef HAS_VTOHS
73cb7263
NC
1723 if (datumtype == 'v')
1724 au16 = vtohs(au16);
a6ec74c1 1725#endif
08ca2aa3
TH
1726 if (!checksum)
1727 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1728 else if (checksum > bits_in_uv)
f337b084 1729 cdouble += (NV) au16;
73cb7263
NC
1730 else
1731 cuv += au16;
a6ec74c1
JH
1732 }
1733 break;
7212898e 1734#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1735 case 'v' | TYPE_IS_SHRIEKING:
1736 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1737 while (len-- > 0) {
08ca2aa3
TH
1738 I16 ai16;
1739# if U16SIZE > SIZE16
1740 ai16 = 0;
1741# endif
f337b084 1742 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1743# ifdef HAS_NTOHS
73cb7263 1744 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1745 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1746# endif /* HAS_NTOHS */
1747# ifdef HAS_VTOHS
73cb7263 1748 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1749 ai16 = (I16) vtohs((U16) ai16);
1750# endif /* HAS_VTOHS */
1751 if (!checksum)
ac7f3b1b 1752 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1753 else if (checksum > bits_in_uv)
08ca2aa3 1754 cdouble += (NV) ai16;
73cb7263
NC
1755 else
1756 cuv += ai16;
068bd2e7
MHM
1757 }
1758 break;
08ca2aa3 1759#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1760 case 'i':
49704364 1761 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1762 while (len-- > 0) {
08ca2aa3 1763 int aint;
f337b084
TH
1764 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1765 DO_BO_UNPACK(aint, i);
08ca2aa3 1766 if (!checksum)
ac7f3b1b 1767 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1768 else if (checksum > bits_in_uv)
1769 cdouble += (NV)aint;
1770 else
1771 cuv += aint;
a6ec74c1
JH
1772 }
1773 break;
1774 case 'I':
49704364 1775 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1776 while (len-- > 0) {
08ca2aa3 1777 unsigned int auint;
f337b084
TH
1778 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1779 DO_BO_UNPACK(auint, i);
08ca2aa3 1780 if (!checksum)
ac7f3b1b 1781 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1782 else if (checksum > bits_in_uv)
1783 cdouble += (NV)auint;
1784 else
1785 cuv += auint;
a6ec74c1
JH
1786 }
1787 break;
92d41999 1788 case 'j':
73cb7263 1789 while (len-- > 0) {
08ca2aa3 1790 IV aiv;
f337b084 1791 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1792#if IVSIZE == INTSIZE
f337b084 1793 DO_BO_UNPACK(aiv, i);
1109a392 1794#elif IVSIZE == LONGSIZE
f337b084 1795 DO_BO_UNPACK(aiv, l);
1109a392 1796#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1797 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1798#else
1799 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1800#endif
08ca2aa3 1801 if (!checksum)
ac7f3b1b 1802 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1803 else if (checksum > bits_in_uv)
1804 cdouble += (NV)aiv;
1805 else
1806 cuv += aiv;
92d41999
JH
1807 }
1808 break;
1809 case 'J':
73cb7263 1810 while (len-- > 0) {
08ca2aa3 1811 UV auv;
f337b084 1812 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1813#if IVSIZE == INTSIZE
f337b084 1814 DO_BO_UNPACK(auv, i);
08ca2aa3 1815#elif IVSIZE == LONGSIZE
f337b084 1816 DO_BO_UNPACK(auv, l);
08ca2aa3 1817#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1818 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1819#else
1820 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1821#endif
08ca2aa3 1822 if (!checksum)
ac7f3b1b 1823 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1824 else if (checksum > bits_in_uv)
1825 cdouble += (NV)auv;
1826 else
1827 cuv += auv;
92d41999
JH
1828 }
1829 break;
49704364
LW
1830 case 'l' | TYPE_IS_SHRIEKING:
1831#if LONGSIZE != SIZE32
73cb7263 1832 while (len-- > 0) {
08ca2aa3 1833 long along;
f337b084
TH
1834 SHIFT_VAR(utf8, s, strend, along, datumtype);
1835 DO_BO_UNPACK(along, l);
08ca2aa3 1836 if (!checksum)
ac7f3b1b 1837 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1838 else if (checksum > bits_in_uv)
1839 cdouble += (NV)along;
1840 else
1841 cuv += along;
49704364
LW
1842 }
1843 break;
1844#else
1845 /* Fallthrough! */
a6ec74c1 1846#endif
49704364 1847 case 'l':
73cb7263 1848 while (len-- > 0) {
08ca2aa3
TH
1849 I32 ai32;
1850#if U32SIZE > SIZE32
1851 ai32 = 0;
1852#endif
f337b084 1853 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1854 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1855#if U32SIZE > SIZE32
08ca2aa3 1856 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1857#endif
08ca2aa3 1858 if (!checksum)
ac7f3b1b 1859 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1860 else if (checksum > bits_in_uv)
1861 cdouble += (NV)ai32;
1862 else
1863 cuv += ai32;
a6ec74c1
JH
1864 }
1865 break;
49704364
LW
1866 case 'L' | TYPE_IS_SHRIEKING:
1867#if LONGSIZE != SIZE32
73cb7263 1868 while (len-- > 0) {
08ca2aa3 1869 unsigned long aulong;
f337b084
TH
1870 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1871 DO_BO_UNPACK(aulong, l);
08ca2aa3 1872 if (!checksum)
ac7f3b1b 1873 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1874 else if (checksum > bits_in_uv)
1875 cdouble += (NV)aulong;
1876 else
1877 cuv += aulong;
49704364
LW
1878 }
1879 break;
1880#else
1881 /* Fall through! */
1882#endif
a6ec74c1
JH
1883 case 'V':
1884 case 'N':
1885 case 'L':
73cb7263 1886 while (len-- > 0) {
08ca2aa3
TH
1887 U32 au32;
1888#if U32SIZE > SIZE32
1889 au32 = 0;
1890#endif
f337b084 1891 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1892 DO_BO_UNPACK(au32, 32);
a6ec74c1 1893#ifdef HAS_NTOHL
73cb7263
NC
1894 if (datumtype == 'N')
1895 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1896#endif
1897#ifdef HAS_VTOHL
73cb7263
NC
1898 if (datumtype == 'V')
1899 au32 = vtohl(au32);
a6ec74c1 1900#endif
08ca2aa3 1901 if (!checksum)
fc241834
RGS
1902 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1903 else if (checksum > bits_in_uv)
1904 cdouble += (NV)au32;
1905 else
1906 cuv += au32;
a6ec74c1
JH
1907 }
1908 break;
7212898e 1909#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1910 case 'V' | TYPE_IS_SHRIEKING:
1911 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1912 while (len-- > 0) {
08ca2aa3
TH
1913 I32 ai32;
1914# if U32SIZE > SIZE32
1915 ai32 = 0;
1916# endif
f337b084 1917 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1918# ifdef HAS_NTOHL
73cb7263
NC
1919 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1920 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1921# endif
1922# ifdef HAS_VTOHL
73cb7263
NC
1923 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1924 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1925# endif
1926 if (!checksum)
ac7f3b1b 1927 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1928 else if (checksum > bits_in_uv)
1929 cdouble += (NV)ai32;
1930 else
1931 cuv += ai32;
068bd2e7
MHM
1932 }
1933 break;
08ca2aa3 1934#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1935 case 'p':
a6ec74c1 1936 while (len-- > 0) {
08ca2aa3 1937 char *aptr;
f337b084 1938 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
5512a2f9 1939 DO_BO_UNPACK_P(aptr);
c4c5f44a
NC
1940 /* newSVpv generates undef if aptr is NULL */
1941 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1942 }
1943 break;
1944 case 'w':
a6ec74c1
JH
1945 {
1946 UV auv = 0;
1947 U32 bytes = 0;
fc241834 1948
08ca2aa3
TH
1949 while (len > 0 && s < strend) {
1950 U8 ch;
f337b084 1951 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1952 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1953 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1954 if (ch < 0x80) {
a6ec74c1 1955 bytes = 0;
ac7f3b1b 1956 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1957 len--;
1958 auv = 0;
08ca2aa3 1959 continue;
a6ec74c1 1960 }
08ca2aa3 1961 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
1962 char *t;
1963 STRLEN n_a;
1964
1965 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1966 while (s < strend) {
f337b084 1967 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1968 sv = mul128(sv, (U8)(ch & 0x7f));
1969 if (!(ch & 0x80)) {
a6ec74c1
JH
1970 bytes = 0;
1971 break;
1972 }
1973 }
1974 t = SvPV(sv, n_a);
1975 while (*t == '0')
1976 t++;
1977 sv_chop(sv, t);
1978 PUSHs(sv_2mortal(sv));
1979 len--;
1980 auv = 0;
1981 }
1982 }
1983 if ((s >= strend) && bytes)
49704364 1984 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1985 }
1986 break;
1987 case 'P':
49704364
LW
1988 if (symptr->howlen == e_star)
1989 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1990 EXTEND(SP, 1);
08ca2aa3
TH
1991 if (sizeof(char*) <= strend - s) {
1992 char *aptr;
f337b084 1993 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
08ca2aa3 1994 DO_BO_UNPACK_P(aptr);
fc241834
RGS
1995 /* newSVpvn generates undef if aptr is NULL */
1996 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 1997 }
a6ec74c1
JH
1998 break;
1999#ifdef HAS_QUAD
2000 case 'q':
73cb7263 2001 while (len-- > 0) {
08ca2aa3 2002 Quad_t aquad;
f337b084
TH
2003 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2004 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2005 if (!checksum)
2006 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2007 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2008 else if (checksum > bits_in_uv)
2009 cdouble += (NV)aquad;
2010 else
2011 cuv += aquad;
2012 }
a6ec74c1
JH
2013 break;
2014 case 'Q':
73cb7263 2015 while (len-- > 0) {
08ca2aa3 2016 Uquad_t auquad;
f337b084
TH
2017 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2018 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2019 if (!checksum)
2020 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2021 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2022 else if (checksum > bits_in_uv)
2023 cdouble += (NV)auquad;
2024 else
2025 cuv += auquad;
a6ec74c1
JH
2026 }
2027 break;
08ca2aa3 2028#endif /* HAS_QUAD */
a6ec74c1
JH
2029 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2030 case 'f':
73cb7263 2031 while (len-- > 0) {
08ca2aa3 2032 float afloat;
f337b084 2033 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2034 DO_BO_UNPACK_N(afloat, float);
2035 if (!checksum)
2036 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2037 else
73cb7263 2038 cdouble += afloat;
fc241834 2039 }
a6ec74c1
JH
2040 break;
2041 case 'd':
73cb7263 2042 while (len-- > 0) {
08ca2aa3 2043 double adouble;
f337b084 2044 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2045 DO_BO_UNPACK_N(adouble, double);
2046 if (!checksum)
2047 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2048 else
73cb7263 2049 cdouble += adouble;
fc241834 2050 }
a6ec74c1 2051 break;
92d41999 2052 case 'F':
73cb7263 2053 while (len-- > 0) {
08ca2aa3 2054 NV anv;
f337b084 2055 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2056 DO_BO_UNPACK_N(anv, NV);
2057 if (!checksum)
2058 PUSHs(sv_2mortal(newSVnv(anv)));
2059 else
73cb7263 2060 cdouble += anv;
fc241834 2061 }
92d41999
JH
2062 break;
2063#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2064 case 'D':
73cb7263 2065 while (len-- > 0) {
08ca2aa3 2066 long double aldouble;
f337b084 2067 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2068 DO_BO_UNPACK_N(aldouble, long double);
2069 if (!checksum)
2070 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2071 else
2072 cdouble += aldouble;
92d41999
JH
2073 }
2074 break;
2075#endif
a6ec74c1
JH
2076 case 'u':
2077 /* MKS:
2078 * Initialise the decode mapping. By using a table driven
2079 * algorithm, the code will be character-set independent
2080 * (and just as fast as doing character arithmetic)
2081 */
2082 if (PL_uudmap['M'] == 0) {
2083 int i;
2084
2085 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2086 PL_uudmap[(U8)PL_uuemap[i]] = i;
2087 /*
2088 * Because ' ' and '`' map to the same value,
2089 * we need to decode them both the same.
2090 */
2091 PL_uudmap[' '] = 0;
2092 }
08ca2aa3
TH
2093 {
2094 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2095 sv = sv_2mortal(NEWSV(42, l));
2096 if (l) SvPOK_on(sv);
2097 }
2098 if (utf8) {
2099 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2100 I32 a, b, c, d;
2101 char hunk[4];
2102
2103 hunk[3] = '\0';
2104 while (len > 0) {
2105 next_uni_uu(aTHX_ &s, strend, &a);
2106 next_uni_uu(aTHX_ &s, strend, &b);
2107 next_uni_uu(aTHX_ &s, strend, &c);
2108 next_uni_uu(aTHX_ &s, strend, &d);
2109 hunk[0] = (char)((a << 2) | (b >> 4));
2110 hunk[1] = (char)((b << 4) | (c >> 2));
2111 hunk[2] = (char)((c << 6) | d);
2112 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2113 len -= 3;
2114 }
2115 if (s < strend) {
2116 if (*s == '\n') s++;
2117 else {
2118 /* possible checksum byte */
2119 char *skip = s+UTF8SKIP(s);
2120 if (skip < strend && *skip == '\n') s = skip+1;
2121 }
2122 }
2123 }
2124 } else {
fc241834
RGS
2125 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2126 I32 a, b, c, d;
2127 char hunk[4];
a6ec74c1 2128
fc241834
RGS
2129 hunk[3] = '\0';
2130 len = PL_uudmap[*(U8*)s++] & 077;
2131 while (len > 0) {
2132 if (s < strend && ISUUCHAR(*s))
2133 a = PL_uudmap[*(U8*)s++] & 077;
2134 else
2135 a = 0;
2136 if (s < strend && ISUUCHAR(*s))
2137 b = PL_uudmap[*(U8*)s++] & 077;
2138 else
2139 b = 0;
2140 if (s < strend && ISUUCHAR(*s))
2141 c = PL_uudmap[*(U8*)s++] & 077;
2142 else
2143 c = 0;
2144 if (s < strend && ISUUCHAR(*s))
2145 d = PL_uudmap[*(U8*)s++] & 077;
2146 else
2147 d = 0;
2148 hunk[0] = (char)((a << 2) | (b >> 4));
2149 hunk[1] = (char)((b << 4) | (c >> 2));
2150 hunk[2] = (char)((c << 6) | d);
2151 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2152 len -= 3;
2153 }
2154 if (*s == '\n')
2155 s++;
2156 else /* possible checksum byte */
2157 if (s + 1 < strend && s[1] == '\n')
2158 s += 2;
a6ec74c1 2159 }
08ca2aa3
TH
2160 }
2161 XPUSHs(sv);
a6ec74c1
JH
2162 break;
2163 }
49704364 2164
a6ec74c1 2165 if (checksum) {
1109a392 2166 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2167 (checksum > bits_in_uv &&
08ca2aa3
TH
2168 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2169 NV trouble, anv;
a6ec74c1 2170
08ca2aa3 2171 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2172 while (checksum >= 16) {
2173 checksum -= 16;
08ca2aa3 2174 anv *= 65536.0;
a6ec74c1 2175 }
a6ec74c1 2176 while (cdouble < 0.0)
08ca2aa3
TH
2177 cdouble += anv;
2178 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2179 sv = newSVnv(cdouble);
a6ec74c1
JH
2180 }
2181 else {
fa8ec7c1
NC
2182 if (checksum < bits_in_uv) {
2183 UV mask = ((UV)1 << checksum) - 1;
92d41999 2184 cuv &= mask;
a6ec74c1 2185 }
c4c5f44a 2186 sv = newSVuv(cuv);
a6ec74c1
JH
2187 }
2188 XPUSHs(sv_2mortal(sv));
2189 checksum = 0;
2190 }
fc241834 2191
49704364
LW
2192 if (symptr->flags & FLAG_SLASH){
2193 if (SP - PL_stack_base - start_sp_offset <= 0)
2194 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2195 if( next_symbol(symptr) ){
2196 if( symptr->howlen == e_number )
2197 Perl_croak(aTHX_ "Count after length/code in unpack" );
2198 if( beyond ){
2199 /* ...end of char buffer then no decent length available */
2200 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2201 } else {
2202 /* take top of stack (hope it's numeric) */
2203 len = POPi;
2204 if( len < 0 )
2205 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2206 }
2207 } else {
2208 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2209 }
2210 datumtype = symptr->code;
21c16052 2211 explicit_length = FALSE;
49704364
LW
2212 goto redo_switch;
2213 }
a6ec74c1 2214 }
49704364 2215
18529408
IZ
2216 if (new_s)
2217 *new_s = s;
2218 PUTBACK;
2219 return SP - PL_stack_base - start_sp_offset;
2220}
2221
2222PP(pp_unpack)
2223{
2224 dSP;
bab9c0ac 2225 dPOPPOPssrl;
18529408
IZ
2226 I32 gimme = GIMME_V;
2227 STRLEN llen;
2228 STRLEN rlen;
08ca2aa3
TH
2229 char *pat = SvPV(left, llen);
2230 char *s = SvPV(right, rlen);
18529408 2231 char *strend = s + rlen;
08ca2aa3
TH
2232 char *patend = pat + llen;
2233 I32 cnt;
18529408
IZ
2234
2235 PUTBACK;
7accc089 2236 cnt = unpackstring(pat, patend, s, strend,
49704364 2237 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2238 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2239
18529408
IZ
2240 SPAGAIN;
2241 if ( !cnt && gimme == G_SCALAR )
2242 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2243 RETURN;
2244}
2245
f337b084
TH
2246STATIC U8 *
2247doencodes(U8 *h, char *s, I32 len)
a6ec74c1 2248{
f337b084 2249 *h++ = PL_uuemap[len];
a6ec74c1 2250 while (len > 2) {
f337b084
TH
2251 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2252 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2253 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2254 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2255 s += 3;
2256 len -= 3;
2257 }
2258 if (len > 0) {
2259 char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2260 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2261 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2262 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2263 *h++ = PL_uuemap[0];
a6ec74c1 2264 }
f337b084
TH
2265 *h++ = '\n';
2266 return h;
a6ec74c1
JH
2267}
2268
2269STATIC SV *
2270S_is_an_int(pTHX_ char *s, STRLEN l)
2271{
2272 STRLEN n_a;
2273 SV *result = newSVpvn(s, l);
2274 char *result_c = SvPV(result, n_a); /* convenience */
2275 char *out = result_c;
2276 bool skip = 1;
2277 bool ignore = 0;
2278
2279 while (*s) {
2280 switch (*s) {
2281 case ' ':
2282 break;
2283 case '+':
2284 if (!skip) {
2285 SvREFCNT_dec(result);
2286 return (NULL);
2287 }
2288 break;
2289 case '0':
2290 case '1':
2291 case '2':
2292 case '3':
2293 case '4':
2294 case '5':
2295 case '6':
2296 case '7':
2297 case '8':
2298 case '9':
2299 skip = 0;
2300 if (!ignore) {
2301 *(out++) = *s;
2302 }
2303 break;
2304 case '.':
2305 ignore = 1;
2306 break;
2307 default:
2308 SvREFCNT_dec(result);
2309 return (NULL);
2310 }
2311 s++;
2312 }
2313 *(out++) = '\0';
2314 SvCUR_set(result, out - result_c);
2315 return (result);
2316}
2317
2318/* pnum must be '\0' terminated */
2319STATIC int
2320S_div128(pTHX_ SV *pnum, bool *done)
2321{
2322 STRLEN len;
2323 char *s = SvPV(pnum, len);
2324 int m = 0;
2325 int r = 0;
2326 char *t = s;
2327
2328 *done = 1;
2329 while (*t) {
2330 int i;
2331
2332 i = m * 10 + (*t - '0');
2333 m = i & 0x7F;
2334 r = (i >> 7); /* r < 10 */
2335 if (r) {
2336 *done = 0;
2337 }
2338 *(t++) = '0' + r;
2339 }
2340 *(t++) = '\0';
2341 SvCUR_set(pnum, (STRLEN) (t - s));
2342 return (m);
2343}
2344
49704364 2345
a6ec74c1 2346
18529408
IZ
2347/*
2348=for apidoc pack_cat
2349
7accc089
JH
2350The engine implementing pack() Perl function. Note: parameters next_in_list and
2351flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2352
2353=cut */
2354
49704364 2355
18529408
IZ
2356void
2357Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2358{
49704364
LW
2359 tempsym_t sym = { 0 };
2360 sym.patptr = pat;
2361 sym.patend = patend;
7accc089
JH
2362 sym.flags = FLAG_PACK;
2363
2364 (void)pack_rec( cat, &sym, beglist, endlist );
2365}
2366
2367
2368/*
2369=for apidoc packlist
2370
2371The engine implementing pack() Perl function.
2372
2373=cut */
2374
2375
2376void
2377Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2378{
f337b084 2379 STRLEN no_len;
7accc089 2380 tempsym_t sym = { 0 };
f337b084 2381
7accc089
JH
2382 sym.patptr = pat;
2383 sym.patend = patend;
2384 sym.flags = FLAG_PACK;
49704364 2385
f337b084
TH
2386 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2387 Also make sure any UTF8 flag is loaded */
2388 SvPV_force(cat, no_len);
2389 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2390
49704364
LW
2391 (void)pack_rec( cat, &sym, beglist, endlist );
2392}
2393
f337b084
TH
2394/* like sv_utf8_upgrade, but also repoint the group start markers */
2395STATIC void
2396marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2397 STRLEN len;
2398 tempsym_t *group;
2399 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2400
2401 if (SvUTF8(sv)) return;
2402
2403 from_start = SvPVX(sv);
2404 from_end = from_start + SvCUR(sv);
2405 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2406 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2407 if (from_ptr == from_end) {
2408 /* Simple case: no character needs to be changed */
2409 SvUTF8_on(sv);
2410 return;
2411 }
2412
3473cf63 2413 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
f337b084
TH
2414 New('U', to_start, len, char);
2415 Copy(from_start, to_start, from_ptr-from_start, char);
2416 to_ptr = to_start + (from_ptr-from_start);
2417
2418 New('U', marks, sym_ptr->level+2, char *);
2419 for (group=sym_ptr; group; group = group->previous)
2420 marks[group->level] = from_start + group->strbeg;
2421 marks[sym_ptr->level+1] = from_end+1;
2422 for (m = marks; *m < from_ptr; m++)
2423 *m = to_start + (*m-from_start);
2424
2425 for (;from_ptr < from_end; from_ptr++) {
2426 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2427 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2428 }
2429 *to_ptr = 0;
2430
2431 while (*m == from_ptr) *m++ = to_ptr;
2432 if (m != marks + sym_ptr->level+1) {
2433 Safefree(marks);
2434 Safefree(to_start);
2435 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2436 }
2437 for (group=sym_ptr; group; group = group->previous)
2438 group->strbeg = marks[group->level] - to_start;
2439 Safefree(marks);
2440
2441 if (SvOOK(sv)) {
2442 if (SvIVX(sv)) {
2443 SvLEN(sv) += SvIVX(sv);
2444 from_start -= SvIVX(sv);
2445 SvIV_set(sv, 0);
2446 }
2447 SvFLAGS(sv) &= ~SVf_OOK;
2448 }
2449 if (SvLEN(sv) != 0)
2450 Safefree(from_start);
2451 SvPVX(sv) = to_start;
2452 SvCUR(sv) = to_ptr - to_start;
2453 SvLEN(sv) = len;
2454 SvUTF8_on(sv);
2455}
2456
2457/* Exponential string grower. Makes string extension effectively O(n)
2458 needed says how many extra bytes we need (not counting the final '\0')
2459 Only grows the string if there is an actual lack of space
2460*/
2461STATIC char *
2462sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2463 STRLEN cur = SvCUR(sv);
2464 STRLEN len = SvLEN(sv);
2465 STRLEN extend;
2466 if (len - cur > needed) return SvPVX(sv);
2467 extend = needed > len ? needed : len;
2468 return SvGROW(sv, len+extend+1);
2469}
49704364
LW
2470
2471STATIC
2472SV **
f337b084 2473S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2474{
49704364 2475 tempsym_t lookahead;
f337b084
TH
2476 I32 items = endlist - beglist;
2477 bool found = next_symbol(symptr);
2478 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2479
2480 if (symptr->level == 0 && found && symptr->code == 'U') {
2481 marked_upgrade(aTHX_ cat, symptr);
2482 symptr->flags |= FLAG_DO_UTF8;
2483 utf8 = 0;
49704364 2484 }
f337b084 2485 symptr->strbeg = SvCUR(cat);
49704364
LW
2486
2487 while (found) {
f337b084
TH
2488 SV *fromstr;
2489 STRLEN fromlen;
2490 I32 len;
a6ec74c1 2491 SV *lengthcode = Nullsv;
49704364 2492 I32 datumtype = symptr->code;
f337b084
TH
2493 howlen_t howlen = symptr->howlen;
2494 char *start = SvPVX(cat);
2495 char *cur = start + SvCUR(cat);
49704364 2496
f337b084
TH
2497#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2498
2499 switch (howlen) {
fc241834 2500 case e_star:
f337b084
TH
2501 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2502 0 : items;
2503 break;
2504 default:
2505 /* e_no_len and e_number */
2506 len = symptr->length;
49704364
LW
2507 break;
2508 }
2509
f337b084 2510 if (len) {
a7a3cfaa 2511 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2512
a7a3cfaa
TH
2513 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2514 /* We can process this letter. */
2515 STRLEN size = props & PACK_SIZE_MASK;
2516 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2517 }
f337b084
TH
2518 }
2519
49704364
LW
2520 /* Look ahead for next symbol. Do we have code/code? */
2521 lookahead = *symptr;
2522 found = next_symbol(&lookahead);
246f24af
TH
2523 if (symptr->flags & FLAG_SLASH) {
2524 IV count;
f337b084 2525 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2526 if (strchr("aAZ", lookahead.code)) {
2527 if (lookahead.howlen == e_number) count = lookahead.length;
2528 else {
2529 if (items > 0)
2530 count = DO_UTF8(*beglist) ?
2531 sv_len_utf8(*beglist) : sv_len(*beglist);
2532 else count = 0;
2533 if (lookahead.code == 'Z') count++;
2534 }
2535 } else {
2536 if (lookahead.howlen == e_number && lookahead.length < items)
2537 count = lookahead.length;
2538 else count = items;
2539 }
2540 lookahead.howlen = e_number;
2541 lookahead.length = count;
2542 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2543 }
49704364 2544
fc241834
RGS
2545 /* Code inside the switch must take care to properly update
2546 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2547 doesn't simply leave using break */
1109a392 2548 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2549 default:
f337b084
TH
2550 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2551 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2552 case '%':
49704364 2553 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2554 {
2555 char *from;
2556#ifdef PERL_PACK_CAN_SHRIEKSIGN
2557 case '.' | TYPE_IS_SHRIEKING:
2558#endif
2559 case '.':
2560 if (howlen == e_star) from = start;
2561 else if (len == 0) from = cur;
2562 else {
2563 tempsym_t *group = symptr;
2564
2565 while (--len && group) group = group->previous;
2566 from = group ? start + group->strbeg : start;
2567 }
2568 fromstr = NEXTFROM;
2569 len = SvIV(fromstr);
2570 goto resize;
2571#ifdef PERL_PACK_CAN_SHRIEKSIGN
2572 case '@' | TYPE_IS_SHRIEKING:
2573#endif
a6ec74c1 2574 case '@':
28be1210
TH
2575 from = start + symptr->strbeg;
2576 resize:
2577#ifdef PERL_PACK_CAN_SHRIEKSIGN
2578 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2579#else /* PERL_PACK_CAN_SHRIEKSIGN */
2580 if (utf8)
2581#endif
2582 if (len >= 0) {
2583 while (len && from < cur) {
2584 from += UTF8SKIP(from);
2585 len--;
2586 }
2587 if (from > cur)
2588 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2589 if (len) {
2590 /* Here we know from == cur */
2591 grow:
2592 GROWING(0, cat, start, cur, len);
2593 Zero(cur, len, char);
2594 cur += len;
2595 } else if (from < cur) {
2596 len = cur - from;
2597 goto shrink;
2598 } else goto no_change;
2599 } else {
2600 cur = from;
2601 len = -len;
2602 goto utf8_shrink;
f337b084 2603 }
28be1210
TH
2604 else {
2605 len -= cur - from;
f337b084 2606 if (len > 0) goto grow;
28be1210 2607 if (len == 0) goto no_change;
fc241834 2608 len = -len;
28be1210 2609 goto shrink;
f337b084 2610 }
a6ec74c1 2611 break;
28be1210 2612 }
fc241834 2613 case '(': {
49704364 2614 tempsym_t savsym = *symptr;
66c611c5
MHM
2615 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2616 symptr->flags |= group_modifiers;
49704364
LW
2617 symptr->patend = savsym.grpend;
2618 symptr->level++;
f337b084 2619 symptr->previous = &lookahead;
18529408 2620 while (len--) {
f337b084
TH
2621 U32 was_utf8;
2622 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2623 else symptr->flags &= ~FLAG_PARSE_UTF8;
2624 was_utf8 = SvUTF8(cat);
49704364 2625 symptr->patptr = savsym.grpbeg;
f337b084
TH
2626 beglist = pack_rec(cat, symptr, beglist, endlist);
2627 if (SvUTF8(cat) != was_utf8)
2628 /* This had better be an upgrade while in utf8==0 mode */
2629 utf8 = 1;
2630
49704364 2631 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2632 break; /* No way to continue */
2633 }
f337b084
TH
2634 lookahead.flags = symptr->flags & ~group_modifiers;
2635 goto no_change;
18529408 2636 }
62f95557
IZ
2637 case 'X' | TYPE_IS_SHRIEKING:
2638 if (!len) /* Avoid division by 0 */
2639 len = 1;
f337b084
TH
2640 if (utf8) {
2641 char *hop, *last;
2642 I32 l = len;
2643 hop = last = start;
2644 while (hop < cur) {
2645 hop += UTF8SKIP(hop);
2646 if (--l == 0) {
2647 last = hop;
2648 l = len;
2649 }
2650 }
2651 if (last > cur)
2652 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2653 cur = last;
2654 break;
2655 }
2656 len = (cur-start) % len;
62f95557 2657 /* FALL THROUGH */
a6ec74c1 2658 case 'X':
f337b084
TH
2659 if (utf8) {
2660 if (len < 1) goto no_change;
28be1210 2661 utf8_shrink:
f337b084
TH
2662 while (len > 0) {
2663 if (cur <= start)
28be1210
TH
2664 Perl_croak(aTHX_ "'%c' outside of string in pack",
2665 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2666 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2667 if (cur <= start)
28be1210
TH
2668 Perl_croak(aTHX_ "'%c' outside of string in pack",
2669 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2670 }
2671 len--;
2672 }
2673 } else {
fc241834 2674 shrink:
f337b084 2675 if (cur - start < len)
28be1210
TH
2676 Perl_croak(aTHX_ "'%c' outside of string in pack",
2677 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2678 cur -= len;
2679 }
2680 if (cur < start+symptr->strbeg) {
2681 /* Make sure group starts don't point into the void */
2682 tempsym_t *group;
2683 STRLEN length = cur-start;
2684 for (group = symptr;
2685 group && length < group->strbeg;
2686 group = group->previous) group->strbeg = length;
2687 lookahead.strbeg = length;
2688 }
a6ec74c1 2689 break;
fc241834
RGS
2690 case 'x' | TYPE_IS_SHRIEKING: {
2691 I32 ai32;
62f95557
IZ
2692 if (!len) /* Avoid division by 0 */
2693 len = 1;
230e1fce 2694 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2695 else ai32 = (cur - start) % len;
2696 if (ai32 == 0) goto no_change;
2697 len -= ai32;
2698 }
2699 /* FALL THROUGH */
a6ec74c1 2700 case 'x':
f337b084 2701 goto grow;
a6ec74c1
JH
2702 case 'A':
2703 case 'Z':
f337b084
TH
2704 case 'a': {
2705 char *aptr;
2706
a6ec74c1
JH
2707 fromstr = NEXTFROM;
2708 aptr = SvPV(fromstr, fromlen);
f337b084
TH
2709 if (DO_UTF8(fromstr)) {
2710 char *end, *s;
2711
2712 if (!utf8 && !SvUTF8(cat)) {
2713 marked_upgrade(aTHX_ cat, symptr);
2714 lookahead.flags |= FLAG_DO_UTF8;
2715 lookahead.strbeg = symptr->strbeg;
2716 utf8 = 1;
2717 start = SvPVX(cat);
2718 cur = start + SvCUR(cat);
2719 }
fc241834 2720 if (howlen == e_star) {
f337b084
TH
2721 if (utf8) goto string_copy;
2722 len = fromlen+1;
2723 }
2724 s = aptr;
2725 end = aptr + fromlen;
2726 fromlen = datumtype == 'Z' ? len-1 : len;
2727 while ((I32) fromlen > 0 && s < end) {
2728 s += UTF8SKIP(s);
2729 fromlen--;
2730 }
2731 if (s > end)
2732 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2733 if (utf8) {
fc241834 2734 len = fromlen;
f337b084
TH
2735 if (datumtype == 'Z') len++;
2736 fromlen = s-aptr;
2737 len += fromlen;
fc241834 2738
f337b084 2739 goto string_copy;
fc241834 2740 }
f337b084
TH
2741 fromlen = len - fromlen;
2742 if (datumtype == 'Z') fromlen--;
2743 if (howlen == e_star) {
2744 len = fromlen;
2745 if (datumtype == 'Z') len++;
fc241834 2746 }
f337b084 2747 GROWING(0, cat, start, cur, len);
fc241834 2748 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2749 datumtype | TYPE_IS_PACK))
2750 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2751 cur += fromlen;
a6ec74c1 2752 len -= fromlen;
f337b084
TH
2753 } else if (utf8) {
2754 if (howlen == e_star) {
2755 len = fromlen;
2756 if (datumtype == 'Z') len++;
a6ec74c1 2757 }
f337b084
TH
2758 if (len <= (I32) fromlen) {
2759 fromlen = len;
2760 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2761 }
fc241834 2762 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2763 upgrade, so:
2764 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2765 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2766 len -= fromlen;
2767 while (fromlen > 0) {
230e1fce 2768 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2769 aptr++;
2770 fromlen--;
fc241834 2771 }
f337b084
TH
2772 } else {
2773 string_copy:
2774 if (howlen == e_star) {
2775 len = fromlen;
2776 if (datumtype == 'Z') len++;
2777 }
2778 if (len <= (I32) fromlen) {
2779 fromlen = len;
2780 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2781 }
f337b084
TH
2782 GROWING(0, cat, start, cur, len);
2783 Copy(aptr, cur, fromlen, char);
2784 cur += fromlen;
2785 len -= fromlen;
a6ec74c1 2786 }
f337b084
TH
2787 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2788 cur += len;
a6ec74c1 2789 break;
f337b084 2790 }
a6ec74c1 2791 case 'B':
f337b084
TH
2792 case 'b': {
2793 char *str, *end;
2794 I32 l, field_len;
2795 U8 bits;
2796 bool utf8_source;
2797 U32 utf8_flags;
a6ec74c1 2798
fc241834
RGS
2799 fromstr = NEXTFROM;
2800 str = SvPV(fromstr, fromlen);
f337b084
TH
2801 end = str + fromlen;
2802 if (DO_UTF8(fromstr)) {
2803 utf8_source = TRUE;
2804 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2805 } else {
2806 utf8_source = FALSE;
2807 utf8_flags = 0; /* Unused, but keep compilers happy */
2808 }
2809 if (howlen == e_star) len = fromlen;
2810 field_len = (len+7)/8;
2811 GROWING(utf8, cat, start, cur, field_len);
2812 if (len > (I32)fromlen) len = fromlen;
2813 bits = 0;
2814 l = 0;
2815 if (datumtype == 'B')
2816 while (l++ < len) {
2817 if (utf8_source) {
2818 UV val;
2819 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2820 bits |= val & 1;
2821 } else bits |= *str++ & 1;
2822 if (l & 7) bits <<= 1;
fc241834 2823 else {
f337b084
TH
2824 PUSH_BYTE(utf8, cur, bits);
2825 bits = 0;
a6ec74c1
JH
2826 }
2827 }
f337b084
TH
2828 else
2829 /* datumtype == 'b' */
2830 while (l++ < len) {
2831 if (utf8_source) {
2832 UV val;
2833 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2834 if (val & 1) bits |= 0x80;
2835 } else if (*str++ & 1)
2836 bits |= 0x80;
2837 if (l & 7) bits >>= 1;
fc241834 2838 else {
f337b084
TH
2839 PUSH_BYTE(utf8, cur, bits);
2840 bits = 0;
a6ec74c1
JH
2841 }
2842 }
f337b084
TH
2843 l--;
2844 if (l & 7) {
fc241834 2845 if (datumtype == 'B')
f337b084 2846 bits <<= 7 - (l & 7);
fc241834 2847 else
f337b084
TH
2848 bits >>= 7 - (l & 7);
2849 PUSH_BYTE(utf8, cur, bits);
2850 l += 7;
a6ec74c1 2851 }
f337b084
TH
2852 /* Determine how many chars are left in the requested field */
2853 l /= 8;
2854 if (howlen == e_star) field_len = 0;
2855 else field_len -= l;
2856 Zero(cur, field_len, char);
2857 cur += field_len;
a6ec74c1 2858 break;
f337b084 2859 }
a6ec74c1 2860 case 'H':
f337b084
TH
2861 case 'h': {
2862 char *str, *end;
2863 I32 l, field_len;
2864 U8 bits;
2865 bool utf8_source;
2866 U32 utf8_flags;
a6ec74c1 2867
fc241834
RGS
2868 fromstr = NEXTFROM;
2869 str = SvPV(fromstr, fromlen);
f337b084
TH
2870 end = str + fromlen;
2871 if (DO_UTF8(fromstr)) {
2872 utf8_source = TRUE;
2873 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2874 } else {
2875 utf8_source = FALSE;
2876 utf8_flags = 0; /* Unused, but keep compilers happy */
2877 }
2878 if (howlen == e_star) len = fromlen;
2879 field_len = (len+1)/2;
2880 GROWING(utf8, cat, start, cur, field_len);
2881 if (!utf8 && len > (I32)fromlen) len = fromlen;
2882 bits = 0;
2883 l = 0;
2884 if (datumtype == 'H')
2885 while (l++ < len) {
2886 if (utf8_source) {
2887 UV val;
2888 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2889 if (val < 256 && isALPHA(val))
2890 bits |= (val + 9) & 0xf;
a6ec74c1 2891 else
f337b084
TH
2892 bits |= val & 0xf;
2893 } else if (isALPHA(*str))
2894 bits |= (*str++ + 9) & 0xf;
2895 else
2896 bits |= *str++ & 0xf;
2897 if (l & 1) bits <<= 4;
fc241834 2898 else {
f337b084
TH
2899 PUSH_BYTE(utf8, cur, bits);
2900 bits = 0;
a6ec74c1
JH
2901 }
2902 }
f337b084
TH
2903 else
2904 while (l++ < len) {
2905 if (utf8_source) {
2906 UV val;
2907 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2908 if (val < 256 && isALPHA(val))
2909 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2910 else
f337b084
TH
2911 bits |= (val & 0xf) << 4;
2912 } else if (isALPHA(*str))
2913 bits |= ((*str++ + 9) & 0xf) << 4;
2914 else
2915 bits |= (*str++ & 0xf) << 4;
2916 if (l & 1) bits >>= 4;
fc241834 2917 else {
f337b084
TH
2918 PUSH_BYTE(utf8, cur, bits);
2919 bits = 0;
a6ec74c1 2920 }
fc241834 2921 }
f337b084
TH
2922 l--;
2923 if (l & 1) {
2924 PUSH_BYTE(utf8, cur, bits);
2925 l++;
2926 }
2927 /* Determine how many chars are left in the requested field */
2928 l /= 2;
2929 if (howlen == e_star) field_len = 0;
2930 else field_len -= l;
2931 Zero(cur, field_len, char);
2932 cur += field_len;
2933 break;
fc241834
RGS
2934 }
2935 case 'c':
f337b084
TH
2936 while (len-- > 0) {
2937 IV aiv;
2938 fromstr = NEXTFROM;
2939 aiv = SvIV(fromstr);
2940 if ((-128 > aiv || aiv > 127) &&
2941 ckWARN(WARN_PACK))
2942 Perl_warner(aTHX_ packWARN(WARN_PACK),
2943 "Character in 'c' format wrapped in pack");
2944 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2945 }
2946 break;
2947 case 'C':
f337b084
TH
2948 if (len == 0) {
2949 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2950 break;
2951 }
2952 GROWING(0, cat, start, cur, len);
a6ec74c1 2953 while (len-- > 0) {
f337b084 2954 IV aiv;
a6ec74c1 2955 fromstr = NEXTFROM;
f337b084
TH
2956 aiv = SvIV(fromstr);
2957 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2958 ckWARN(WARN_PACK))
2959 Perl_warner(aTHX_ packWARN(WARN_PACK),
2960 "Character in 'C' format wrapped in pack");
f337b084
TH
2961 *cur++ = aiv & 0xff;
2962 }
fc241834
RGS
2963 break;
2964 case 'W': {
2965 char *end;
2966 U8 in_bytes = IN_BYTES;
2967
2968 end = start+SvLEN(cat)-1;
2969 if (utf8) end -= UTF8_MAXLEN-1;
2970 while (len-- > 0) {
2971 UV auv;
2972 fromstr = NEXTFROM;
2973 auv = SvUV(fromstr);
2974 if (in_bytes) auv = auv % 0x100;
2975 if (utf8) {
2976 W_utf8:
2977 if (cur > end) {
2978 *cur = '\0';
2979 SvCUR(cat) = cur - start;
2980
2981 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2982 end = start+SvLEN(cat)-UTF8_MAXLEN;
2983 }
230e1fce
NC
2984 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2985 NATIVE_TO_UNI(auv),
2986 ckWARN(WARN_UTF8) ?
2987 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2988 } else {
2989 if (auv >= 0x100) {
2990 if (!SvUTF8(cat)) {
2991 *cur = '\0';
2992 SvCUR(cat) = cur - start;
2993 marked_upgrade(aTHX_ cat, symptr);
2994 lookahead.flags |= FLAG_DO_UTF8;
2995 lookahead.strbeg = symptr->strbeg;
2996 utf8 = 1;
2997 start = SvPVX(cat);
2998 cur = start + SvCUR(cat);
2999 end = start+SvLEN(cat)-UTF8_MAXLEN;
3000 goto W_utf8;
3001 }
3002 if (ckWARN(WARN_PACK))
3003 Perl_warner(aTHX_ packWARN(WARN_PACK),
3004 "Character in 'W' format wrapped in pack");
3005 auv &= 0xff;
3006 }
3007 if (cur >= end) {
3008 *cur = '\0';
3009 SvCUR(cat) = cur - start;
3010 GROWING(0, cat, start, cur, len+1);
3011 end = start+SvLEN(cat)-1;
3012 }
fe2774ed 3013 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3014 }
3015 }
3016 break;
fc241834
RGS
3017 }
3018 case 'U': {
3019 char *end;
3020
3021 if (len == 0) {
3022 if (!(symptr->flags & FLAG_DO_UTF8)) {
3023 marked_upgrade(aTHX_ cat, symptr);
3024 lookahead.flags |= FLAG_DO_UTF8;
3025 lookahead.strbeg = symptr->strbeg;
3026 }
3027 utf8 = 0;
3028 goto no_change;
3029 }
3030
3031 end = start+SvLEN(cat);
3032 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3033 while (len-- > 0) {
fc241834 3034 UV auv;
a6ec74c1 3035 fromstr = NEXTFROM;
fc241834
RGS
3036 auv = SvUV(fromstr);
3037 if (utf8) {
230e1fce 3038 U8 buffer[UTF8_MAXLEN], *endb;
fc241834
RGS
3039 endb = uvuni_to_utf8_flags(buffer, auv,
3040 ckWARN(WARN_UTF8) ?
3041 0 : UNICODE_ALLOW_ANY);
3042 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3043 *cur = '\0';
3044 SvCUR(cat) = cur - start;
3045 GROWING(0, cat, start, cur,
3046 len+(endb-buffer)*UTF8_EXPAND);
3047 end = start+SvLEN(cat);
3048 }
3049 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3050 } else {
3051 if (cur >= end) {
3052 *cur = '\0';
3053 SvCUR(cat) = cur - start;
3054 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3055 end = start+SvLEN(cat)-UTF8_MAXLEN;
3056 }
230e1fce
NC
3057 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3058 ckWARN(WARN_UTF8) ?
3059 0 : UNICODE_ALLOW_ANY);
fc241834 3060 }
a6ec74c1 3061 }
a6ec74c1 3062 break;
fc241834 3063 }
a6ec74c1
JH
3064 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3065 case 'f':
a6ec74c1 3066 while (len-- > 0) {
f337b084
TH
3067 float afloat;
3068 NV anv;
a6ec74c1 3069 fromstr = NEXTFROM;
f337b084 3070 anv = SvNV(fromstr);
5cdb9e01 3071#ifdef __VOS__
f337b084 3072 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3073 during conversion from double to float into infinity, so we
3074 do it by hand. This code should either be generalized for
3075 any OS that needs it, or removed if and when VOS implements
3076 posix-976 (suggestion to support mapping to infinity).
3077 Paul.Green@stratus.com 02-04-02. */
f337b084 3078 if (anv > FLT_MAX)
fc241834 3079 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3080 else if (anv < -FLT_MAX)
fc241834 3081 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3082 else afloat = (float) anv;
3083#else /* __VOS__ */
baf3cf9c 3084# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3085 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3086 * on Alpha; fake it if we don't have them.
3087 */
f337b084 3088 if (anv > FLT_MAX)
fc241834 3089 afloat = FLT_MAX;
f337b084 3090 else if (anv < -FLT_MAX)
fc241834 3091 afloat = -FLT_MAX;
f337b084 3092 else afloat = (float)anv;
baf3cf9c 3093# else
f337b084 3094 afloat = (float)anv;
baf3cf9c 3095# endif
f337b084 3096#endif /* __VOS__ */
1109a392 3097 DO_BO_PACK_N(afloat, float);
f337b084 3098 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3099 }
3100 break;
3101 case 'd':
a6ec74c1 3102 while (len-- > 0) {
f337b084
TH
3103 double adouble;
3104 NV anv;
a6ec74c1 3105 fromstr = NEXTFROM;
f337b084 3106 anv = SvNV(fromstr);
5cdb9e01 3107#ifdef __VOS__
f337b084 3108 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3109 during conversion from long double to double into infinity,
3110 so we do it by hand. This code should either be generalized
3111 for any OS that needs it, or removed if and when VOS
3112 implements posix-976 (suggestion to support mapping to
3113 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3114 if (anv > DBL_MAX)
fc241834 3115 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3116 else if (anv < -DBL_MAX)
fc241834 3117 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3118 else adouble = (double) anv;
3119#else /* __VOS__ */
baf3cf9c 3120# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3121 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3122 * on Alpha; fake it if we don't have them.
3123 */
f337b084 3124 if (anv > DBL_MAX)
fc241834 3125 adouble = DBL_MAX;
f337b084 3126 else if (anv < -DBL_MAX)
fc241834 3127 adouble = -DBL_MAX;
f337b084 3128 else adouble = (double)anv;
baf3cf9c 3129# else
f337b084 3130 adouble = (double)anv;
baf3cf9c 3131# endif
f337b084 3132#endif /* __VOS__ */
1109a392 3133 DO_BO_PACK_N(adouble, double);
f337b084 3134 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3135 }
3136 break;
fc241834
RGS
3137 case 'F': {
3138 NV anv;
1109a392 3139 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3140 while (len-- > 0) {
3141 fromstr = NEXTFROM;
3142 anv = SvNV(fromstr);
1109a392 3143 DO_BO_PACK_N(anv, NV);
fc241834 3144 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3145 }
3146 break;
fc241834 3147 }
92d41999 3148#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3149 case 'D': {
3150 long double aldouble;
1109a392
MHM
3151 /* long doubles can have unused bits, which may be nonzero */
3152 Zero(&aldouble, 1, long double);
92d41999
JH
3153 while (len-- > 0) {
3154 fromstr = NEXTFROM;
3155 aldouble = (long double)SvNV(fromstr);
1109a392 3156 DO_BO_PACK_N(aldouble, long double);
fc241834 3157 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3158 }
3159 break;
fc241834 3160 }
92d41999 3161#endif
7212898e 3162#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3163 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3164#endif
a6ec74c1
JH
3165 case 'n':
3166 while (len-- > 0) {
f337b084 3167 I16 ai16;
a6ec74c1 3168 fromstr = NEXTFROM;
ef108786 3169 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3170#ifdef HAS_HTONS
ef108786 3171 ai16 = PerlSock_htons(ai16);
a6ec74c1 3172#endif
f337b084 3173 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3174 }
3175 break;
7212898e 3176#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3177 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3178#endif
a6ec74c1
JH
3179 case 'v':
3180 while (len-- > 0) {
f337b084 3181 I16 ai16;
a6ec74c1 3182 fromstr = NEXTFROM;
ef108786 3183 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3184#ifdef HAS_HTOVS
ef108786 3185 ai16 = htovs(ai16);
a6ec74c1 3186#endif
f337b084 3187 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3188 }
3189 break;
49704364 3190 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3191#if SHORTSIZE != SIZE16
fc241834 3192 while (len-- > 0) {
f337b084 3193 unsigned short aushort;
fc241834
RGS
3194 fromstr = NEXTFROM;
3195 aushort = SvUV(fromstr);
3196 DO_BO_PACK(aushort, s);
f337b084 3197 PUSH_VAR(utf8, cur, aushort);
fc241834 3198 }
49704364
LW
3199 break;
3200#else
3201 /* Fall through! */
a6ec74c1 3202#endif
49704364 3203 case 'S':
fc241834 3204 while (len-- > 0) {
f337b084 3205 U16 au16;
fc241834
RGS
3206 fromstr = NEXTFROM;
3207 au16 = (U16)SvUV(fromstr);
3208 DO_BO_PACK(au16, 16);
f337b084 3209 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3210 }
3211 break;
49704364 3212 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3213#if SHORTSIZE != SIZE16
fc241834 3214 while (len-- > 0) {
f337b084 3215 short ashort;
fc241834
RGS
3216 fromstr = NEXTFROM;
3217 ashort = SvIV(fromstr);
3218 DO_BO_PACK(ashort, s);
f337b084 3219 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3220 }
49704364
LW
3221 break;
3222#else
3223 /* Fall through! */
a6ec74c1 3224#endif
49704364
LW
3225 case 's':
3226 while (len-- > 0) {
f337b084 3227 I16 ai16;
49704364 3228 fromstr = NEXTFROM;
ef108786
MHM
3229 ai16 = (I16)SvIV(fromstr);
3230 DO_BO_PACK(ai16, 16);
f337b084 3231 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3232 }
3233 break;
3234 case 'I':
49704364 3235 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3236 while (len-- > 0) {
f337b084 3237 unsigned int auint;
a6ec74c1
JH
3238 fromstr = NEXTFROM;
3239 auint = SvUV(fromstr);
1109a392 3240 DO_BO_PACK(auint, i);
f337b084 3241 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3242 }
3243 break;
92d41999
JH
3244 case 'j':
3245 while (len-- > 0) {
f337b084 3246 IV aiv;
92d41999
JH
3247 fromstr = NEXTFROM;
3248 aiv = SvIV(fromstr);
1109a392
MHM
3249#if IVSIZE == INTSIZE
3250 DO_BO_PACK(aiv, i);
3251#elif IVSIZE == LONGSIZE
3252 DO_BO_PACK(aiv, l);
3253#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3254 DO_BO_PACK(aiv, 64);
f337b084
TH
3255#else
3256 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3257#endif
f337b084 3258 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3259 }
3260 break;
3261 case 'J':
3262 while (len-- > 0) {
f337b084 3263 UV auv;
92d41999
JH
3264 fromstr = NEXTFROM;
3265 auv = SvUV(fromstr);
1109a392
MHM
3266#if UVSIZE == INTSIZE
3267 DO_BO_PACK(auv, i);
3268#elif UVSIZE == LONGSIZE
3269 DO_BO_PACK(auv, l);
3270#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3271 DO_BO_PACK(auv, 64);
f337b084
TH
3272#else
3273 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3274#endif
f337b084 3275 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3276 }
3277 break;
a6ec74c1
JH
3278 case 'w':
3279 while (len-- > 0) {
f337b084 3280 NV anv;
a6ec74c1 3281 fromstr = NEXTFROM;
15e9f109 3282 anv = SvNV(fromstr);
a6ec74c1 3283
f337b084
TH
3284 if (anv < 0) {
3285 *cur = '\0';
3286 SvCUR(cat) = cur - start;
49704364 3287 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3288 }
a6ec74c1 3289
196b62db
NC
3290 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3291 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3292 any negative IVs will have already been got by the croak()
3293 above. IOK is untrue for fractions, so we test them
3294 against UV_MAX_P1. */
f337b084
TH
3295 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3296 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3297 char *in = buf + sizeof(buf);
196b62db 3298 UV auv = SvUV(fromstr);
a6ec74c1
JH
3299
3300 do {
eb160463 3301 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3302 auv >>= 7;
3303 } while (auv);
3304 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3305 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3306 in, (buf + sizeof(buf)) - in);
3307 } else if (SvPOKp(fromstr))
3308 goto w_string;
a6ec74c1 3309 else if (SvNOKp(fromstr)) {
0258719b
NC
3310 /* 10**NV_MAX_10_EXP is the largest power of 10
3311 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3312 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3313 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3314 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3315 Some C compilers are strict about integral constant
3316 expressions so we conservatively divide by a slightly
3317 smaller integer instead of multiplying by the exact
3318 floating-point value.
0258719b
NC
3319 */
3320#ifdef NV_MAX_10_EXP
f337b084 3321 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3322 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3323#else
f337b084 3324 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3325 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3326#endif
a6ec74c1
JH
3327 char *in = buf + sizeof(buf);
3328
15e9f109 3329 anv = Perl_floor(anv);
a6ec74c1 3330 do {
15e9f109 3331 NV next = Perl_floor(anv / 128);
a6ec74c1 3332 if (in <= buf) /* this cannot happen ;-) */
49704364 3333 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3334 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3335 anv = next;
3336 } while (anv > 0);
a6ec74c1 3337 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3338 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3339 in, (buf + sizeof(buf)) - in);
3340 } else {
735b914b
JH
3341 char *from, *result, *in;
3342 SV *norm;
3343 STRLEN len;
3344 bool done;
3345
f337b084 3346 w_string:
735b914b
JH
3347 /* Copy string and check for compliance */
3348 from = SvPV(fromstr, len);
3349 if ((norm = is_an_int(from, len)) == NULL)
49704364 3350 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
3351
3352 New('w', result, len, char);
3353 in = result + len;
3354 done = FALSE;
f337b084 3355 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3356 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3357 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3358 in, (result + len) - in);
735b914b
JH
3359 Safefree(result);
3360 SvREFCNT_dec(norm); /* free norm */
fc241834 3361 }
a6ec74c1
JH
3362 }
3363 break;
3364 case 'i':
49704364 3365 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3366 while (len-- > 0) {
f337b084 3367 int aint;
a6ec74c1
JH
3368 fromstr = NEXTFROM;
3369 aint = SvIV(fromstr);
1109a392 3370 DO_BO_PACK(aint, i);
f337b084 3371 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3372 }
3373 break;
7212898e 3374#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3375 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3376#endif
a6ec74c1
JH
3377 case 'N':
3378 while (len-- > 0) {
f337b084 3379 U32 au32;
a6ec74c1 3380 fromstr = NEXTFROM;
ef108786 3381 au32 = SvUV(fromstr);
a6ec74c1 3382#ifdef HAS_HTONL
ef108786 3383 au32 = PerlSock_htonl(au32);
a6ec74c1 3384#endif
f337b084 3385 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3386 }
3387 break;
7212898e 3388#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3389 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3390#endif
a6ec74c1
JH
3391 case 'V':
3392 while (len-- > 0) {
f337b084 3393 U32 au32;
a6ec74c1 3394 fromstr = NEXTFROM;
ef108786 3395 au32 = SvUV(fromstr);
a6ec74c1 3396#ifdef HAS_HTOVL
ef108786 3397 au32 = htovl(au32);
a6ec74c1 3398#endif
f337b084 3399 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3400 }
3401 break;
49704364 3402 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3403#if LONGSIZE != SIZE32
fc241834 3404 while (len-- > 0) {
f337b084 3405 unsigned long aulong;
fc241834
RGS
3406 fromstr = NEXTFROM;
3407 aulong = SvUV(fromstr);
3408 DO_BO_PACK(aulong, l);
f337b084 3409 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3410 }
49704364
LW
3411 break;
3412#else
3413 /* Fall though! */
a6ec74c1 3414#endif
49704364 3415 case 'L':
fc241834 3416 while (len-- > 0) {
f337b084 3417 U32 au32;
fc241834
RGS
3418 fromstr = NEXTFROM;
3419 au32 = SvUV(fromstr);
3420 DO_BO_PACK(au32, 32);
f337b084 3421 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3422 }
3423 break;
49704364 3424 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3425#if LONGSIZE != SIZE32
fc241834 3426 while (len-- > 0) {
f337b084 3427 long along;
fc241834
RGS
3428 fromstr = NEXTFROM;
3429 along = SvIV(fromstr);
3430 DO_BO_PACK(along, l);
f337b084 3431 PUSH_VAR(utf8, cur, along);
a6ec74c1 3432 }
49704364
LW
3433 break;
3434#else
3435 /* Fall though! */
a6ec74c1 3436#endif
49704364
LW
3437 case 'l':
3438 while (len-- > 0) {
f337b084 3439 I32 ai32;
49704364 3440 fromstr = NEXTFROM;
ef108786
MHM
3441 ai32 = SvIV(fromstr);
3442 DO_BO_PACK(ai32, 32);
f337b084 3443 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3444 }
3445 break;
3446#ifdef HAS_QUAD
3447 case 'Q':
3448 while (len-- > 0) {
f337b084 3449 Uquad_t auquad;
a6ec74c1 3450 fromstr = NEXTFROM;
f337b084 3451 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3452 DO_BO_PACK(auquad, 64);
f337b084 3453 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3454 }
3455 break;
3456 case 'q':
3457 while (len-- > 0) {
f337b084 3458 Quad_t aquad;
a6ec74c1
JH
3459 fromstr = NEXTFROM;
3460 aquad = (Quad_t)SvIV(fromstr);
1109a392 3461 DO_BO_PACK(aquad, 64);
f337b084 3462 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3463 }
3464 break;
f337b084 3465#endif /* HAS_QUAD */
a6ec74c1
JH
3466 case 'P':
3467 len = 1; /* assume SV is correct length */
f337b084 3468 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3469 /* Fall through! */
a6ec74c1
JH
3470 case 'p':
3471 while (len-- > 0) {
f337b084
TH
3472 char *aptr;
3473
a6ec74c1 3474 fromstr = NEXTFROM;
28a4f200 3475 SvGETMAGIC(fromstr);
3476 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3477 else {
3478 STRLEN n_a;
3479 /* XXX better yet, could spirit away the string to
3480 * a safe spot and hang on to it until the result
3481 * of pack() (and all copies of the result) are
3482 * gone.
3483 */
f337b084
TH
3484 if (ckWARN(WARN_PACK) &&
3485 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3486 !SvREADONLY(fromstr)))) {
9014280d 3487 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3488 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3489 }
3490 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3491 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3492 else
28a4f200 3493 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3494 }
1109a392 3495 DO_BO_PACK_P(aptr);
f337b084 3496 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3497 }
3498 break;
fc241834
RGS
3499 case 'u': {
3500 char *aptr, *aend;
3501 bool from_utf8;
f337b084 3502
a6ec74c1 3503 fromstr = NEXTFROM;
fc241834
RGS
3504 if (len <= 2) len = 45;
3505 else len = len / 3 * 3;
3506 if (len >= 64) {
3507 Perl_warner(aTHX_ packWARN(WARN_PACK),
3508 "Field too wide in 'u' format in pack");
3509 len = 63;
3510 }
a6ec74c1 3511 aptr = SvPV(fromstr, fromlen);
fc241834
RGS
3512 from_utf8 = DO_UTF8(fromstr);
3513 if (from_utf8) {
3514 aend = aptr + fromlen;
3515 fromlen = sv_len_utf8(fromstr);
3516 } else aend = NULL; /* Unused, but keep compilers happy */
3517 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3518 while (fromlen > 0) {
fc241834 3519 U8 *end;
a6ec74c1 3520 I32 todo;
fc241834 3521 U8 hunk[1+63/3*4+1];
a6ec74c1 3522
eb160463 3523 if ((I32)fromlen > len)
a6ec74c1
JH
3524 todo = len;
3525 else
3526 todo = fromlen;
fc241834
RGS
3527 if (from_utf8) {
3528 char buffer[64];
3529 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3530 'u' | TYPE_IS_PACK)) {
3531 *cur = '\0';
3532 SvCUR(cat) = cur - start;
3533 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3534 }
3535 end = doencodes(hunk, buffer, todo);
3536 } else {
3537 end = doencodes(hunk, aptr, todo);
3538 aptr += todo;
3539 }
3540 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3541 fromlen -= todo;
3542 }
a6ec74c1
JH
3543 break;
3544 }
f337b084
TH
3545 }
3546 *cur = '\0';
3547 SvCUR(cat) = cur - start;
3548 no_change:
49704364 3549 *symptr = lookahead;
a6ec74c1 3550 }
49704364 3551 return beglist;
18529408
IZ
3552}
3553#undef NEXTFROM
3554
3555
3556PP(pp_pack)
3557{
3558 dSP; dMARK; dORIGMARK; dTARGET;
3559 register SV *cat = TARG;
3560 STRLEN fromlen;
3561 register char *pat = SvPVx(*++MARK, fromlen);
3562 register char *patend = pat + fromlen;
3563
3564 MARK++;
3565 sv_setpvn(cat, "", 0);
f337b084 3566 SvUTF8_off(cat);
18529408 3567
7accc089 3568 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3569
a6ec74c1
JH
3570 SvSETMAGIC(cat);
3571 SP = ORIGMARK;
3572 PUSHs(cat);
3573 RETURN;
3574}
a6ec74c1 3575
73cb7263
NC
3576/*
3577 * Local variables:
3578 * c-indentation-style: bsd
3579 * c-basic-offset: 4
3580 * indent-tabs-mode: t
3581 * End:
3582 *
edf815fd 3583 * vim: shiftwidth=4:
73cb7263 3584*/