This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perldelta suggestions on (un)?pack by Ton Hospel
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e 34#if PERL_VERSION >= 9
f337b084
TH
35# define PERL_PACK_CAN_BYTEORDER
36# define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
39#ifndef CHAR_BIT
40# define CHAR_BIT 8
7212898e 41#endif
3473cf63
RGS
42/* Maximum number of bytes to which a byte can grow due to upgrade */
43#define UTF8_EXPAND 2
7212898e 44
a6ec74c1 45/*
a6ec74c1
JH
46 * Offset for integer pack/unpack.
47 *
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
50 */
51
52/*
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
58 */
59/*
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
62 */
63#define SIZE16 2
64#define SIZE32 4
65
66/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
67 --jhi Feb 1999 */
68
1109a392
MHM
69#if U16SIZE > SIZE16 || U32SIZE > SIZE32
70# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
71# define OFF16(p) ((char*)(p))
72# define OFF32(p) ((char*)(p))
a6ec74c1 73# else
1109a392 74# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
75# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
77# else
08ca2aa3 78 ++++ bad cray byte order
a6ec74c1
JH
79# endif
80# endif
a6ec74c1 81#else
08ca2aa3
TH
82# define OFF16(p) ((char *) (p))
83# define OFF32(p) ((char *) (p))
a6ec74c1
JH
84#endif
85
f337b084
TH
86/* Only to be used inside a loop (see the break) */
87#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
88 if (utf8) { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
90 } else { \
91 Copy(s, OFF16(p), SIZE16, char); \
92 (s) += SIZE16; \
93 } \
94} STMT_END
95
96/* Only to be used inside a loop (see the break) */
97#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
98 if (utf8) { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
100 } else { \
101 Copy(s, OFF32(p), SIZE32, char); \
102 (s) += SIZE32; \
103 } \
104} STMT_END
105
106#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
108
109/* Only to be used inside a loop (see the break) */
f337b084 110#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
111STMT_START { \
112 if (utf8) { \
f337b084
TH
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
115 } else { \
116 Copy(s, (char *) &var, sizeof(var), char); \
117 s += sizeof(var); \
118 } \
08ca2aa3
TH
119} STMT_END
120
f337b084
TH
121#define PUSH_VAR(utf8, aptr, var) \
122 PUSH_BYTES(utf8, aptr, (char *) &(var), sizeof(var))
123
49704364
LW
124/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125#define MAX_SUB_TEMPLATE_LEVEL 100
126
66c611c5 127/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
128#define FLAG_WAS_UTF8 0x40
129#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 130#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 131#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
132#define FLAG_SLASH 0x04
133#define FLAG_COMMA 0x02
134#define FLAG_PACK 0x01
135
a6ec74c1
JH
136STATIC SV *
137S_mul128(pTHX_ SV *sv, U8 m)
138{
139 STRLEN len;
140 char *s = SvPV(sv, len);
141 char *t;
142 U32 i = 0;
143
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
146
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
149 sv = tmpNew;
150 s = SvPV(sv, len);
151 }
152 t = s + len - 1;
153 while (!*t) /* trailing '\0'? */
154 t--;
155 while (t > s) {
156 i = ((*t - '0') << 7) + m;
eb160463
GS
157 *(t--) = '0' + (char)(i % 10);
158 m = (char)(i / 10);
a6ec74c1
JH
159 }
160 return (sv);
161}
162
163/* Explosives and implosives. */
164
165#if 'I' == 73 && 'J' == 74
166/* On an ASCII/ISO kind of system */
167#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
168#else
169/*
170 Some other sort of character set - use memchr() so we don't match
171 the null byte.
172 */
173#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
174#endif
175
66c611c5 176/* type modifiers */
62f95557 177#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
178#define TYPE_IS_BIG_ENDIAN 0x200
179#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 180#define TYPE_IS_PACK 0x800
1109a392 181#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 182#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
183#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
184
7212898e
NC
185#ifdef PERL_PACK_CAN_SHRIEKSIGN
186#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
187#else
188#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
189#endif
190
191#ifndef PERL_PACK_CAN_BYTEORDER
192/* Put "can't" first because it is shorter */
193# define TYPE_ENDIANNESS(t) 0
194# define TYPE_NO_ENDIANNESS(t) (t)
195
196# define ENDIANNESS_ALLOWED_TYPES ""
197
198# define DO_BO_UNPACK(var, type)
199# define DO_BO_PACK(var, type)
200# define DO_BO_UNPACK_PTR(var, type, pre_cast)
201# define DO_BO_PACK_PTR(var, type, pre_cast)
202# define DO_BO_UNPACK_N(var, type)
203# define DO_BO_PACK_N(var, type)
204# define DO_BO_UNPACK_P(var)
205# define DO_BO_PACK_P(var)
66c611c5 206
f337b084 207#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
208
209# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
211
212# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
213
214# define DO_BO_UNPACK(var, type) \
1109a392 215 STMT_START { \
66c611c5 216 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
219 default: break; \
220 } \
221 } STMT_END
222
7212898e 223# define DO_BO_PACK(var, type) \
1109a392 224 STMT_START { \
66c611c5 225 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
228 default: break; \
229 } \
230 } STMT_END
231
7212898e 232# define DO_BO_UNPACK_PTR(var, type, pre_cast) \
1109a392 233 STMT_START { \
66c611c5 234 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (void *) my_betoh ## type ((pre_cast) var); \
237 break; \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (void *) my_letoh ## type ((pre_cast) var); \
240 break; \
241 default: \
242 break; \
243 } \
244 } STMT_END
245
7212898e 246# define DO_BO_PACK_PTR(var, type, pre_cast) \
1109a392 247 STMT_START { \
66c611c5 248 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (void *) my_htobe ## type ((pre_cast) var); \
251 break; \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (void *) my_htole ## type ((pre_cast) var); \
254 break; \
255 default: \
256 break; \
257 } \
258 } STMT_END
259
7212898e 260# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
266 break; \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
270 break; \
271 default: \
272 break; \
273 } \
274 } STMT_END
275
7212898e
NC
276# if PTRSIZE == INTSIZE
277# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
278# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
279# elif PTRSIZE == LONGSIZE
280# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
281# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
282# else
283# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
284# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
285# endif
1109a392 286
7212898e 287# if defined(my_htolen) && defined(my_letohn) && \
1109a392 288 defined(my_htoben) && defined(my_betohn)
7212898e 289# define DO_BO_UNPACK_N(var, type) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
292 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
293 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
294 default: break; \
295 } \
296 } STMT_END
297
7212898e 298# define DO_BO_PACK_N(var, type) \
1109a392 299 STMT_START { \
66c611c5 300 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
301 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
302 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
303 default: break; \
304 } \
305 } STMT_END
7212898e
NC
306# else
307# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
308# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
309# endif
310
f337b084 311#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 312
78d46eaa 313#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 314#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
315#define PACK_SIZE_MASK 0x3F
316
78d46eaa
NC
317/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
318 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
319
320#if TYPE_IS_SHRIEKING != 0x100
321 ++++shriek offset should be 256
322#endif
323
324typedef U8 packprops_t;
78d46eaa
NC
325#if 'J'-'I' == 1
326/* ASCII */
a7a3cfaa
TH
327const packprops_t packprops[512] = {
328 /* normal */
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
333 0, 0, 0,
334 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 335#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 336 /* D */ LONG_DOUBLESIZE,
80a13697 337#else
a7a3cfaa 338 0,
80a13697 339#endif
a7a3cfaa
TH
340 0,
341 /* F */ NVSIZE,
342 0, 0,
343 /* I */ sizeof(unsigned int),
344 /* J */ UVSIZE,
345 0,
346 /* L */ SIZE32,
347 0,
348 /* N */ SIZE32,
349 0, 0,
80a13697 350#if defined(HAS_QUAD)
a7a3cfaa 351 /* Q */ sizeof(Uquad_t),
80a13697 352#else
a7a3cfaa 353 0,
80a13697 354#endif
a7a3cfaa
TH
355 0,
356 /* S */ SIZE16,
357 0,
358 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
359 /* V */ SIZE32,
360 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 /* c */ sizeof(char),
363 /* d */ sizeof(double),
364 0,
365 /* f */ sizeof(float),
366 0, 0,
367 /* i */ sizeof(int),
368 /* j */ IVSIZE,
369 0,
370 /* l */ SIZE32,
371 0,
372 /* n */ SIZE16,
373 0,
374 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 375#if defined(HAS_QUAD)
a7a3cfaa 376 /* q */ sizeof(Quad_t),
80a13697 377#else
a7a3cfaa 378 0,
80a13697 379#endif
a7a3cfaa
TH
380 0,
381 /* s */ SIZE16,
382 0, 0,
383 /* v */ SIZE16,
384 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0,
394 /* shrieking */
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
398 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
399 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 /* I */ sizeof(unsigned int),
401 0, 0,
402 /* L */ sizeof(unsigned long),
403 0,
7212898e 404#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 405 /* N */ SIZE32,
7212898e 406#else
a7a3cfaa 407 0,
7212898e 408#endif
a7a3cfaa
TH
409 0, 0, 0, 0,
410 /* S */ sizeof(unsigned short),
411 0, 0,
7212898e 412#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 413 /* V */ SIZE32,
7212898e 414#else
a7a3cfaa 415 0,
7212898e 416#endif
a7a3cfaa
TH
417 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
418 0, 0,
419 /* i */ sizeof(int),
420 0, 0,
421 /* l */ sizeof(long),
422 0,
7212898e 423#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 424 /* n */ SIZE16,
7212898e 425#else
a7a3cfaa 426 0,
7212898e 427#endif
a7a3cfaa
TH
428 0, 0, 0, 0,
429 /* s */ sizeof(short),
430 0, 0,
7212898e 431#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 432 /* v */ SIZE16,
7212898e 433#else
a7a3cfaa 434 0,
7212898e 435#endif
a7a3cfaa
TH
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
445};
446#else
447/* EBCDIC (or bust) */
a7a3cfaa
TH
448const packprops_t packprops[512] = {
449 /* normal */
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0,
459 /* c */ sizeof(char),
460 /* d */ sizeof(double),
461 0,
462 /* f */ sizeof(float),
463 0, 0,
464 /* i */ sizeof(int),
465 0, 0, 0, 0, 0, 0, 0,
466 /* j */ IVSIZE,
467 0,
468 /* l */ SIZE32,
469 0,
470 /* n */ SIZE16,
471 0,
472 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 473#if defined(HAS_QUAD)
a7a3cfaa 474 /* q */ sizeof(Quad_t),
80a13697 475#else
a7a3cfaa 476 0,
80a13697 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 /* s */ SIZE16,
480 0, 0,
481 /* v */ SIZE16,
482 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 486#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 487 /* D */ LONG_DOUBLESIZE,
80a13697 488#else
a7a3cfaa 489 0,
80a13697 490#endif
a7a3cfaa
TH
491 0,
492 /* F */ NVSIZE,
493 0, 0,
494 /* I */ sizeof(unsigned int),
495 0, 0, 0, 0, 0, 0, 0,
496 /* J */ UVSIZE,
497 0,
498 /* L */ SIZE32,
499 0,
500 /* N */ SIZE32,
501 0, 0,
80a13697 502#if defined(HAS_QUAD)
a7a3cfaa 503 /* Q */ sizeof(Uquad_t),
80a13697 504#else
a7a3cfaa 505 0,
80a13697 506#endif
a7a3cfaa
TH
507 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 /* S */ SIZE16,
509 0,
510 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
511 /* V */ SIZE32,
512 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 /* shrieking */
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 /* i */ sizeof(int),
526 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 /* l */ sizeof(long),
528 0,
7212898e 529#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 530 /* n */ SIZE16,
7212898e 531#else
a7a3cfaa 532 0,
7212898e 533#endif
a7a3cfaa
TH
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* s */ sizeof(short),
536 0, 0,
7212898e 537#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 538 /* v */ SIZE16,
7212898e 539#else
a7a3cfaa 540 0,
7212898e 541#endif
a7a3cfaa
TH
542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544 0, 0, 0,
545 /* I */ sizeof(unsigned int),
546 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 /* L */ sizeof(unsigned long),
548 0,
7212898e 549#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 550 /* N */ SIZE32,
7212898e 551#else
a7a3cfaa 552 0,
7212898e 553#endif
a7a3cfaa
TH
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 /* S */ sizeof(unsigned short),
556 0, 0,
7212898e 557#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 558 /* V */ SIZE32,
7212898e 559#else
a7a3cfaa 560 0,
7212898e 561#endif
a7a3cfaa
TH
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
564};
565#endif
566
08ca2aa3 567STATIC U8
f337b084 568uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
08ca2aa3
TH
569{
570 UV val;
571 STRLEN retlen;
f337b084
TH
572 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
573 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
574 /* We try to process malformed UTF-8 as much as possible (preferrably with
575 warnings), but these two mean we make no progress in the string and
576 might enter an infinite loop */
577 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
578 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
579 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 580 if (val >= 0x100) {
f337b084 581 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
582 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
583 "Character in '%c' format wrapped in unpack",
f337b084 584 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
585 val &= 0xff;
586 }
587 *s += retlen;
588 return val;
589}
590
f337b084
TH
591#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
592 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
593 *(U8 *)(s)++)
594
595STATIC bool
f337b084 596uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
597{
598 UV val;
599 STRLEN retlen;
600 char *from = *s;
601 int bad = 0;
602 U32 flags = ckWARN(WARN_UTF8) ?
603 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
604 for (;buf_len > 0; buf_len--) {
605 if (from >= end) return FALSE;
f337b084 606 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
607 if (retlen == (STRLEN) -1 || retlen == 0) {
608 from += UTF8SKIP(from);
609 bad |= 1;
610 } else from += retlen;
611 if (val >= 0x100) {
612 bad |= 2;
613 val &= 0xff;
614 }
615 *(U8 *)buf++ = val;
616 }
617 /* We have enough characters for the buffer. Did we have problems ? */
618 if (bad) {
619 if (bad & 1) {
620 /* Rewalk the string fragment while warning */
621 char *ptr;
622 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
623 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
624 if (ptr >= end) break;
f337b084 625 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
626 }
627 if (from > end) from = end;
628 }
629 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 630 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
631 WARN_PACK : WARN_UNPACK),
632 "Character(s) in '%c' format wrapped in %s",
fc241834 633 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 634 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
635 }
636 *s = from;
637 return TRUE;
638}
639
640STATIC bool
641next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
642{
643 UV val;
644 STRLEN retlen;
f337b084 645 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
646 if (val >= 0x100 || !ISUUCHAR(val) ||
647 retlen == (STRLEN) -1 || retlen == 0) {
648 *out = 0;
649 return FALSE;
650 }
651 *out = PL_uudmap[val] & 077;
f337b084 652 *s += retlen;
08ca2aa3
TH
653 return TRUE;
654}
78d46eaa 655
f337b084
TH
656STATIC void
657bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
658 U8 buffer[UTF8_MAXLEN];
659 U8 *end = start + len;
660 char *d = *dest;
661 while (start < end) {
662 int length =
663 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
664 switch(length) {
665 case 1:
666 *d++ = buffer[0];
667 break;
668 case 2:
669 *d++ = buffer[0];
670 *d++ = buffer[1];
671 break;
672 default:
673 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
674 *start, length);
675 }
676 start++;
677 }
678 *dest = d;
679}
680
681#define PUSH_BYTES(utf8, cur, buf, len) \
682STMT_START { \
683 if (utf8) bytes_to_uni(aTHX_ buf, len, &(cur)); \
684 else { \
685 Copy(buf, cur, len, char); \
686 (cur) += (len); \
687 } \
688} STMT_END
689
690#define GROWING(utf8, cat, start, cur, in_len) \
691STMT_START { \
692 STRLEN glen = (in_len); \
3473cf63 693 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
694 if ((cur) + glen >= (start) + SvLEN(cat)) { \
695 (start) = sv_exp_grow(aTHX_ cat, glen); \
696 (cur) = (start) + SvCUR(cat); \
697 } \
698} STMT_END
699
700#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
701STMT_START { \
702 STRLEN glen = (in_len); \
703 STRLEN gl = glen; \
3473cf63 704 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
705 if ((cur) + gl >= (start) + SvLEN(cat)) { \
706 *cur = '\0'; \
707 SvCUR(cat) = (cur) - (start); \
708 (start) = sv_exp_grow(aTHX_ cat, gl); \
709 (cur) = (start) + SvCUR(cat); \
710 } \
711 PUSH_BYTES(utf8, cur, buf, glen); \
712} STMT_END
713
714#define PUSH_BYTE(utf8, s, byte) \
715STMT_START { \
716 if (utf8) { \
717 U8 au8 = (byte); \
718 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
719 } else *(U8 *)(s)++ = (byte); \
720} STMT_END
721
722/* Only to be used inside a loop (see the break) */
723#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
724STMT_START { \
725 STRLEN retlen; \
726 if (str >= end) break; \
727 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
728 if (retlen == (STRLEN) -1 || retlen == 0) { \
729 *cur = '\0'; \
730 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
731 } \
732 str += retlen; \
733} STMT_END
734
206947d2 735/* Returns the sizeof() struct described by pat */
028d1f6d 736STATIC I32
f337b084 737S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 738{
f337b084 739 I32 total = 0;
206947d2 740
49704364 741 while (next_symbol(symptr)) {
f337b084
TH
742 I32 len;
743 int star, size;
f337b084
TH
744
745 switch (symptr->howlen) {
fc241834 746 case e_star:
49704364
LW
747 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
748 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
749 break;
f337b084
TH
750 default:
751 /* e_no_len and e_number */
752 len = symptr->length;
753 break;
49704364
LW
754 }
755
a7a3cfaa 756 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697
NC
757 if (!size) {
758 /* endianness doesn't influence the size of a type */
759 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
760 default:
761 Perl_croak(aTHX_ "Invalid type '%c' in %s",
762 (int)TYPE_NO_MODIFIERS(symptr->code),
763 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
764 case '@':
765 case '/':
766 case 'U': /* XXXX Is it correct? */
767 case 'w':
768 case 'u':
769 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
770 (int)symptr->code,
771 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
772 case '%':
773 size = 0;
774 break;
775 case '(':
fc241834
RGS
776 {
777 tempsym_t savsym = *symptr;
778 symptr->patptr = savsym.grpbeg;
779 symptr->patend = savsym.grpend;
780 /* XXXX Theoretically, we need to measure many times at
781 different positions, since the subexpression may contain
782 alignment commands, but be not of aligned length.
783 Need to detect this and croak(). */
784 size = measure_struct(symptr);
785 *symptr = savsym;
786 break;
787 }
80a13697
NC
788 case 'X' | TYPE_IS_SHRIEKING:
789 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
790 */
791 if (!len) /* Avoid division by 0 */
792 len = 1;
793 len = total % len; /* Assumed: the start is aligned. */
794 /* FALL THROUGH */
795 case 'X':
796 size = -1;
797 if (total < len)
798 Perl_croak(aTHX_ "'X' outside of string in %s",
799 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
800 break;
801 case 'x' | TYPE_IS_SHRIEKING:
802 if (!len) /* Avoid division by 0 */
803 len = 1;
804 star = total % len; /* Assumed: the start is aligned. */
805 if (star) /* Other portable ways? */
806 len = len - star;
807 else
808 len = 0;
809 /* FALL THROUGH */
810 case 'x':
811 case 'A':
812 case 'Z':
813 case 'a':
80a13697
NC
814 size = 1;
815 break;
816 case 'B':
817 case 'b':
818 len = (len + 7)/8;
819 size = 1;
820 break;
821 case 'H':
822 case 'h':
823 len = (len + 1)/2;
824 size = 1;
825 break;
78d46eaa 826
80a13697
NC
827 case 'P':
828 len = 1;
829 size = sizeof(char*);
78d46eaa
NC
830 break;
831 }
206947d2
IZ
832 }
833 total += len * size;
834 }
835 return total;
836}
837
49704364
LW
838
839/* locate matching closing parenthesis or bracket
840 * returns char pointer to char after match, or NULL
841 */
842STATIC char *
843S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 844{
49704364
LW
845 while (patptr < patend) {
846 char c = *patptr++;
847
848 if (isSPACE(c))
849 continue;
850 else if (c == ender)
851 return patptr-1;
852 else if (c == '#') {
853 while (patptr < patend && *patptr != '\n')
854 patptr++;
855 continue;
856 } else if (c == '(')
857 patptr = group_end(patptr, patend, ')') + 1;
858 else if (c == '[')
859 patptr = group_end(patptr, patend, ']') + 1;
18529408 860 }
49704364
LW
861 Perl_croak(aTHX_ "No group ending character '%c' found in template",
862 ender);
863 return 0;
18529408
IZ
864}
865
49704364
LW
866
867/* Convert unsigned decimal number to binary.
868 * Expects a pointer to the first digit and address of length variable
869 * Advances char pointer to 1st non-digit char and returns number
fc241834 870 */
18529408 871STATIC char *
49704364
LW
872S_get_num(pTHX_ register char *patptr, I32 *lenptr )
873{
874 I32 len = *patptr++ - '0';
875 while (isDIGIT(*patptr)) {
876 if (len >= 0x7FFFFFFF/10)
877 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
878 len = (len * 10) + (*patptr++ - '0');
879 }
880 *lenptr = len;
881 return patptr;
882}
883
884/* The marvellous template parsing routine: Using state stored in *symptr,
885 * locates next template code and count
886 */
887STATIC bool
f337b084 888S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 889{
fc241834
RGS
890 char* patptr = symptr->patptr;
891 char* patend = symptr->patend;
f337b084 892 const char *allowed = "";
49704364
LW
893
894 symptr->flags &= ~FLAG_SLASH;
895
896 while (patptr < patend) {
897 if (isSPACE(*patptr))
898 patptr++;
899 else if (*patptr == '#') {
900 patptr++;
901 while (patptr < patend && *patptr != '\n')
902 patptr++;
903 if (patptr < patend)
904 patptr++;
905 } else {
fc241834 906 /* We should have found a template code */
49704364 907 I32 code = *patptr++ & 0xFF;
66c611c5 908 U32 inherited_modifiers = 0;
49704364
LW
909
910 if (code == ','){ /* grandfather in commas but with a warning */
911 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
912 symptr->flags |= FLAG_COMMA;
913 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
914 "Invalid type ',' in %s",
915 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
916 }
917 continue;
918 }
fc241834 919
49704364 920 /* for '(', skip to ')' */
fc241834 921 if (code == '(') {
49704364
LW
922 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
923 Perl_croak(aTHX_ "()-group starts with a count in %s",
924 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
925 symptr->grpbeg = patptr;
926 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
927 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
928 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
929 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
930 }
931
66c611c5
MHM
932 /* look for group modifiers to inherit */
933 if (TYPE_ENDIANNESS(symptr->flags)) {
934 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
935 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
936 }
937
1109a392
MHM
938 /* look for modifiers */
939 while (patptr < patend) {
1109a392
MHM
940 I32 modifier = 0;
941 switch (*patptr) {
942 case '!':
943 modifier = TYPE_IS_SHRIEKING;
7212898e 944 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 945 break;
7212898e 946#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
947 case '>':
948 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 949 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
950 break;
951 case '<':
952 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 953 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 954 break;
f337b084 955#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392
MHM
956 default:
957 break;
958 }
66c611c5 959
1109a392
MHM
960 if (modifier == 0)
961 break;
66c611c5 962
1109a392
MHM
963 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
964 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
965 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
966
967 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392
MHM
968 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
969 (int) TYPE_NO_MODIFIERS(code),
970 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
971 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
972 TYPE_ENDIANNESS_MASK)
973 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
974 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
975
1109a392
MHM
976 if (ckWARN(WARN_UNPACK)) {
977 if (code & modifier)
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Duplicate modifier '%c' after '%c' in %s",
980 *patptr, (int) TYPE_NO_MODIFIERS(code),
981 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
982 }
66c611c5 983
1109a392
MHM
984 code |= modifier;
985 patptr++;
49704364
LW
986 }
987
66c611c5
MHM
988 /* inherit modifiers */
989 code |= inherited_modifiers;
990
fc241834 991 /* look for count and/or / */
49704364
LW
992 if (patptr < patend) {
993 if (isDIGIT(*patptr)) {
994 patptr = get_num( patptr, &symptr->length );
995 symptr->howlen = e_number;
996
997 } else if (*patptr == '*') {
998 patptr++;
999 symptr->howlen = e_star;
1000
1001 } else if (*patptr == '[') {
fc241834 1002 char* lenptr = ++patptr;
49704364
LW
1003 symptr->howlen = e_number;
1004 patptr = group_end( patptr, patend, ']' ) + 1;
1005 /* what kind of [] is it? */
1006 if (isDIGIT(*lenptr)) {
1007 lenptr = get_num( lenptr, &symptr->length );
1008 if( *lenptr != ']' )
1009 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1010 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1011 } else {
1012 tempsym_t savsym = *symptr;
1013 symptr->patend = patptr-1;
1014 symptr->patptr = lenptr;
1015 savsym.length = measure_struct(symptr);
1016 *symptr = savsym;
1017 }
1018 } else {
1019 symptr->howlen = e_no_len;
1020 symptr->length = 1;
1021 }
1022
1023 /* try to find / */
1024 while (patptr < patend) {
1025 if (isSPACE(*patptr))
1026 patptr++;
1027 else if (*patptr == '#') {
1028 patptr++;
1029 while (patptr < patend && *patptr != '\n')
1030 patptr++;
1031 if (patptr < patend)
1032 patptr++;
1033 } else {
66c611c5 1034 if (*patptr == '/') {
49704364
LW
1035 symptr->flags |= FLAG_SLASH;
1036 patptr++;
66c611c5
MHM
1037 if (patptr < patend &&
1038 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364
LW
1039 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1040 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1041 }
1042 break;
1043 }
18529408 1044 }
49704364
LW
1045 } else {
1046 /* at end - no count, no / */
1047 symptr->howlen = e_no_len;
1048 symptr->length = 1;
1049 }
1050
1051 symptr->code = code;
fc241834 1052 symptr->patptr = patptr;
49704364 1053 return TRUE;
18529408 1054 }
49704364 1055 }
fc241834 1056 symptr->patptr = patptr;
49704364 1057 return FALSE;
18529408
IZ
1058}
1059
18529408 1060/*
fc241834 1061 There is no way to cleanly handle the case where we should process the
08ca2aa3 1062 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1063 (e.g. estimates like strend-s as an upper bound for the number of
1064 characters left wouldn't work). So if we foresee the need of this
1065 (pattern starts with U or contains U0), we want to work on the encoded
1066 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1067 themselves if they need to do a lot of unpacks like this on it
1068*/
fc241834 1069STATIC bool
08ca2aa3
TH
1070need_utf8(const char *pat, const char *patend)
1071{
1072 bool first = TRUE;
1073 while (pat < patend) {
1074 if (pat[0] == '#') {
1075 pat++;
1076 pat = memchr(pat, '\n', patend-pat);
1077 if (!pat) return FALSE;
1078 } else if (pat[0] == 'U') {
1079 if (first || pat[1] == '0') return TRUE;
1080 } else first = FALSE;
1081 pat++;
1082 }
1083 return FALSE;
1084}
1085
1086STATIC char
1087first_symbol(const char *pat, const char *patend) {
1088 while (pat < patend) {
1089 if (pat[0] != '#') return pat[0];
1090 pat++;
1091 pat = memchr(pat, '\n', patend-pat);
1092 if (!pat) return 0;
1093 pat++;
1094 }
1095 return 0;
1096}
1097
1098/*
18529408
IZ
1099=for apidoc unpack_str
1100
7accc089
JH
1101The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1102and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1103
1104=cut */
1105
1106I32
f337b084 1107Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1108{
49704364 1109 tempsym_t sym = { 0 };
08ca2aa3 1110
f337b084 1111 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1112 else if (need_utf8(pat, patend)) {
1113 /* We probably should try to avoid this in case a scalar context call
1114 wouldn't get to the "U0" */
1115 STRLEN len = strend - s;
f337b084 1116 s = (char *) bytes_to_utf8(s, &len);
08ca2aa3
TH
1117 SAVEFREEPV(s);
1118 strend = s + len;
f337b084 1119 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1120 }
1121
f337b084
TH
1122 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1123 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1124
49704364
LW
1125 sym.patptr = pat;
1126 sym.patend = patend;
1127 sym.flags = flags;
1128
1129 return unpack_rec(&sym, s, s, strend, NULL );
1130}
1131
7accc089
JH
1132/*
1133=for apidoc unpackstring
1134
608d3aed
LW
1135The engine implementing unpack() Perl function. C<unpackstring> puts the
1136extracted list items on the stack and returns the number of elements.
1137Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1138
1139=cut */
1140
1141I32
f337b084 1142Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
7accc089
JH
1143{
1144 tempsym_t sym = { 0 };
08ca2aa3 1145
f337b084 1146 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1147 else if (need_utf8(pat, patend)) {
1148 /* We probably should try to avoid this in case a scalar context call
1149 wouldn't get to the "U0" */
1150 STRLEN len = strend - s;
f337b084 1151 s = (char *) bytes_to_utf8(s, &len);
08ca2aa3
TH
1152 SAVEFREEPV(s);
1153 strend = s + len;
f337b084 1154 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1155 }
1156
f337b084
TH
1157 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1158 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1159
7accc089
JH
1160 sym.patptr = pat;
1161 sym.patend = patend;
1162 sym.flags = flags;
1163
1164 return unpack_rec(&sym, s, s, strend, NULL );
1165}
1166
49704364
LW
1167STATIC
1168I32
08ca2aa3 1169S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
49704364 1170{
a6ec74c1 1171 dSP;
18529408
IZ
1172 SV *sv;
1173 I32 start_sp_offset = SP - PL_stack_base;
49704364 1174 howlen_t howlen;
a6ec74c1 1175
a6ec74c1 1176 I32 checksum = 0;
92d41999 1177 UV cuv = 0;
a6ec74c1 1178 NV cdouble = 0.0;
f337b084 1179 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364
LW
1180 char* strrelbeg = s;
1181 bool beyond = FALSE;
21c16052 1182 bool explicit_length;
49704364 1183 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1184 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
49704364 1185
49704364 1186 while (next_symbol(symptr)) {
a7a3cfaa 1187 packprops_t props;
f337b084
TH
1188 I32 len, ai32;
1189 I32 datumtype = symptr->code;
206947d2 1190 /* do first one only unless in list context
08ca2aa3 1191 / is implemented by unpacking the count, then popping it from the
206947d2 1192 stack, so must check that we're not in the middle of a / */
49704364 1193 if ( unpack_only_one
206947d2 1194 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1195 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1196 break;
49704364 1197
f337b084 1198 switch (howlen = symptr->howlen) {
fc241834
RGS
1199 case e_star:
1200 len = strend - strbeg; /* long enough */
49704364 1201 break;
f337b084
TH
1202 default:
1203 /* e_no_len and e_number */
1204 len = symptr->length;
1205 break;
49704364 1206 }
18529408 1207
21c16052 1208 explicit_length = TRUE;
a6ec74c1 1209 redo_switch:
49704364 1210 beyond = s >= strend;
a7a3cfaa
TH
1211
1212 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1213 if (props) {
1214 /* props nonzero means we can process this letter. */
1215 long size = props & PACK_SIZE_MASK;
1216 long howmany = (strend - s) / size;
1217 if (len > howmany)
1218 len = howmany;
1219
1220 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1221 if (len && unpack_only_one) len = 1;
1222 EXTEND(SP, len);
1223 EXTEND_MORTAL(len);
78d46eaa
NC
1224 }
1225 }
a7a3cfaa 1226
1109a392 1227 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1228 default:
1109a392 1229 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1230
a6ec74c1 1231 case '%':
49704364 1232 if (howlen == e_no_len)
18529408 1233 len = 16; /* len is not specified */
a6ec74c1 1234 checksum = len;
92d41999 1235 cuv = 0;
a6ec74c1 1236 cdouble = 0;
18529408 1237 continue;
a6ec74c1 1238 break;
18529408
IZ
1239 case '(':
1240 {
49704364 1241 tempsym_t savsym = *symptr;
66c611c5
MHM
1242 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1243 symptr->flags |= group_modifiers;
49704364
LW
1244 symptr->patend = savsym.grpend;
1245 symptr->level++;
18529408
IZ
1246 PUTBACK;
1247 while (len--) {
49704364 1248 symptr->patptr = savsym.grpbeg;
f337b084
TH
1249 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1250 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1251 unpack_rec(symptr, s, strbeg, strend, &s);
1252 if (s == strend && savsym.howlen == e_star)
49704364 1253 break; /* No way to continue */
18529408
IZ
1254 }
1255 SPAGAIN;
66c611c5 1256 symptr->flags &= ~group_modifiers;
49704364
LW
1257 savsym.flags = symptr->flags;
1258 *symptr = savsym;
18529408
IZ
1259 break;
1260 }
a6ec74c1 1261 case '@':
08ca2aa3
TH
1262 if (utf8) {
1263 s = strrelbeg;
1264 while (len > 0) {
1265 if (s >= strend)
1266 Perl_croak(aTHX_ "'@' outside of string in unpack");
1267 s += UTF8SKIP(s);
1268 len--;
1269 }
1270 if (s > strend)
1271 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1272 } else {
fc241834
RGS
1273 if (len > strend - strrelbeg)
1274 Perl_croak(aTHX_ "'@' outside of string in unpack");
1275 s = strrelbeg + len;
08ca2aa3 1276 }
a6ec74c1 1277 break;
62f95557
IZ
1278 case 'X' | TYPE_IS_SHRIEKING:
1279 if (!len) /* Avoid division by 0 */
1280 len = 1;
08ca2aa3
TH
1281 if (utf8) {
1282 char *hop, *last;
f337b084
TH
1283 I32 l = len;
1284 hop = last = strbeg;
1285 while (hop < s) {
1286 hop += UTF8SKIP(hop);
1287 if (--l == 0) {
08ca2aa3 1288 last = hop;
f337b084
TH
1289 l = len;
1290 }
fc241834 1291 }
f337b084
TH
1292 if (last > s)
1293 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1294 s = last;
1295 break;
f337b084
TH
1296 }
1297 len = (s - strbeg) % len;
62f95557 1298 /* FALL THROUGH */
a6ec74c1 1299 case 'X':
08ca2aa3
TH
1300 if (utf8) {
1301 while (len > 0) {
1302 if (s <= strbeg)
1303 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1304 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1305 if (s <= strbeg)
1306 Perl_croak(aTHX_ "'X' outside of string in unpack");
1307 }
1308 len--;
1309 }
1310 } else {
fc241834
RGS
1311 if (len > s - strbeg)
1312 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1313 s -= len;
08ca2aa3 1314 }
a6ec74c1 1315 break;
62f95557
IZ
1316 case 'x' | TYPE_IS_SHRIEKING:
1317 if (!len) /* Avoid division by 0 */
1318 len = 1;
f337b084
TH
1319 if (utf8) ai32 = utf8_length(strbeg, s) % len;
1320 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1321 if (ai32 == 0) break;
1322 len -= ai32;
62f95557 1323 /* FALL THROUGH */
a6ec74c1 1324 case 'x':
08ca2aa3
TH
1325 if (utf8) {
1326 while (len>0) {
1327 if (s >= strend)
1328 Perl_croak(aTHX_ "'x' outside of string in unpack");
1329 s += UTF8SKIP(s);
1330 len--;
1331 }
1332 } else {
fc241834
RGS
1333 if (len > strend - s)
1334 Perl_croak(aTHX_ "'x' outside of string in unpack");
1335 s += len;
f337b084 1336 }
a6ec74c1
JH
1337 break;
1338 case '/':
49704364
LW
1339 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1340 break;
a6ec74c1
JH
1341 case 'A':
1342 case 'Z':
1343 case 'a':
08ca2aa3
TH
1344 if (checksum) {
1345 /* Preliminary length estimate is assumed done in 'W' */
1346 if (len > strend - s) len = strend - s;
1347 goto W_checksum;
1348 }
1349 if (utf8) {
1350 I32 l;
1351 char *hop;
1352 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1353 if (hop >= strend) {
1354 if (hop > strend)
1355 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1356 break;
fc241834 1357 }
a6ec74c1 1358 }
08ca2aa3
TH
1359 if (hop > strend)
1360 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1361 len = hop - s;
1362 } else if (len > strend - s)
1363 len = strend - s;
1364
1365 if (datumtype == 'Z') {
1366 /* 'Z' strips stuff after first null */
f337b084
TH
1367 char *ptr, *end;
1368 end = s + len;
1369 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1370 sv = newSVpvn(s, ptr-s);
1371 if (howlen == e_star) /* exact for 'Z*' */
1372 len = ptr-s + (ptr != strend ? 1 : 0);
1373 } else if (datumtype == 'A') {
1374 /* 'A' strips both nulls and spaces */
1375 char *ptr;
1376 for (ptr = s+len-1; ptr >= s; ptr--)
1377 if (*ptr != 0 && !isSPACE(*ptr)) break;
1378 ptr++;
1379 sv = newSVpvn(s, ptr-s);
1380 } else sv = newSVpvn(s, len);
1381
1382 if (utf8) {
1383 SvUTF8_on(sv);
1384 /* Undo any upgrade done due to need_utf8() */
f337b084 1385 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1386 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1387 }
1388 XPUSHs(sv_2mortal(sv));
08ca2aa3 1389 s += len;
a6ec74c1
JH
1390 break;
1391 case 'B':
08ca2aa3
TH
1392 case 'b': {
1393 char *str;
49704364 1394 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1395 len = (strend - s) * 8;
1396 if (checksum) {
1397 if (!PL_bitcount) {
08ca2aa3 1398 int bits;
a6ec74c1
JH
1399 Newz(601, PL_bitcount, 256, char);
1400 for (bits = 1; bits < 256; bits++) {
1401 if (bits & 1) PL_bitcount[bits]++;
1402 if (bits & 2) PL_bitcount[bits]++;
1403 if (bits & 4) PL_bitcount[bits]++;
1404 if (bits & 8) PL_bitcount[bits]++;
1405 if (bits & 16) PL_bitcount[bits]++;
1406 if (bits & 32) PL_bitcount[bits]++;
1407 if (bits & 64) PL_bitcount[bits]++;
1408 if (bits & 128) PL_bitcount[bits]++;
1409 }
1410 }
f337b084 1411 if (utf8)
08ca2aa3 1412 while (len >= 8 && s < strend) {
f337b084 1413 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1414 len -= 8;
1415 }
f337b084 1416 else
fc241834 1417 while (len >= 8) {
08ca2aa3 1418 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1419 len -= 8;
1420 }
08ca2aa3
TH
1421 if (len && s < strend) {
1422 U8 bits;
f337b084
TH
1423 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1424 if (datumtype == 'b')
a6ec74c1 1425 while (len-- > 0) {
92d41999 1426 if (bits & 1) cuv++;
a6ec74c1
JH
1427 bits >>= 1;
1428 }
f337b084 1429 else
a6ec74c1 1430 while (len-- > 0) {
08ca2aa3 1431 if (bits & 0x80) cuv++;
a6ec74c1
JH
1432 bits <<= 1;
1433 }
fc241834 1434 }
a6ec74c1
JH
1435 break;
1436 }
08ca2aa3
TH
1437
1438 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1439 SvPOK_on(sv);
1440 str = SvPVX(sv);
1441 if (datumtype == 'b') {
f337b084 1442 U8 bits = 0;
08ca2aa3
TH
1443 ai32 = len;
1444 for (len = 0; len < ai32; len++) {
1445 if (len & 7) bits >>= 1;
1446 else if (utf8) {
1447 if (s >= strend) break;
f337b084 1448 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1449 } else bits = *(U8 *) s++;
1450 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1451 }
08ca2aa3 1452 } else {
f337b084 1453 U8 bits = 0;
08ca2aa3
TH
1454 ai32 = len;
1455 for (len = 0; len < ai32; len++) {
1456 if (len & 7) bits <<= 1;
1457 else if (utf8) {
1458 if (s >= strend) break;
f337b084 1459 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1460 } else bits = *(U8 *) s++;
1461 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1462 }
1463 }
1464 *str = '\0';
08ca2aa3
TH
1465 SvCUR_set(sv, str - SvPVX(sv));
1466 XPUSHs(sv);
a6ec74c1 1467 break;
08ca2aa3 1468 }
a6ec74c1 1469 case 'H':
08ca2aa3
TH
1470 case 'h': {
1471 char *str;
fc241834 1472 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1473 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1474 len = (strend - s) * 2;
fc241834 1475 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1476 SvPOK_on(sv);
1477 str = SvPVX(sv);
1478 if (datumtype == 'h') {
f337b084 1479 U8 bits = 0;
fc241834
RGS
1480 ai32 = len;
1481 for (len = 0; len < ai32; len++) {
1482 if (len & 1) bits >>= 4;
1483 else if (utf8) {
1484 if (s >= strend) break;
f337b084 1485 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1486 } else bits = * (U8 *) s++;
a6ec74c1
JH
1487 *str++ = PL_hexdigit[bits & 15];
1488 }
08ca2aa3 1489 } else {
f337b084 1490 U8 bits = 0;
08ca2aa3
TH
1491 ai32 = len;
1492 for (len = 0; len < ai32; len++) {
1493 if (len & 1) bits <<= 4;
1494 else if (utf8) {
1495 if (s >= strend) break;
f337b084 1496 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1497 } else bits = *(U8 *) s++;
a6ec74c1
JH
1498 *str++ = PL_hexdigit[(bits >> 4) & 15];
1499 }
1500 }
1501 *str = '\0';
08ca2aa3
TH
1502 SvCUR_set(sv, str - SvPVX(sv));
1503 XPUSHs(sv);
a6ec74c1 1504 break;
08ca2aa3 1505 }
a6ec74c1 1506 case 'c':
73cb7263 1507 while (len-- > 0) {
f337b084 1508 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1509 if (aint >= 128) /* fake up signed chars */
1510 aint -= 256;
08ca2aa3 1511 if (!checksum)
ac7f3b1b 1512 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1513 else if (checksum > bits_in_uv)
1514 cdouble += (NV)aint;
1515 else
1516 cuv += aint;
a6ec74c1
JH
1517 }
1518 break;
1519 case 'C':
08ca2aa3
TH
1520 case 'W':
1521 W_checksum:
35bcd338 1522 if (len == 0) {
fc241834 1523 if (explicit_length && datumtype == 'C')
08ca2aa3 1524 /* Switch to "character" mode */
f337b084 1525 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1526 break;
1527 }
fc241834 1528 if (datumtype == 'C' ?
f337b084
TH
1529 (symptr->flags & FLAG_DO_UTF8) &&
1530 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3
TH
1531 while (len-- > 0 && s < strend) {
1532 UV val;
1533 STRLEN retlen;
f337b084
TH
1534 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1535 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1536 if (retlen == (STRLEN) -1 || retlen == 0)
1537 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1538 s += retlen;
1539 if (!checksum)
1540 PUSHs(sv_2mortal(newSVuv((UV) val)));
1541 else if (checksum > bits_in_uv)
1542 cdouble += (NV) val;
d6d3e8bd 1543 else
08ca2aa3 1544 cuv += val;
fc241834 1545 }
08ca2aa3 1546 } else if (!checksum)
a6ec74c1 1547 while (len-- > 0) {
08ca2aa3
TH
1548 U8 ch = *(U8 *) s++;
1549 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1550 }
08ca2aa3
TH
1551 else if (checksum > bits_in_uv)
1552 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1553 else
1554 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1555 break;
1556 case 'U':
35bcd338 1557 if (len == 0) {
08ca2aa3
TH
1558 if (explicit_length) {
1559 /* Switch to "bytes in UTF-8" mode */
f337b084 1560 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1561 else
1562 /* Should be impossible due to the need_utf8() test */
1563 Perl_croak(aTHX_ "U0 mode on a byte string");
1564 }
35bcd338
JH
1565 break;
1566 }
08ca2aa3 1567 if (len > strend - s) len = strend - s;
fc241834 1568 if (!checksum) {
08ca2aa3
TH
1569 if (len && unpack_only_one) len = 1;
1570 EXTEND(SP, len);
1571 EXTEND_MORTAL(len);
fc241834 1572 }
08ca2aa3
TH
1573 while (len-- > 0 && s < strend) {
1574 STRLEN retlen;
1575 UV auv;
1576 if (utf8) {
1577 U8 result[UTF8_MAXLEN];
1578 char *ptr;
1579 STRLEN len;
1580 ptr = s;
1581 /* Bug: warns about bad utf8 even if we are short on bytes
1582 and will break out of the loop */
f337b084 1583 if (!uni_to_bytes(aTHX_ &ptr, strend, result, 1, 'U'))
08ca2aa3
TH
1584 break;
1585 len = UTF8SKIP(result);
fc241834 1586 if (!uni_to_bytes(aTHX_ &ptr, strend,
f337b084 1587 &result[1], len-1, 'U')) break;
08ca2aa3
TH
1588 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1589 s = ptr;
1590 } else {
1591 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1592 if (retlen == (STRLEN) -1 || retlen == 0)
1593 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1594 s += retlen;
1595 }
1596 if (!checksum)
1597 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1598 else if (checksum > bits_in_uv)
08ca2aa3 1599 cdouble += (NV) auv;
73cb7263 1600 else
08ca2aa3 1601 cuv += auv;
a6ec74c1
JH
1602 }
1603 break;
49704364
LW
1604 case 's' | TYPE_IS_SHRIEKING:
1605#if SHORTSIZE != SIZE16
73cb7263 1606 while (len-- > 0) {
08ca2aa3 1607 short ashort;
f337b084
TH
1608 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1609 DO_BO_UNPACK(ashort, s);
08ca2aa3 1610 if (!checksum)
ac7f3b1b 1611 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1612 else if (checksum > bits_in_uv)
1613 cdouble += (NV)ashort;
1614 else
1615 cuv += ashort;
49704364
LW
1616 }
1617 break;
1618#else
1619 /* Fallthrough! */
a6ec74c1 1620#endif
49704364 1621 case 's':
73cb7263 1622 while (len-- > 0) {
08ca2aa3
TH
1623 I16 ai16;
1624
1625#if U16SIZE > SIZE16
1626 ai16 = 0;
1627#endif
f337b084 1628 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1629 DO_BO_UNPACK(ai16, 16);
1109a392 1630#if U16SIZE > SIZE16
73cb7263
NC
1631 if (ai16 > 32767)
1632 ai16 -= 65536;
a6ec74c1 1633#endif
08ca2aa3 1634 if (!checksum)
ac7f3b1b 1635 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1636 else if (checksum > bits_in_uv)
1637 cdouble += (NV)ai16;
1638 else
1639 cuv += ai16;
a6ec74c1
JH
1640 }
1641 break;
49704364
LW
1642 case 'S' | TYPE_IS_SHRIEKING:
1643#if SHORTSIZE != SIZE16
73cb7263 1644 while (len-- > 0) {
08ca2aa3 1645 unsigned short aushort;
f337b084
TH
1646 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1647 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1648 if (!checksum)
1649 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)aushort;
1652 else
1653 cuv += aushort;
49704364
LW
1654 }
1655 break;
1656#else
1657 /* Fallhrough! */
1658#endif
a6ec74c1
JH
1659 case 'v':
1660 case 'n':
1661 case 'S':
73cb7263 1662 while (len-- > 0) {
08ca2aa3
TH
1663 U16 au16;
1664#if U16SIZE > SIZE16
1665 au16 = 0;
1666#endif
f337b084 1667 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1668 DO_BO_UNPACK(au16, 16);
a6ec74c1 1669#ifdef HAS_NTOHS
73cb7263
NC
1670 if (datumtype == 'n')
1671 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1672#endif
1673#ifdef HAS_VTOHS
73cb7263
NC
1674 if (datumtype == 'v')
1675 au16 = vtohs(au16);
a6ec74c1 1676#endif
08ca2aa3
TH
1677 if (!checksum)
1678 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1679 else if (checksum > bits_in_uv)
f337b084 1680 cdouble += (NV) au16;
73cb7263
NC
1681 else
1682 cuv += au16;
a6ec74c1
JH
1683 }
1684 break;
7212898e 1685#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1686 case 'v' | TYPE_IS_SHRIEKING:
1687 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1688 while (len-- > 0) {
08ca2aa3
TH
1689 I16 ai16;
1690# if U16SIZE > SIZE16
1691 ai16 = 0;
1692# endif
f337b084 1693 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1694# ifdef HAS_NTOHS
73cb7263 1695 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1696 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1697# endif /* HAS_NTOHS */
1698# ifdef HAS_VTOHS
73cb7263 1699 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1700 ai16 = (I16) vtohs((U16) ai16);
1701# endif /* HAS_VTOHS */
1702 if (!checksum)
ac7f3b1b 1703 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1704 else if (checksum > bits_in_uv)
08ca2aa3 1705 cdouble += (NV) ai16;
73cb7263
NC
1706 else
1707 cuv += ai16;
068bd2e7
MHM
1708 }
1709 break;
08ca2aa3 1710#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1711 case 'i':
49704364 1712 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1713 while (len-- > 0) {
08ca2aa3 1714 int aint;
f337b084
TH
1715 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1716 DO_BO_UNPACK(aint, i);
08ca2aa3 1717 if (!checksum)
ac7f3b1b 1718 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1719 else if (checksum > bits_in_uv)
1720 cdouble += (NV)aint;
1721 else
1722 cuv += aint;
a6ec74c1
JH
1723 }
1724 break;
1725 case 'I':
49704364 1726 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1727 while (len-- > 0) {
08ca2aa3 1728 unsigned int auint;
f337b084
TH
1729 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1730 DO_BO_UNPACK(auint, i);
08ca2aa3 1731 if (!checksum)
ac7f3b1b 1732 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1733 else if (checksum > bits_in_uv)
1734 cdouble += (NV)auint;
1735 else
1736 cuv += auint;
a6ec74c1
JH
1737 }
1738 break;
92d41999 1739 case 'j':
73cb7263 1740 while (len-- > 0) {
08ca2aa3 1741 IV aiv;
f337b084 1742 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1743#if IVSIZE == INTSIZE
f337b084 1744 DO_BO_UNPACK(aiv, i);
1109a392 1745#elif IVSIZE == LONGSIZE
f337b084 1746 DO_BO_UNPACK(aiv, l);
1109a392 1747#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1748 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1749#else
1750 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1751#endif
08ca2aa3 1752 if (!checksum)
ac7f3b1b 1753 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1754 else if (checksum > bits_in_uv)
1755 cdouble += (NV)aiv;
1756 else
1757 cuv += aiv;
92d41999
JH
1758 }
1759 break;
1760 case 'J':
73cb7263 1761 while (len-- > 0) {
08ca2aa3 1762 UV auv;
f337b084 1763 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1764#if IVSIZE == INTSIZE
f337b084 1765 DO_BO_UNPACK(auv, i);
08ca2aa3 1766#elif IVSIZE == LONGSIZE
f337b084 1767 DO_BO_UNPACK(auv, l);
08ca2aa3 1768#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1769 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1770#else
1771 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1772#endif
08ca2aa3 1773 if (!checksum)
ac7f3b1b 1774 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1775 else if (checksum > bits_in_uv)
1776 cdouble += (NV)auv;
1777 else
1778 cuv += auv;
92d41999
JH
1779 }
1780 break;
49704364
LW
1781 case 'l' | TYPE_IS_SHRIEKING:
1782#if LONGSIZE != SIZE32
73cb7263 1783 while (len-- > 0) {
08ca2aa3 1784 long along;
f337b084
TH
1785 SHIFT_VAR(utf8, s, strend, along, datumtype);
1786 DO_BO_UNPACK(along, l);
08ca2aa3 1787 if (!checksum)
ac7f3b1b 1788 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1789 else if (checksum > bits_in_uv)
1790 cdouble += (NV)along;
1791 else
1792 cuv += along;
49704364
LW
1793 }
1794 break;
1795#else
1796 /* Fallthrough! */
a6ec74c1 1797#endif
49704364 1798 case 'l':
73cb7263 1799 while (len-- > 0) {
08ca2aa3
TH
1800 I32 ai32;
1801#if U32SIZE > SIZE32
1802 ai32 = 0;
1803#endif
f337b084 1804 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1805 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1806#if U32SIZE > SIZE32
08ca2aa3 1807 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1808#endif
08ca2aa3 1809 if (!checksum)
ac7f3b1b 1810 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV)ai32;
1813 else
1814 cuv += ai32;
a6ec74c1
JH
1815 }
1816 break;
49704364
LW
1817 case 'L' | TYPE_IS_SHRIEKING:
1818#if LONGSIZE != SIZE32
73cb7263 1819 while (len-- > 0) {
08ca2aa3 1820 unsigned long aulong;
f337b084
TH
1821 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1822 DO_BO_UNPACK(aulong, l);
08ca2aa3 1823 if (!checksum)
ac7f3b1b 1824 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1825 else if (checksum > bits_in_uv)
1826 cdouble += (NV)aulong;
1827 else
1828 cuv += aulong;
49704364
LW
1829 }
1830 break;
1831#else
1832 /* Fall through! */
1833#endif
a6ec74c1
JH
1834 case 'V':
1835 case 'N':
1836 case 'L':
73cb7263 1837 while (len-- > 0) {
08ca2aa3
TH
1838 U32 au32;
1839#if U32SIZE > SIZE32
1840 au32 = 0;
1841#endif
f337b084 1842 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1843 DO_BO_UNPACK(au32, 32);
a6ec74c1 1844#ifdef HAS_NTOHL
73cb7263
NC
1845 if (datumtype == 'N')
1846 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1847#endif
1848#ifdef HAS_VTOHL
73cb7263
NC
1849 if (datumtype == 'V')
1850 au32 = vtohl(au32);
a6ec74c1 1851#endif
08ca2aa3 1852 if (!checksum)
fc241834
RGS
1853 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1854 else if (checksum > bits_in_uv)
1855 cdouble += (NV)au32;
1856 else
1857 cuv += au32;
a6ec74c1
JH
1858 }
1859 break;
7212898e 1860#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1861 case 'V' | TYPE_IS_SHRIEKING:
1862 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1863 while (len-- > 0) {
08ca2aa3
TH
1864 I32 ai32;
1865# if U32SIZE > SIZE32
1866 ai32 = 0;
1867# endif
f337b084 1868 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1869# ifdef HAS_NTOHL
73cb7263
NC
1870 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1871 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1872# endif
1873# ifdef HAS_VTOHL
73cb7263
NC
1874 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1875 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1876# endif
1877 if (!checksum)
ac7f3b1b 1878 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1879 else if (checksum > bits_in_uv)
1880 cdouble += (NV)ai32;
1881 else
1882 cuv += ai32;
068bd2e7
MHM
1883 }
1884 break;
08ca2aa3 1885#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1886 case 'p':
a6ec74c1 1887 while (len-- > 0) {
08ca2aa3 1888 char *aptr;
f337b084 1889 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
5512a2f9 1890 DO_BO_UNPACK_P(aptr);
c4c5f44a
NC
1891 /* newSVpv generates undef if aptr is NULL */
1892 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1893 }
1894 break;
1895 case 'w':
a6ec74c1
JH
1896 {
1897 UV auv = 0;
1898 U32 bytes = 0;
fc241834 1899
08ca2aa3
TH
1900 while (len > 0 && s < strend) {
1901 U8 ch;
f337b084 1902 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1903 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1904 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1905 if (ch < 0x80) {
a6ec74c1 1906 bytes = 0;
ac7f3b1b 1907 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1908 len--;
1909 auv = 0;
08ca2aa3 1910 continue;
a6ec74c1 1911 }
08ca2aa3 1912 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
1913 char *t;
1914 STRLEN n_a;
1915
1916 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1917 while (s < strend) {
f337b084 1918 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1919 sv = mul128(sv, (U8)(ch & 0x7f));
1920 if (!(ch & 0x80)) {
a6ec74c1
JH
1921 bytes = 0;
1922 break;
1923 }
1924 }
1925 t = SvPV(sv, n_a);
1926 while (*t == '0')
1927 t++;
1928 sv_chop(sv, t);
1929 PUSHs(sv_2mortal(sv));
1930 len--;
1931 auv = 0;
1932 }
1933 }
1934 if ((s >= strend) && bytes)
49704364 1935 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1936 }
1937 break;
1938 case 'P':
49704364
LW
1939 if (symptr->howlen == e_star)
1940 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1941 EXTEND(SP, 1);
08ca2aa3
TH
1942 if (sizeof(char*) <= strend - s) {
1943 char *aptr;
f337b084 1944 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
08ca2aa3 1945 DO_BO_UNPACK_P(aptr);
fc241834
RGS
1946 /* newSVpvn generates undef if aptr is NULL */
1947 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 1948 }
a6ec74c1
JH
1949 break;
1950#ifdef HAS_QUAD
1951 case 'q':
73cb7263 1952 while (len-- > 0) {
08ca2aa3 1953 Quad_t aquad;
f337b084
TH
1954 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1955 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
1956 if (!checksum)
1957 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 1958 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
1959 else if (checksum > bits_in_uv)
1960 cdouble += (NV)aquad;
1961 else
1962 cuv += aquad;
1963 }
a6ec74c1
JH
1964 break;
1965 case 'Q':
73cb7263 1966 while (len-- > 0) {
08ca2aa3 1967 Uquad_t auquad;
f337b084
TH
1968 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1969 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
1970 if (!checksum)
1971 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1972 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
1973 else if (checksum > bits_in_uv)
1974 cdouble += (NV)auquad;
1975 else
1976 cuv += auquad;
a6ec74c1
JH
1977 }
1978 break;
08ca2aa3 1979#endif /* HAS_QUAD */
a6ec74c1
JH
1980 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1981 case 'f':
73cb7263 1982 while (len-- > 0) {
08ca2aa3 1983 float afloat;
f337b084 1984 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1985 DO_BO_UNPACK_N(afloat, float);
1986 if (!checksum)
1987 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
1988 else
73cb7263 1989 cdouble += afloat;
fc241834 1990 }
a6ec74c1
JH
1991 break;
1992 case 'd':
73cb7263 1993 while (len-- > 0) {
08ca2aa3 1994 double adouble;
f337b084 1995 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
1996 DO_BO_UNPACK_N(adouble, double);
1997 if (!checksum)
1998 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
1999 else
73cb7263 2000 cdouble += adouble;
fc241834 2001 }
a6ec74c1 2002 break;
92d41999 2003 case 'F':
73cb7263 2004 while (len-- > 0) {
08ca2aa3 2005 NV anv;
f337b084 2006 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2007 DO_BO_UNPACK_N(anv, NV);
2008 if (!checksum)
2009 PUSHs(sv_2mortal(newSVnv(anv)));
2010 else
73cb7263 2011 cdouble += anv;
fc241834 2012 }
92d41999
JH
2013 break;
2014#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2015 case 'D':
73cb7263 2016 while (len-- > 0) {
08ca2aa3 2017 long double aldouble;
f337b084 2018 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2019 DO_BO_UNPACK_N(aldouble, long double);
2020 if (!checksum)
2021 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2022 else
2023 cdouble += aldouble;
92d41999
JH
2024 }
2025 break;
2026#endif
a6ec74c1
JH
2027 case 'u':
2028 /* MKS:
2029 * Initialise the decode mapping. By using a table driven
2030 * algorithm, the code will be character-set independent
2031 * (and just as fast as doing character arithmetic)
2032 */
2033 if (PL_uudmap['M'] == 0) {
2034 int i;
2035
2036 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2037 PL_uudmap[(U8)PL_uuemap[i]] = i;
2038 /*
2039 * Because ' ' and '`' map to the same value,
2040 * we need to decode them both the same.
2041 */
2042 PL_uudmap[' '] = 0;
2043 }
08ca2aa3
TH
2044 {
2045 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2046 sv = sv_2mortal(NEWSV(42, l));
2047 if (l) SvPOK_on(sv);
2048 }
2049 if (utf8) {
2050 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2051 I32 a, b, c, d;
2052 char hunk[4];
2053
2054 hunk[3] = '\0';
2055 while (len > 0) {
2056 next_uni_uu(aTHX_ &s, strend, &a);
2057 next_uni_uu(aTHX_ &s, strend, &b);
2058 next_uni_uu(aTHX_ &s, strend, &c);
2059 next_uni_uu(aTHX_ &s, strend, &d);
2060 hunk[0] = (char)((a << 2) | (b >> 4));
2061 hunk[1] = (char)((b << 4) | (c >> 2));
2062 hunk[2] = (char)((c << 6) | d);
2063 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2064 len -= 3;
2065 }
2066 if (s < strend) {
2067 if (*s == '\n') s++;
2068 else {
2069 /* possible checksum byte */
2070 char *skip = s+UTF8SKIP(s);
2071 if (skip < strend && *skip == '\n') s = skip+1;
2072 }
2073 }
2074 }
2075 } else {
fc241834
RGS
2076 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2077 I32 a, b, c, d;
2078 char hunk[4];
a6ec74c1 2079
fc241834
RGS
2080 hunk[3] = '\0';
2081 len = PL_uudmap[*(U8*)s++] & 077;
2082 while (len > 0) {
2083 if (s < strend && ISUUCHAR(*s))
2084 a = PL_uudmap[*(U8*)s++] & 077;
2085 else
2086 a = 0;
2087 if (s < strend && ISUUCHAR(*s))
2088 b = PL_uudmap[*(U8*)s++] & 077;
2089 else
2090 b = 0;
2091 if (s < strend && ISUUCHAR(*s))
2092 c = PL_uudmap[*(U8*)s++] & 077;
2093 else
2094 c = 0;
2095 if (s < strend && ISUUCHAR(*s))
2096 d = PL_uudmap[*(U8*)s++] & 077;
2097 else
2098 d = 0;
2099 hunk[0] = (char)((a << 2) | (b >> 4));
2100 hunk[1] = (char)((b << 4) | (c >> 2));
2101 hunk[2] = (char)((c << 6) | d);
2102 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2103 len -= 3;
2104 }
2105 if (*s == '\n')
2106 s++;
2107 else /* possible checksum byte */
2108 if (s + 1 < strend && s[1] == '\n')
2109 s += 2;
a6ec74c1 2110 }
08ca2aa3
TH
2111 }
2112 XPUSHs(sv);
a6ec74c1
JH
2113 break;
2114 }
49704364 2115
a6ec74c1 2116 if (checksum) {
1109a392 2117 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2118 (checksum > bits_in_uv &&
08ca2aa3
TH
2119 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2120 NV trouble, anv;
a6ec74c1 2121
08ca2aa3 2122 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2123 while (checksum >= 16) {
2124 checksum -= 16;
08ca2aa3 2125 anv *= 65536.0;
a6ec74c1 2126 }
a6ec74c1 2127 while (cdouble < 0.0)
08ca2aa3
TH
2128 cdouble += anv;
2129 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2130 sv = newSVnv(cdouble);
a6ec74c1
JH
2131 }
2132 else {
fa8ec7c1
NC
2133 if (checksum < bits_in_uv) {
2134 UV mask = ((UV)1 << checksum) - 1;
92d41999 2135 cuv &= mask;
a6ec74c1 2136 }
c4c5f44a 2137 sv = newSVuv(cuv);
a6ec74c1
JH
2138 }
2139 XPUSHs(sv_2mortal(sv));
2140 checksum = 0;
2141 }
fc241834 2142
49704364
LW
2143 if (symptr->flags & FLAG_SLASH){
2144 if (SP - PL_stack_base - start_sp_offset <= 0)
2145 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2146 if( next_symbol(symptr) ){
2147 if( symptr->howlen == e_number )
2148 Perl_croak(aTHX_ "Count after length/code in unpack" );
2149 if( beyond ){
2150 /* ...end of char buffer then no decent length available */
2151 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2152 } else {
2153 /* take top of stack (hope it's numeric) */
2154 len = POPi;
2155 if( len < 0 )
2156 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2157 }
2158 } else {
2159 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2160 }
2161 datumtype = symptr->code;
21c16052 2162 explicit_length = FALSE;
49704364
LW
2163 goto redo_switch;
2164 }
a6ec74c1 2165 }
49704364 2166
18529408
IZ
2167 if (new_s)
2168 *new_s = s;
2169 PUTBACK;
2170 return SP - PL_stack_base - start_sp_offset;
2171}
2172
2173PP(pp_unpack)
2174{
2175 dSP;
bab9c0ac 2176 dPOPPOPssrl;
18529408
IZ
2177 I32 gimme = GIMME_V;
2178 STRLEN llen;
2179 STRLEN rlen;
08ca2aa3
TH
2180 char *pat = SvPV(left, llen);
2181 char *s = SvPV(right, rlen);
18529408 2182 char *strend = s + rlen;
08ca2aa3
TH
2183 char *patend = pat + llen;
2184 I32 cnt;
18529408
IZ
2185
2186 PUTBACK;
7accc089 2187 cnt = unpackstring(pat, patend, s, strend,
49704364 2188 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2189 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2190
18529408
IZ
2191 SPAGAIN;
2192 if ( !cnt && gimme == G_SCALAR )
2193 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2194 RETURN;
2195}
2196
f337b084
TH
2197STATIC U8 *
2198doencodes(U8 *h, char *s, I32 len)
a6ec74c1 2199{
f337b084 2200 *h++ = PL_uuemap[len];
a6ec74c1 2201 while (len > 2) {
f337b084
TH
2202 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2203 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2204 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2205 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2206 s += 3;
2207 len -= 3;
2208 }
2209 if (len > 0) {
2210 char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2211 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2212 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2213 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2214 *h++ = PL_uuemap[0];
a6ec74c1 2215 }
f337b084
TH
2216 *h++ = '\n';
2217 return h;
a6ec74c1
JH
2218}
2219
2220STATIC SV *
2221S_is_an_int(pTHX_ char *s, STRLEN l)
2222{
2223 STRLEN n_a;
2224 SV *result = newSVpvn(s, l);
2225 char *result_c = SvPV(result, n_a); /* convenience */
2226 char *out = result_c;
2227 bool skip = 1;
2228 bool ignore = 0;
2229
2230 while (*s) {
2231 switch (*s) {
2232 case ' ':
2233 break;
2234 case '+':
2235 if (!skip) {
2236 SvREFCNT_dec(result);
2237 return (NULL);
2238 }
2239 break;
2240 case '0':
2241 case '1':
2242 case '2':
2243 case '3':
2244 case '4':
2245 case '5':
2246 case '6':
2247 case '7':
2248 case '8':
2249 case '9':
2250 skip = 0;
2251 if (!ignore) {
2252 *(out++) = *s;
2253 }
2254 break;
2255 case '.':
2256 ignore = 1;
2257 break;
2258 default:
2259 SvREFCNT_dec(result);
2260 return (NULL);
2261 }
2262 s++;
2263 }
2264 *(out++) = '\0';
2265 SvCUR_set(result, out - result_c);
2266 return (result);
2267}
2268
2269/* pnum must be '\0' terminated */
2270STATIC int
2271S_div128(pTHX_ SV *pnum, bool *done)
2272{
2273 STRLEN len;
2274 char *s = SvPV(pnum, len);
2275 int m = 0;
2276 int r = 0;
2277 char *t = s;
2278
2279 *done = 1;
2280 while (*t) {
2281 int i;
2282
2283 i = m * 10 + (*t - '0');
2284 m = i & 0x7F;
2285 r = (i >> 7); /* r < 10 */
2286 if (r) {
2287 *done = 0;
2288 }
2289 *(t++) = '0' + r;
2290 }
2291 *(t++) = '\0';
2292 SvCUR_set(pnum, (STRLEN) (t - s));
2293 return (m);
2294}
2295
49704364 2296
a6ec74c1 2297
18529408
IZ
2298/*
2299=for apidoc pack_cat
2300
7accc089
JH
2301The engine implementing pack() Perl function. Note: parameters next_in_list and
2302flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2303
2304=cut */
2305
49704364 2306
18529408
IZ
2307void
2308Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2309{
49704364
LW
2310 tempsym_t sym = { 0 };
2311 sym.patptr = pat;
2312 sym.patend = patend;
7accc089
JH
2313 sym.flags = FLAG_PACK;
2314
2315 (void)pack_rec( cat, &sym, beglist, endlist );
2316}
2317
2318
2319/*
2320=for apidoc packlist
2321
2322The engine implementing pack() Perl function.
2323
2324=cut */
2325
2326
2327void
2328Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2329{
f337b084 2330 STRLEN no_len;
7accc089 2331 tempsym_t sym = { 0 };
f337b084 2332
7accc089
JH
2333 sym.patptr = pat;
2334 sym.patend = patend;
2335 sym.flags = FLAG_PACK;
49704364 2336
f337b084
TH
2337 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2338 Also make sure any UTF8 flag is loaded */
2339 SvPV_force(cat, no_len);
2340 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2341
49704364
LW
2342 (void)pack_rec( cat, &sym, beglist, endlist );
2343}
2344
f337b084
TH
2345/* like sv_utf8_upgrade, but also repoint the group start markers */
2346STATIC void
2347marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2348 STRLEN len;
2349 tempsym_t *group;
2350 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2351
2352 if (SvUTF8(sv)) return;
2353
2354 from_start = SvPVX(sv);
2355 from_end = from_start + SvCUR(sv);
2356 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2357 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2358 if (from_ptr == from_end) {
2359 /* Simple case: no character needs to be changed */
2360 SvUTF8_on(sv);
2361 return;
2362 }
2363
3473cf63 2364 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
f337b084
TH
2365 New('U', to_start, len, char);
2366 Copy(from_start, to_start, from_ptr-from_start, char);
2367 to_ptr = to_start + (from_ptr-from_start);
2368
2369 New('U', marks, sym_ptr->level+2, char *);
2370 for (group=sym_ptr; group; group = group->previous)
2371 marks[group->level] = from_start + group->strbeg;
2372 marks[sym_ptr->level+1] = from_end+1;
2373 for (m = marks; *m < from_ptr; m++)
2374 *m = to_start + (*m-from_start);
2375
2376 for (;from_ptr < from_end; from_ptr++) {
2377 while (*m == from_ptr) *m++ = to_ptr;
2378 to_ptr = uvchr_to_utf8(to_ptr, *(U8 *) from_ptr);
2379 }
2380 *to_ptr = 0;
2381
2382 while (*m == from_ptr) *m++ = to_ptr;
2383 if (m != marks + sym_ptr->level+1) {
2384 Safefree(marks);
2385 Safefree(to_start);
2386 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2387 }
2388 for (group=sym_ptr; group; group = group->previous)
2389 group->strbeg = marks[group->level] - to_start;
2390 Safefree(marks);
2391
2392 if (SvOOK(sv)) {
2393 if (SvIVX(sv)) {
2394 SvLEN(sv) += SvIVX(sv);
2395 from_start -= SvIVX(sv);
2396 SvIV_set(sv, 0);
2397 }
2398 SvFLAGS(sv) &= ~SVf_OOK;
2399 }
2400 if (SvLEN(sv) != 0)
2401 Safefree(from_start);
2402 SvPVX(sv) = to_start;
2403 SvCUR(sv) = to_ptr - to_start;
2404 SvLEN(sv) = len;
2405 SvUTF8_on(sv);
2406}
2407
2408/* Exponential string grower. Makes string extension effectively O(n)
2409 needed says how many extra bytes we need (not counting the final '\0')
2410 Only grows the string if there is an actual lack of space
2411*/
2412STATIC char *
2413sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2414 STRLEN cur = SvCUR(sv);
2415 STRLEN len = SvLEN(sv);
2416 STRLEN extend;
2417 if (len - cur > needed) return SvPVX(sv);
2418 extend = needed > len ? needed : len;
2419 return SvGROW(sv, len+extend+1);
2420}
49704364
LW
2421
2422STATIC
2423SV **
f337b084 2424S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2425{
49704364 2426 tempsym_t lookahead;
f337b084
TH
2427 I32 items = endlist - beglist;
2428 bool found = next_symbol(symptr);
2429 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2430
2431 if (symptr->level == 0 && found && symptr->code == 'U') {
2432 marked_upgrade(aTHX_ cat, symptr);
2433 symptr->flags |= FLAG_DO_UTF8;
2434 utf8 = 0;
49704364 2435 }
f337b084 2436 symptr->strbeg = SvCUR(cat);
49704364
LW
2437
2438 while (found) {
f337b084
TH
2439 SV *fromstr;
2440 STRLEN fromlen;
2441 I32 len;
a6ec74c1 2442 SV *lengthcode = Nullsv;
49704364 2443 I32 datumtype = symptr->code;
f337b084
TH
2444 howlen_t howlen = symptr->howlen;
2445 char *start = SvPVX(cat);
2446 char *cur = start + SvCUR(cat);
49704364 2447
f337b084
TH
2448#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2449
2450 switch (howlen) {
fc241834 2451 case e_star:
f337b084
TH
2452 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2453 0 : items;
2454 break;
2455 default:
2456 /* e_no_len and e_number */
2457 len = symptr->length;
49704364
LW
2458 break;
2459 }
2460
f337b084 2461 if (len) {
a7a3cfaa 2462 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2463
a7a3cfaa
TH
2464 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2465 /* We can process this letter. */
2466 STRLEN size = props & PACK_SIZE_MASK;
2467 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2468 }
f337b084
TH
2469 }
2470
49704364
LW
2471 /* Look ahead for next symbol. Do we have code/code? */
2472 lookahead = *symptr;
2473 found = next_symbol(&lookahead);
2474 if ( symptr->flags & FLAG_SLASH ) {
f337b084 2475 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
49704364
LW
2476 if ( 0 == strchr( "aAZ", lookahead.code ) ||
2477 e_star != lookahead.howlen )
2478 Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
f337b084
TH
2479 lengthcode =
2480 sv_2mortal(newSViv((items > 0 ? DO_UTF8(*beglist) ? sv_len_utf8(*beglist) : sv_len(*beglist) : 0) + (lookahead.code == 'Z' ? 1 : 0)));
a6ec74c1 2481 }
49704364 2482
fc241834
RGS
2483 /* Code inside the switch must take care to properly update
2484 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2485 doesn't simply leave using break */
1109a392 2486 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2487 default:
f337b084
TH
2488 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2489 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2490 case '%':
49704364 2491 Perl_croak(aTHX_ "'%%' may not be used in pack");
a6ec74c1 2492 case '@':
f337b084
TH
2493 if (utf8) {
2494 char *s = start + symptr->strbeg;
2495 while (len > 0 && s < cur) {
2496 s += UTF8SKIP(s);
2497 len--;
2498 }
2499 if (s > cur)
2500 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2501 if (len > 0) {
2502 grow:
2503 GROWING(0, cat, start, cur, len);
2504 Zero(cur, len, char);
2505 cur += len;
2506 } else if (s < cur) cur = s;
2507 else goto no_change;
2508 } else {
2509 len -= cur - (start+symptr->strbeg);
2510 if (len > 0) goto grow;
fc241834 2511 len = -len;
f337b084
TH
2512 if (len > 0) goto shrink;
2513 else goto no_change;
2514 }
a6ec74c1 2515 break;
fc241834 2516 case '(': {
49704364 2517 tempsym_t savsym = *symptr;
66c611c5
MHM
2518 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2519 symptr->flags |= group_modifiers;
49704364
LW
2520 symptr->patend = savsym.grpend;
2521 symptr->level++;
f337b084 2522 symptr->previous = &lookahead;
18529408 2523 while (len--) {
f337b084
TH
2524 U32 was_utf8;
2525 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2526 else symptr->flags &= ~FLAG_PARSE_UTF8;
2527 was_utf8 = SvUTF8(cat);
49704364 2528 symptr->patptr = savsym.grpbeg;
f337b084
TH
2529 beglist = pack_rec(cat, symptr, beglist, endlist);
2530 if (SvUTF8(cat) != was_utf8)
2531 /* This had better be an upgrade while in utf8==0 mode */
2532 utf8 = 1;
2533
49704364 2534 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2535 break; /* No way to continue */
2536 }
f337b084
TH
2537 lookahead.flags = symptr->flags & ~group_modifiers;
2538 goto no_change;
18529408 2539 }
62f95557
IZ
2540 case 'X' | TYPE_IS_SHRIEKING:
2541 if (!len) /* Avoid division by 0 */
2542 len = 1;
f337b084
TH
2543 if (utf8) {
2544 char *hop, *last;
2545 I32 l = len;
2546 hop = last = start;
2547 while (hop < cur) {
2548 hop += UTF8SKIP(hop);
2549 if (--l == 0) {
2550 last = hop;
2551 l = len;
2552 }
2553 }
2554 if (last > cur)
2555 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2556 cur = last;
2557 break;
2558 }
2559 len = (cur-start) % len;
62f95557 2560 /* FALL THROUGH */
a6ec74c1 2561 case 'X':
f337b084
TH
2562 if (utf8) {
2563 if (len < 1) goto no_change;
2564 while (len > 0) {
2565 if (cur <= start)
2566 Perl_croak(aTHX_ "'X' outside of string in pack");
2567 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2568 if (cur <= start)
2569 Perl_croak(aTHX_ "'X' outside of string in pack");
2570 }
2571 len--;
2572 }
2573 } else {
fc241834 2574 shrink:
f337b084 2575 if (cur - start < len)
fc241834 2576 Perl_croak(aTHX_ "'X' outside of string in pack");
f337b084
TH
2577 cur -= len;
2578 }
2579 if (cur < start+symptr->strbeg) {
2580 /* Make sure group starts don't point into the void */
2581 tempsym_t *group;
2582 STRLEN length = cur-start;
2583 for (group = symptr;
2584 group && length < group->strbeg;
2585 group = group->previous) group->strbeg = length;
2586 lookahead.strbeg = length;
2587 }
a6ec74c1 2588 break;
fc241834
RGS
2589 case 'x' | TYPE_IS_SHRIEKING: {
2590 I32 ai32;
62f95557
IZ
2591 if (!len) /* Avoid division by 0 */
2592 len = 1;
fc241834
RGS
2593 if (utf8) ai32 = utf8_length(start, cur) % len;
2594 else ai32 = (cur - start) % len;
2595 if (ai32 == 0) goto no_change;
2596 len -= ai32;
2597 }
2598 /* FALL THROUGH */
a6ec74c1 2599 case 'x':
f337b084 2600 goto grow;
a6ec74c1
JH
2601 case 'A':
2602 case 'Z':
f337b084
TH
2603 case 'a': {
2604 char *aptr;
2605
a6ec74c1
JH
2606 fromstr = NEXTFROM;
2607 aptr = SvPV(fromstr, fromlen);
f337b084
TH
2608 if (DO_UTF8(fromstr)) {
2609 char *end, *s;
2610
2611 if (!utf8 && !SvUTF8(cat)) {
2612 marked_upgrade(aTHX_ cat, symptr);
2613 lookahead.flags |= FLAG_DO_UTF8;
2614 lookahead.strbeg = symptr->strbeg;
2615 utf8 = 1;
2616 start = SvPVX(cat);
2617 cur = start + SvCUR(cat);
2618 }
fc241834 2619 if (howlen == e_star) {
f337b084
TH
2620 if (utf8) goto string_copy;
2621 len = fromlen+1;
2622 }
2623 s = aptr;
2624 end = aptr + fromlen;
2625 fromlen = datumtype == 'Z' ? len-1 : len;
2626 while ((I32) fromlen > 0 && s < end) {
2627 s += UTF8SKIP(s);
2628 fromlen--;
2629 }
2630 if (s > end)
2631 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2632 if (utf8) {
fc241834 2633 len = fromlen;
f337b084
TH
2634 if (datumtype == 'Z') len++;
2635 fromlen = s-aptr;
2636 len += fromlen;
fc241834 2637
f337b084 2638 goto string_copy;
fc241834 2639 }
f337b084
TH
2640 fromlen = len - fromlen;
2641 if (datumtype == 'Z') fromlen--;
2642 if (howlen == e_star) {
2643 len = fromlen;
2644 if (datumtype == 'Z') len++;
fc241834 2645 }
f337b084 2646 GROWING(0, cat, start, cur, len);
fc241834 2647 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2648 datumtype | TYPE_IS_PACK))
2649 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2650 cur += fromlen;
a6ec74c1 2651 len -= fromlen;
f337b084
TH
2652 } else if (utf8) {
2653 if (howlen == e_star) {
2654 len = fromlen;
2655 if (datumtype == 'Z') len++;
a6ec74c1 2656 }
f337b084
TH
2657 if (len <= (I32) fromlen) {
2658 fromlen = len;
2659 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2660 }
fc241834 2661 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2662 upgrade, so:
2663 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2664 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2665 len -= fromlen;
2666 while (fromlen > 0) {
2667 cur = uvchr_to_utf8(cur, * (U8 *) aptr);
2668 aptr++;
2669 fromlen--;
fc241834 2670 }
f337b084
TH
2671 } else {
2672 string_copy:
2673 if (howlen == e_star) {
2674 len = fromlen;
2675 if (datumtype == 'Z') len++;
2676 }
2677 if (len <= (I32) fromlen) {
2678 fromlen = len;
2679 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2680 }
f337b084
TH
2681 GROWING(0, cat, start, cur, len);
2682 Copy(aptr, cur, fromlen, char);
2683 cur += fromlen;
2684 len -= fromlen;
a6ec74c1 2685 }
f337b084
TH
2686 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2687 cur += len;
a6ec74c1 2688 break;
f337b084 2689 }
a6ec74c1 2690 case 'B':
f337b084
TH
2691 case 'b': {
2692 char *str, *end;
2693 I32 l, field_len;
2694 U8 bits;
2695 bool utf8_source;
2696 U32 utf8_flags;
a6ec74c1 2697
fc241834
RGS
2698 fromstr = NEXTFROM;
2699 str = SvPV(fromstr, fromlen);
f337b084
TH
2700 end = str + fromlen;
2701 if (DO_UTF8(fromstr)) {
2702 utf8_source = TRUE;
2703 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2704 } else {
2705 utf8_source = FALSE;
2706 utf8_flags = 0; /* Unused, but keep compilers happy */
2707 }
2708 if (howlen == e_star) len = fromlen;
2709 field_len = (len+7)/8;
2710 GROWING(utf8, cat, start, cur, field_len);
2711 if (len > (I32)fromlen) len = fromlen;
2712 bits = 0;
2713 l = 0;
2714 if (datumtype == 'B')
2715 while (l++ < len) {
2716 if (utf8_source) {
2717 UV val;
2718 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2719 bits |= val & 1;
2720 } else bits |= *str++ & 1;
2721 if (l & 7) bits <<= 1;
fc241834 2722 else {
f337b084
TH
2723 PUSH_BYTE(utf8, cur, bits);
2724 bits = 0;
a6ec74c1
JH
2725 }
2726 }
f337b084
TH
2727 else
2728 /* datumtype == 'b' */
2729 while (l++ < len) {
2730 if (utf8_source) {
2731 UV val;
2732 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2733 if (val & 1) bits |= 0x80;
2734 } else if (*str++ & 1)
2735 bits |= 0x80;
2736 if (l & 7) bits >>= 1;
fc241834 2737 else {
f337b084
TH
2738 PUSH_BYTE(utf8, cur, bits);
2739 bits = 0;
a6ec74c1
JH
2740 }
2741 }
f337b084
TH
2742 l--;
2743 if (l & 7) {
fc241834 2744 if (datumtype == 'B')
f337b084 2745 bits <<= 7 - (l & 7);
fc241834 2746 else
f337b084
TH
2747 bits >>= 7 - (l & 7);
2748 PUSH_BYTE(utf8, cur, bits);
2749 l += 7;
a6ec74c1 2750 }
f337b084
TH
2751 /* Determine how many chars are left in the requested field */
2752 l /= 8;
2753 if (howlen == e_star) field_len = 0;
2754 else field_len -= l;
2755 Zero(cur, field_len, char);
2756 cur += field_len;
a6ec74c1 2757 break;
f337b084 2758 }
a6ec74c1 2759 case 'H':
f337b084
TH
2760 case 'h': {
2761 char *str, *end;
2762 I32 l, field_len;
2763 U8 bits;
2764 bool utf8_source;
2765 U32 utf8_flags;
a6ec74c1 2766
fc241834
RGS
2767 fromstr = NEXTFROM;
2768 str = SvPV(fromstr, fromlen);
f337b084
TH
2769 end = str + fromlen;
2770 if (DO_UTF8(fromstr)) {
2771 utf8_source = TRUE;
2772 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2773 } else {
2774 utf8_source = FALSE;
2775 utf8_flags = 0; /* Unused, but keep compilers happy */
2776 }
2777 if (howlen == e_star) len = fromlen;
2778 field_len = (len+1)/2;
2779 GROWING(utf8, cat, start, cur, field_len);
2780 if (!utf8 && len > (I32)fromlen) len = fromlen;
2781 bits = 0;
2782 l = 0;
2783 if (datumtype == 'H')
2784 while (l++ < len) {
2785 if (utf8_source) {
2786 UV val;
2787 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2788 if (val < 256 && isALPHA(val))
2789 bits |= (val + 9) & 0xf;
a6ec74c1 2790 else
f337b084
TH
2791 bits |= val & 0xf;
2792 } else if (isALPHA(*str))
2793 bits |= (*str++ + 9) & 0xf;
2794 else
2795 bits |= *str++ & 0xf;
2796 if (l & 1) bits <<= 4;
fc241834 2797 else {
f337b084
TH
2798 PUSH_BYTE(utf8, cur, bits);
2799 bits = 0;
a6ec74c1
JH
2800 }
2801 }
f337b084
TH
2802 else
2803 while (l++ < len) {
2804 if (utf8_source) {
2805 UV val;
2806 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2807 if (val < 256 && isALPHA(val))
2808 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2809 else
f337b084
TH
2810 bits |= (val & 0xf) << 4;
2811 } else if (isALPHA(*str))
2812 bits |= ((*str++ + 9) & 0xf) << 4;
2813 else
2814 bits |= (*str++ & 0xf) << 4;
2815 if (l & 1) bits >>= 4;
fc241834 2816 else {
f337b084
TH
2817 PUSH_BYTE(utf8, cur, bits);
2818 bits = 0;
a6ec74c1 2819 }
fc241834 2820 }
f337b084
TH
2821 l--;
2822 if (l & 1) {
2823 PUSH_BYTE(utf8, cur, bits);
2824 l++;
2825 }
2826 /* Determine how many chars are left in the requested field */
2827 l /= 2;
2828 if (howlen == e_star) field_len = 0;
2829 else field_len -= l;
2830 Zero(cur, field_len, char);
2831 cur += field_len;
2832 break;
fc241834
RGS
2833 }
2834 case 'c':
f337b084
TH
2835 while (len-- > 0) {
2836 IV aiv;
2837 fromstr = NEXTFROM;
2838 aiv = SvIV(fromstr);
2839 if ((-128 > aiv || aiv > 127) &&
2840 ckWARN(WARN_PACK))
2841 Perl_warner(aTHX_ packWARN(WARN_PACK),
2842 "Character in 'c' format wrapped in pack");
2843 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2844 }
2845 break;
2846 case 'C':
f337b084
TH
2847 if (len == 0) {
2848 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2849 break;
2850 }
2851 GROWING(0, cat, start, cur, len);
a6ec74c1 2852 while (len-- > 0) {
f337b084 2853 IV aiv;
a6ec74c1 2854 fromstr = NEXTFROM;
f337b084
TH
2855 aiv = SvIV(fromstr);
2856 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2857 ckWARN(WARN_PACK))
2858 Perl_warner(aTHX_ packWARN(WARN_PACK),
2859 "Character in 'C' format wrapped in pack");
f337b084
TH
2860 *cur++ = aiv & 0xff;
2861 }
fc241834
RGS
2862 break;
2863 case 'W': {
2864 char *end;
2865 U8 in_bytes = IN_BYTES;
2866
2867 end = start+SvLEN(cat)-1;
2868 if (utf8) end -= UTF8_MAXLEN-1;
2869 while (len-- > 0) {
2870 UV auv;
2871 fromstr = NEXTFROM;
2872 auv = SvUV(fromstr);
2873 if (in_bytes) auv = auv % 0x100;
2874 if (utf8) {
2875 W_utf8:
2876 if (cur > end) {
2877 *cur = '\0';
2878 SvCUR(cat) = cur - start;
2879
2880 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2881 end = start+SvLEN(cat)-UTF8_MAXLEN;
2882 }
2883 cur = uvuni_to_utf8_flags(cur, NATIVE_TO_UNI(auv),
2884 ckWARN(WARN_UTF8) ?
2885 0 : UNICODE_ALLOW_ANY);
2886 } else {
2887 if (auv >= 0x100) {
2888 if (!SvUTF8(cat)) {
2889 *cur = '\0';
2890 SvCUR(cat) = cur - start;
2891 marked_upgrade(aTHX_ cat, symptr);
2892 lookahead.flags |= FLAG_DO_UTF8;
2893 lookahead.strbeg = symptr->strbeg;
2894 utf8 = 1;
2895 start = SvPVX(cat);
2896 cur = start + SvCUR(cat);
2897 end = start+SvLEN(cat)-UTF8_MAXLEN;
2898 goto W_utf8;
2899 }
2900 if (ckWARN(WARN_PACK))
2901 Perl_warner(aTHX_ packWARN(WARN_PACK),
2902 "Character in 'W' format wrapped in pack");
2903 auv &= 0xff;
2904 }
2905 if (cur >= end) {
2906 *cur = '\0';
2907 SvCUR(cat) = cur - start;
2908 GROWING(0, cat, start, cur, len+1);
2909 end = start+SvLEN(cat)-1;
2910 }
2911 *(U8 *) cur++ = auv;
a6ec74c1
JH
2912 }
2913 }
2914 break;
fc241834
RGS
2915 }
2916 case 'U': {
2917 char *end;
2918
2919 if (len == 0) {
2920 if (!(symptr->flags & FLAG_DO_UTF8)) {
2921 marked_upgrade(aTHX_ cat, symptr);
2922 lookahead.flags |= FLAG_DO_UTF8;
2923 lookahead.strbeg = symptr->strbeg;
2924 }
2925 utf8 = 0;
2926 goto no_change;
2927 }
2928
2929 end = start+SvLEN(cat);
2930 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2931 while (len-- > 0) {
fc241834 2932 UV auv;
a6ec74c1 2933 fromstr = NEXTFROM;
fc241834
RGS
2934 auv = SvUV(fromstr);
2935 if (utf8) {
2936 char buffer[UTF8_MAXLEN], *endb;
2937 endb = uvuni_to_utf8_flags(buffer, auv,
2938 ckWARN(WARN_UTF8) ?
2939 0 : UNICODE_ALLOW_ANY);
2940 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2941 *cur = '\0';
2942 SvCUR(cat) = cur - start;
2943 GROWING(0, cat, start, cur,
2944 len+(endb-buffer)*UTF8_EXPAND);
2945 end = start+SvLEN(cat);
2946 }
2947 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
2948 } else {
2949 if (cur >= end) {
2950 *cur = '\0';
2951 SvCUR(cat) = cur - start;
2952 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2953 end = start+SvLEN(cat)-UTF8_MAXLEN;
2954 }
2955 cur = uvuni_to_utf8_flags(cur, auv,
2956 ckWARN(WARN_UTF8) ?
2957 0 : UNICODE_ALLOW_ANY);
2958 }
a6ec74c1 2959 }
a6ec74c1 2960 break;
fc241834 2961 }
a6ec74c1
JH
2962 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2963 case 'f':
a6ec74c1 2964 while (len-- > 0) {
f337b084
TH
2965 float afloat;
2966 NV anv;
a6ec74c1 2967 fromstr = NEXTFROM;
f337b084 2968 anv = SvNV(fromstr);
5cdb9e01 2969#ifdef __VOS__
f337b084 2970 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
2971 during conversion from double to float into infinity, so we
2972 do it by hand. This code should either be generalized for
2973 any OS that needs it, or removed if and when VOS implements
2974 posix-976 (suggestion to support mapping to infinity).
2975 Paul.Green@stratus.com 02-04-02. */
f337b084 2976 if (anv > FLT_MAX)
fc241834 2977 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 2978 else if (anv < -FLT_MAX)
fc241834 2979 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
2980 else afloat = (float) anv;
2981#else /* __VOS__ */
baf3cf9c 2982# if defined(VMS) && !defined(__IEEE_FP)
f337b084 2983 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2984 * on Alpha; fake it if we don't have them.
2985 */
f337b084 2986 if (anv > FLT_MAX)
fc241834 2987 afloat = FLT_MAX;
f337b084 2988 else if (anv < -FLT_MAX)
fc241834 2989 afloat = -FLT_MAX;
f337b084 2990 else afloat = (float)anv;
baf3cf9c 2991# else
f337b084 2992 afloat = (float)anv;
baf3cf9c 2993# endif
f337b084 2994#endif /* __VOS__ */
1109a392 2995 DO_BO_PACK_N(afloat, float);
f337b084 2996 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2997 }
2998 break;
2999 case 'd':
a6ec74c1 3000 while (len-- > 0) {
f337b084
TH
3001 double adouble;
3002 NV anv;
a6ec74c1 3003 fromstr = NEXTFROM;
f337b084 3004 anv = SvNV(fromstr);
5cdb9e01 3005#ifdef __VOS__
f337b084 3006 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3007 during conversion from long double to double into infinity,
3008 so we do it by hand. This code should either be generalized
3009 for any OS that needs it, or removed if and when VOS
3010 implements posix-976 (suggestion to support mapping to
3011 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3012 if (anv > DBL_MAX)
fc241834 3013 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3014 else if (anv < -DBL_MAX)
fc241834 3015 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3016 else adouble = (double) anv;
3017#else /* __VOS__ */
baf3cf9c 3018# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3019 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3020 * on Alpha; fake it if we don't have them.
3021 */
f337b084 3022 if (anv > DBL_MAX)
fc241834 3023 adouble = DBL_MAX;
f337b084 3024 else if (anv < -DBL_MAX)
fc241834 3025 adouble = -DBL_MAX;
f337b084 3026 else adouble = (double)anv;
baf3cf9c 3027# else
f337b084 3028 adouble = (double)anv;
baf3cf9c 3029# endif
f337b084 3030#endif /* __VOS__ */
1109a392 3031 DO_BO_PACK_N(adouble, double);
f337b084 3032 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3033 }
3034 break;
fc241834
RGS
3035 case 'F': {
3036 NV anv;
1109a392 3037 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3038 while (len-- > 0) {
3039 fromstr = NEXTFROM;
3040 anv = SvNV(fromstr);
1109a392 3041 DO_BO_PACK_N(anv, NV);
fc241834 3042 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3043 }
3044 break;
fc241834 3045 }
92d41999 3046#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3047 case 'D': {
3048 long double aldouble;
1109a392
MHM
3049 /* long doubles can have unused bits, which may be nonzero */
3050 Zero(&aldouble, 1, long double);
92d41999
JH
3051 while (len-- > 0) {
3052 fromstr = NEXTFROM;
3053 aldouble = (long double)SvNV(fromstr);
1109a392 3054 DO_BO_PACK_N(aldouble, long double);
fc241834 3055 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3056 }
3057 break;
fc241834 3058 }
92d41999 3059#endif
7212898e 3060#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3061 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3062#endif
a6ec74c1
JH
3063 case 'n':
3064 while (len-- > 0) {
f337b084 3065 I16 ai16;
a6ec74c1 3066 fromstr = NEXTFROM;
ef108786 3067 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3068#ifdef HAS_HTONS
ef108786 3069 ai16 = PerlSock_htons(ai16);
a6ec74c1 3070#endif
f337b084 3071 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3072 }
3073 break;
7212898e 3074#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3075 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3076#endif
a6ec74c1
JH
3077 case 'v':
3078 while (len-- > 0) {
f337b084 3079 I16 ai16;
a6ec74c1 3080 fromstr = NEXTFROM;
ef108786 3081 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3082#ifdef HAS_HTOVS
ef108786 3083 ai16 = htovs(ai16);
a6ec74c1 3084#endif
f337b084 3085 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3086 }
3087 break;
49704364 3088 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3089#if SHORTSIZE != SIZE16
fc241834 3090 while (len-- > 0) {
f337b084 3091 unsigned short aushort;
fc241834
RGS
3092 fromstr = NEXTFROM;
3093 aushort = SvUV(fromstr);
3094 DO_BO_PACK(aushort, s);
f337b084 3095 PUSH_VAR(utf8, cur, aushort);
fc241834 3096 }
49704364
LW
3097 break;
3098#else
3099 /* Fall through! */
a6ec74c1 3100#endif
49704364 3101 case 'S':
fc241834 3102 while (len-- > 0) {
f337b084 3103 U16 au16;
fc241834
RGS
3104 fromstr = NEXTFROM;
3105 au16 = (U16)SvUV(fromstr);
3106 DO_BO_PACK(au16, 16);
f337b084 3107 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3108 }
3109 break;
49704364 3110 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3111#if SHORTSIZE != SIZE16
fc241834 3112 while (len-- > 0) {
f337b084 3113 short ashort;
fc241834
RGS
3114 fromstr = NEXTFROM;
3115 ashort = SvIV(fromstr);
3116 DO_BO_PACK(ashort, s);
f337b084 3117 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3118 }
49704364
LW
3119 break;
3120#else
3121 /* Fall through! */
a6ec74c1 3122#endif
49704364
LW
3123 case 's':
3124 while (len-- > 0) {
f337b084 3125 I16 ai16;
49704364 3126 fromstr = NEXTFROM;
ef108786
MHM
3127 ai16 = (I16)SvIV(fromstr);
3128 DO_BO_PACK(ai16, 16);
f337b084 3129 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3130 }
3131 break;
3132 case 'I':
49704364 3133 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3134 while (len-- > 0) {
f337b084 3135 unsigned int auint;
a6ec74c1
JH
3136 fromstr = NEXTFROM;
3137 auint = SvUV(fromstr);
1109a392 3138 DO_BO_PACK(auint, i);
f337b084 3139 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3140 }
3141 break;
92d41999
JH
3142 case 'j':
3143 while (len-- > 0) {
f337b084 3144 IV aiv;
92d41999
JH
3145 fromstr = NEXTFROM;
3146 aiv = SvIV(fromstr);
1109a392
MHM
3147#if IVSIZE == INTSIZE
3148 DO_BO_PACK(aiv, i);
3149#elif IVSIZE == LONGSIZE
3150 DO_BO_PACK(aiv, l);
3151#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3152 DO_BO_PACK(aiv, 64);
f337b084
TH
3153#else
3154 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3155#endif
f337b084 3156 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3157 }
3158 break;
3159 case 'J':
3160 while (len-- > 0) {
f337b084 3161 UV auv;
92d41999
JH
3162 fromstr = NEXTFROM;
3163 auv = SvUV(fromstr);
1109a392
MHM
3164#if UVSIZE == INTSIZE
3165 DO_BO_PACK(auv, i);
3166#elif UVSIZE == LONGSIZE
3167 DO_BO_PACK(auv, l);
3168#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3169 DO_BO_PACK(auv, 64);
f337b084
TH
3170#else
3171 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3172#endif
f337b084 3173 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3174 }
3175 break;
a6ec74c1
JH
3176 case 'w':
3177 while (len-- > 0) {
f337b084 3178 NV anv;
a6ec74c1 3179 fromstr = NEXTFROM;
15e9f109 3180 anv = SvNV(fromstr);
a6ec74c1 3181
f337b084
TH
3182 if (anv < 0) {
3183 *cur = '\0';
3184 SvCUR(cat) = cur - start;
49704364 3185 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3186 }
a6ec74c1 3187
196b62db
NC
3188 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3189 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3190 any negative IVs will have already been got by the croak()
3191 above. IOK is untrue for fractions, so we test them
3192 against UV_MAX_P1. */
f337b084
TH
3193 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3194 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3195 char *in = buf + sizeof(buf);
196b62db 3196 UV auv = SvUV(fromstr);
a6ec74c1
JH
3197
3198 do {
eb160463 3199 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3200 auv >>= 7;
3201 } while (auv);
3202 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3203 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3204 in, (buf + sizeof(buf)) - in);
3205 } else if (SvPOKp(fromstr))
3206 goto w_string;
a6ec74c1 3207 else if (SvNOKp(fromstr)) {
0258719b
NC
3208 /* 10**NV_MAX_10_EXP is the largest power of 10
3209 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3210 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3211 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3212 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3213 Some C compilers are strict about integral constant
3214 expressions so we conservatively divide by a slightly
3215 smaller integer instead of multiplying by the exact
3216 floating-point value.
0258719b
NC
3217 */
3218#ifdef NV_MAX_10_EXP
f337b084 3219 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3220 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3221#else
f337b084 3222 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3223 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3224#endif
a6ec74c1
JH
3225 char *in = buf + sizeof(buf);
3226
15e9f109 3227 anv = Perl_floor(anv);
a6ec74c1 3228 do {
15e9f109 3229 NV next = Perl_floor(anv / 128);
a6ec74c1 3230 if (in <= buf) /* this cannot happen ;-) */
49704364 3231 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3232 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3233 anv = next;
3234 } while (anv > 0);
a6ec74c1 3235 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3236 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3237 in, (buf + sizeof(buf)) - in);
3238 } else {
735b914b
JH
3239 char *from, *result, *in;
3240 SV *norm;
3241 STRLEN len;
3242 bool done;
3243
f337b084 3244 w_string:
735b914b
JH
3245 /* Copy string and check for compliance */
3246 from = SvPV(fromstr, len);
3247 if ((norm = is_an_int(from, len)) == NULL)
49704364 3248 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
3249
3250 New('w', result, len, char);
3251 in = result + len;
3252 done = FALSE;
f337b084 3253 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3254 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3255 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3256 in, (result + len) - in);
735b914b
JH
3257 Safefree(result);
3258 SvREFCNT_dec(norm); /* free norm */
fc241834 3259 }
a6ec74c1
JH
3260 }
3261 break;
3262 case 'i':
49704364 3263 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3264 while (len-- > 0) {
f337b084 3265 int aint;
a6ec74c1
JH
3266 fromstr = NEXTFROM;
3267 aint = SvIV(fromstr);
1109a392 3268 DO_BO_PACK(aint, i);
f337b084 3269 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3270 }
3271 break;
7212898e 3272#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3273 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3274#endif
a6ec74c1
JH
3275 case 'N':
3276 while (len-- > 0) {
f337b084 3277 U32 au32;
a6ec74c1 3278 fromstr = NEXTFROM;
ef108786 3279 au32 = SvUV(fromstr);
a6ec74c1 3280#ifdef HAS_HTONL
ef108786 3281 au32 = PerlSock_htonl(au32);
a6ec74c1 3282#endif
f337b084 3283 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3284 }
3285 break;
7212898e 3286#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3287 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3288#endif
a6ec74c1
JH
3289 case 'V':
3290 while (len-- > 0) {
f337b084 3291 U32 au32;
a6ec74c1 3292 fromstr = NEXTFROM;
ef108786 3293 au32 = SvUV(fromstr);
a6ec74c1 3294#ifdef HAS_HTOVL
ef108786 3295 au32 = htovl(au32);
a6ec74c1 3296#endif
f337b084 3297 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3298 }
3299 break;
49704364 3300 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3301#if LONGSIZE != SIZE32
fc241834 3302 while (len-- > 0) {
f337b084 3303 unsigned long aulong;
fc241834
RGS
3304 fromstr = NEXTFROM;
3305 aulong = SvUV(fromstr);
3306 DO_BO_PACK(aulong, l);
f337b084 3307 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3308 }
49704364
LW
3309 break;
3310#else
3311 /* Fall though! */
a6ec74c1 3312#endif
49704364 3313 case 'L':
fc241834 3314 while (len-- > 0) {
f337b084 3315 U32 au32;
fc241834
RGS
3316 fromstr = NEXTFROM;
3317 au32 = SvUV(fromstr);
3318 DO_BO_PACK(au32, 32);
f337b084 3319 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3320 }
3321 break;
49704364 3322 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3323#if LONGSIZE != SIZE32
fc241834 3324 while (len-- > 0) {
f337b084 3325 long along;
fc241834
RGS
3326 fromstr = NEXTFROM;
3327 along = SvIV(fromstr);
3328 DO_BO_PACK(along, l);
f337b084 3329 PUSH_VAR(utf8, cur, along);
a6ec74c1 3330 }
49704364
LW
3331 break;
3332#else
3333 /* Fall though! */
a6ec74c1 3334#endif
49704364
LW
3335 case 'l':
3336 while (len-- > 0) {
f337b084 3337 I32 ai32;
49704364 3338 fromstr = NEXTFROM;
ef108786
MHM
3339 ai32 = SvIV(fromstr);
3340 DO_BO_PACK(ai32, 32);
f337b084 3341 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3342 }
3343 break;
3344#ifdef HAS_QUAD
3345 case 'Q':
3346 while (len-- > 0) {
f337b084 3347 Uquad_t auquad;
a6ec74c1 3348 fromstr = NEXTFROM;
f337b084 3349 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3350 DO_BO_PACK(auquad, 64);
f337b084 3351 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3352 }
3353 break;
3354 case 'q':
3355 while (len-- > 0) {
f337b084 3356 Quad_t aquad;
a6ec74c1
JH
3357 fromstr = NEXTFROM;
3358 aquad = (Quad_t)SvIV(fromstr);
1109a392 3359 DO_BO_PACK(aquad, 64);
f337b084 3360 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3361 }
3362 break;
f337b084 3363#endif /* HAS_QUAD */
a6ec74c1
JH
3364 case 'P':
3365 len = 1; /* assume SV is correct length */
f337b084 3366 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3367 /* Fall through! */
a6ec74c1
JH
3368 case 'p':
3369 while (len-- > 0) {
f337b084
TH
3370 char *aptr;
3371
a6ec74c1 3372 fromstr = NEXTFROM;
28a4f200 3373 SvGETMAGIC(fromstr);
3374 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3375 else {
3376 STRLEN n_a;
3377 /* XXX better yet, could spirit away the string to
3378 * a safe spot and hang on to it until the result
3379 * of pack() (and all copies of the result) are
3380 * gone.
3381 */
f337b084
TH
3382 if (ckWARN(WARN_PACK) &&
3383 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3384 !SvREADONLY(fromstr)))) {
9014280d 3385 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3386 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3387 }
3388 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3389 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3390 else
28a4f200 3391 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3392 }
1109a392 3393 DO_BO_PACK_P(aptr);
f337b084 3394 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3395 }
3396 break;
fc241834
RGS
3397 case 'u': {
3398 char *aptr, *aend;
3399 bool from_utf8;
f337b084 3400
a6ec74c1 3401 fromstr = NEXTFROM;
fc241834
RGS
3402 if (len <= 2) len = 45;
3403 else len = len / 3 * 3;
3404 if (len >= 64) {
3405 Perl_warner(aTHX_ packWARN(WARN_PACK),
3406 "Field too wide in 'u' format in pack");
3407 len = 63;
3408 }
a6ec74c1 3409 aptr = SvPV(fromstr, fromlen);
fc241834
RGS
3410 from_utf8 = DO_UTF8(fromstr);
3411 if (from_utf8) {
3412 aend = aptr + fromlen;
3413 fromlen = sv_len_utf8(fromstr);
3414 } else aend = NULL; /* Unused, but keep compilers happy */
3415 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3416 while (fromlen > 0) {
fc241834 3417 U8 *end;
a6ec74c1 3418 I32 todo;
fc241834 3419 U8 hunk[1+63/3*4+1];
a6ec74c1 3420
eb160463 3421 if ((I32)fromlen > len)
a6ec74c1
JH
3422 todo = len;
3423 else
3424 todo = fromlen;
fc241834
RGS
3425 if (from_utf8) {
3426 char buffer[64];
3427 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3428 'u' | TYPE_IS_PACK)) {
3429 *cur = '\0';
3430 SvCUR(cat) = cur - start;
3431 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3432 }
3433 end = doencodes(hunk, buffer, todo);
3434 } else {
3435 end = doencodes(hunk, aptr, todo);
3436 aptr += todo;
3437 }
3438 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3439 fromlen -= todo;
3440 }
a6ec74c1
JH
3441 break;
3442 }
f337b084
TH
3443 }
3444 *cur = '\0';
3445 SvCUR(cat) = cur - start;
3446 no_change:
49704364 3447 *symptr = lookahead;
a6ec74c1 3448 }
49704364 3449 return beglist;
18529408
IZ
3450}
3451#undef NEXTFROM
3452
3453
3454PP(pp_pack)
3455{
3456 dSP; dMARK; dORIGMARK; dTARGET;
3457 register SV *cat = TARG;
3458 STRLEN fromlen;
3459 register char *pat = SvPVx(*++MARK, fromlen);
3460 register char *patend = pat + fromlen;
3461
3462 MARK++;
3463 sv_setpvn(cat, "", 0);
f337b084 3464 SvUTF8_off(cat);
18529408 3465
7accc089 3466 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3467
a6ec74c1
JH
3468 SvSETMAGIC(cat);
3469 SP = ORIGMARK;
3470 PUSHs(cat);
3471 RETURN;
3472}
a6ec74c1 3473
73cb7263
NC
3474/*
3475 * Local variables:
3476 * c-indentation-style: bsd
3477 * c-basic-offset: 4
3478 * indent-tabs-mode: t
3479 * End:
3480 *
edf815fd 3481 * vim: shiftwidth=4:
73cb7263 3482*/