This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
A couple more Win32 compilation clean-ups
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e 34#if PERL_VERSION >= 9
f337b084
TH
35# define PERL_PACK_CAN_BYTEORDER
36# define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
39#ifndef CHAR_BIT
40# define CHAR_BIT 8
7212898e 41#endif
3473cf63
RGS
42/* Maximum number of bytes to which a byte can grow due to upgrade */
43#define UTF8_EXPAND 2
7212898e 44
a6ec74c1 45/*
a6ec74c1
JH
46 * Offset for integer pack/unpack.
47 *
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
50 */
51
52/*
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
58 */
59/*
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
62 */
63#define SIZE16 2
64#define SIZE32 4
65
66/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
67 --jhi Feb 1999 */
68
1109a392
MHM
69#if U16SIZE > SIZE16 || U32SIZE > SIZE32
70# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
71# define OFF16(p) ((char*)(p))
72# define OFF32(p) ((char*)(p))
a6ec74c1 73# else
1109a392 74# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
75# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
77# else
08ca2aa3 78 ++++ bad cray byte order
a6ec74c1
JH
79# endif
80# endif
a6ec74c1 81#else
08ca2aa3
TH
82# define OFF16(p) ((char *) (p))
83# define OFF32(p) ((char *) (p))
a6ec74c1
JH
84#endif
85
f337b084
TH
86/* Only to be used inside a loop (see the break) */
87#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
88 if (utf8) { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
90 } else { \
91 Copy(s, OFF16(p), SIZE16, char); \
92 (s) += SIZE16; \
93 } \
94} STMT_END
95
96/* Only to be used inside a loop (see the break) */
97#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
98 if (utf8) { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
100 } else { \
101 Copy(s, OFF32(p), SIZE32, char); \
102 (s) += SIZE32; \
103 } \
104} STMT_END
105
106#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
108
109/* Only to be used inside a loop (see the break) */
f337b084 110#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
111STMT_START { \
112 if (utf8) { \
f337b084
TH
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
115 } else { \
116 Copy(s, (char *) &var, sizeof(var), char); \
117 s += sizeof(var); \
118 } \
08ca2aa3
TH
119} STMT_END
120
f337b084 121#define PUSH_VAR(utf8, aptr, var) \
230e1fce 122 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 123
49704364
WL
124/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125#define MAX_SUB_TEMPLATE_LEVEL 100
126
66c611c5 127/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
128#define FLAG_WAS_UTF8 0x40
129#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 130#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 131#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
132#define FLAG_SLASH 0x04
133#define FLAG_COMMA 0x02
134#define FLAG_PACK 0x01
135
a6ec74c1
JH
136STATIC SV *
137S_mul128(pTHX_ SV *sv, U8 m)
138{
139 STRLEN len;
140 char *s = SvPV(sv, len);
141 char *t;
142 U32 i = 0;
143
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
146
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
149 sv = tmpNew;
150 s = SvPV(sv, len);
151 }
152 t = s + len - 1;
153 while (!*t) /* trailing '\0'? */
154 t--;
155 while (t > s) {
156 i = ((*t - '0') << 7) + m;
eb160463
GS
157 *(t--) = '0' + (char)(i % 10);
158 m = (char)(i / 10);
a6ec74c1
JH
159 }
160 return (sv);
161}
162
163/* Explosives and implosives. */
164
165#if 'I' == 73 && 'J' == 74
166/* On an ASCII/ISO kind of system */
167#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
168#else
169/*
170 Some other sort of character set - use memchr() so we don't match
171 the null byte.
172 */
173#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
174#endif
175
66c611c5 176/* type modifiers */
62f95557 177#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
178#define TYPE_IS_BIG_ENDIAN 0x200
179#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 180#define TYPE_IS_PACK 0x800
1109a392 181#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 182#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
183#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
184
7212898e
NC
185#ifdef PERL_PACK_CAN_SHRIEKSIGN
186#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
187#else
188#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
189#endif
190
191#ifndef PERL_PACK_CAN_BYTEORDER
192/* Put "can't" first because it is shorter */
193# define TYPE_ENDIANNESS(t) 0
194# define TYPE_NO_ENDIANNESS(t) (t)
195
196# define ENDIANNESS_ALLOWED_TYPES ""
197
198# define DO_BO_UNPACK(var, type)
199# define DO_BO_PACK(var, type)
200# define DO_BO_UNPACK_PTR(var, type, pre_cast)
201# define DO_BO_PACK_PTR(var, type, pre_cast)
202# define DO_BO_UNPACK_N(var, type)
203# define DO_BO_PACK_N(var, type)
204# define DO_BO_UNPACK_P(var)
205# define DO_BO_PACK_P(var)
66c611c5 206
f337b084 207#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
208
209# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
211
212# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
213
214# define DO_BO_UNPACK(var, type) \
1109a392 215 STMT_START { \
66c611c5 216 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
219 default: break; \
220 } \
221 } STMT_END
222
7212898e 223# define DO_BO_PACK(var, type) \
1109a392 224 STMT_START { \
66c611c5 225 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
228 default: break; \
229 } \
230 } STMT_END
231
7212898e 232# define DO_BO_UNPACK_PTR(var, type, pre_cast) \
1109a392 233 STMT_START { \
66c611c5 234 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (void *) my_betoh ## type ((pre_cast) var); \
237 break; \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (void *) my_letoh ## type ((pre_cast) var); \
240 break; \
241 default: \
242 break; \
243 } \
244 } STMT_END
245
7212898e 246# define DO_BO_PACK_PTR(var, type, pre_cast) \
1109a392 247 STMT_START { \
66c611c5 248 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (void *) my_htobe ## type ((pre_cast) var); \
251 break; \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (void *) my_htole ## type ((pre_cast) var); \
254 break; \
255 default: \
256 break; \
257 } \
258 } STMT_END
259
7212898e 260# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
266 break; \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
270 break; \
271 default: \
272 break; \
273 } \
274 } STMT_END
275
7212898e
NC
276# if PTRSIZE == INTSIZE
277# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
278# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
279# elif PTRSIZE == LONGSIZE
280# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
281# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
282# else
283# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
284# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
285# endif
1109a392 286
7212898e 287# if defined(my_htolen) && defined(my_letohn) && \
1109a392 288 defined(my_htoben) && defined(my_betohn)
7212898e 289# define DO_BO_UNPACK_N(var, type) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
292 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
293 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
294 default: break; \
295 } \
296 } STMT_END
297
7212898e 298# define DO_BO_PACK_N(var, type) \
1109a392 299 STMT_START { \
66c611c5 300 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
301 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
302 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
303 default: break; \
304 } \
305 } STMT_END
7212898e
NC
306# else
307# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
308# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
309# endif
310
f337b084 311#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 312
78d46eaa 313#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 314#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
315#define PACK_SIZE_MASK 0x3F
316
78d46eaa
NC
317/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
318 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
319
320#if TYPE_IS_SHRIEKING != 0x100
321 ++++shriek offset should be 256
322#endif
323
324typedef U8 packprops_t;
78d46eaa
NC
325#if 'J'-'I' == 1
326/* ASCII */
a7a3cfaa
TH
327const packprops_t packprops[512] = {
328 /* normal */
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
333 0, 0, 0,
334 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 335#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 336 /* D */ LONG_DOUBLESIZE,
80a13697 337#else
a7a3cfaa 338 0,
80a13697 339#endif
a7a3cfaa
TH
340 0,
341 /* F */ NVSIZE,
342 0, 0,
343 /* I */ sizeof(unsigned int),
344 /* J */ UVSIZE,
345 0,
346 /* L */ SIZE32,
347 0,
348 /* N */ SIZE32,
349 0, 0,
80a13697 350#if defined(HAS_QUAD)
a7a3cfaa 351 /* Q */ sizeof(Uquad_t),
80a13697 352#else
a7a3cfaa 353 0,
80a13697 354#endif
a7a3cfaa
TH
355 0,
356 /* S */ SIZE16,
357 0,
358 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
359 /* V */ SIZE32,
360 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 /* c */ sizeof(char),
363 /* d */ sizeof(double),
364 0,
365 /* f */ sizeof(float),
366 0, 0,
367 /* i */ sizeof(int),
368 /* j */ IVSIZE,
369 0,
370 /* l */ SIZE32,
371 0,
372 /* n */ SIZE16,
373 0,
374 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 375#if defined(HAS_QUAD)
a7a3cfaa 376 /* q */ sizeof(Quad_t),
80a13697 377#else
a7a3cfaa 378 0,
80a13697 379#endif
a7a3cfaa
TH
380 0,
381 /* s */ SIZE16,
382 0, 0,
383 /* v */ SIZE16,
384 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0,
394 /* shrieking */
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
398 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
399 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 /* I */ sizeof(unsigned int),
401 0, 0,
402 /* L */ sizeof(unsigned long),
403 0,
7212898e 404#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 405 /* N */ SIZE32,
7212898e 406#else
a7a3cfaa 407 0,
7212898e 408#endif
a7a3cfaa
TH
409 0, 0, 0, 0,
410 /* S */ sizeof(unsigned short),
411 0, 0,
7212898e 412#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 413 /* V */ SIZE32,
7212898e 414#else
a7a3cfaa 415 0,
7212898e 416#endif
a7a3cfaa
TH
417 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
418 0, 0,
419 /* i */ sizeof(int),
420 0, 0,
421 /* l */ sizeof(long),
422 0,
7212898e 423#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 424 /* n */ SIZE16,
7212898e 425#else
a7a3cfaa 426 0,
7212898e 427#endif
a7a3cfaa
TH
428 0, 0, 0, 0,
429 /* s */ sizeof(short),
430 0, 0,
7212898e 431#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 432 /* v */ SIZE16,
7212898e 433#else
a7a3cfaa 434 0,
7212898e 435#endif
a7a3cfaa
TH
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
445};
446#else
447/* EBCDIC (or bust) */
a7a3cfaa
TH
448const packprops_t packprops[512] = {
449 /* normal */
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0,
459 /* c */ sizeof(char),
460 /* d */ sizeof(double),
461 0,
462 /* f */ sizeof(float),
463 0, 0,
464 /* i */ sizeof(int),
465 0, 0, 0, 0, 0, 0, 0,
466 /* j */ IVSIZE,
467 0,
468 /* l */ SIZE32,
469 0,
470 /* n */ SIZE16,
471 0,
472 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 473#if defined(HAS_QUAD)
a7a3cfaa 474 /* q */ sizeof(Quad_t),
80a13697 475#else
a7a3cfaa 476 0,
80a13697 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 /* s */ SIZE16,
480 0, 0,
481 /* v */ SIZE16,
482 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 486#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 487 /* D */ LONG_DOUBLESIZE,
80a13697 488#else
a7a3cfaa 489 0,
80a13697 490#endif
a7a3cfaa
TH
491 0,
492 /* F */ NVSIZE,
493 0, 0,
494 /* I */ sizeof(unsigned int),
495 0, 0, 0, 0, 0, 0, 0,
496 /* J */ UVSIZE,
497 0,
498 /* L */ SIZE32,
499 0,
500 /* N */ SIZE32,
501 0, 0,
80a13697 502#if defined(HAS_QUAD)
a7a3cfaa 503 /* Q */ sizeof(Uquad_t),
80a13697 504#else
a7a3cfaa 505 0,
80a13697 506#endif
a7a3cfaa
TH
507 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 /* S */ SIZE16,
509 0,
510 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
511 /* V */ SIZE32,
512 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 /* shrieking */
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 /* i */ sizeof(int),
526 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 /* l */ sizeof(long),
528 0,
7212898e 529#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 530 /* n */ SIZE16,
7212898e 531#else
a7a3cfaa 532 0,
7212898e 533#endif
a7a3cfaa
TH
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* s */ sizeof(short),
536 0, 0,
7212898e 537#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 538 /* v */ SIZE16,
7212898e 539#else
a7a3cfaa 540 0,
7212898e 541#endif
a7a3cfaa
TH
542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544 0, 0, 0,
545 /* I */ sizeof(unsigned int),
546 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 /* L */ sizeof(unsigned long),
548 0,
7212898e 549#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 550 /* N */ SIZE32,
7212898e 551#else
a7a3cfaa 552 0,
7212898e 553#endif
a7a3cfaa
TH
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 /* S */ sizeof(unsigned short),
556 0, 0,
7212898e 557#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 558 /* V */ SIZE32,
7212898e 559#else
a7a3cfaa 560 0,
7212898e 561#endif
a7a3cfaa
TH
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
564};
565#endif
566
08ca2aa3 567STATIC U8
f337b084 568uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
08ca2aa3
TH
569{
570 UV val;
571 STRLEN retlen;
f337b084
TH
572 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
573 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
574 /* We try to process malformed UTF-8 as much as possible (preferrably with
575 warnings), but these two mean we make no progress in the string and
576 might enter an infinite loop */
577 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
578 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
579 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 580 if (val >= 0x100) {
f337b084 581 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
582 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
583 "Character in '%c' format wrapped in unpack",
f337b084 584 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
585 val &= 0xff;
586 }
587 *s += retlen;
fe2774ed 588 return (U8)val;
08ca2aa3
TH
589}
590
f337b084
TH
591#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
592 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
593 *(U8 *)(s)++)
594
595STATIC bool
f337b084 596uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
597{
598 UV val;
599 STRLEN retlen;
600 char *from = *s;
601 int bad = 0;
602 U32 flags = ckWARN(WARN_UTF8) ?
603 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
604 for (;buf_len > 0; buf_len--) {
605 if (from >= end) return FALSE;
f337b084 606 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
607 if (retlen == (STRLEN) -1 || retlen == 0) {
608 from += UTF8SKIP(from);
609 bad |= 1;
610 } else from += retlen;
611 if (val >= 0x100) {
612 bad |= 2;
613 val &= 0xff;
614 }
fe2774ed 615 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
616 }
617 /* We have enough characters for the buffer. Did we have problems ? */
618 if (bad) {
619 if (bad & 1) {
620 /* Rewalk the string fragment while warning */
621 char *ptr;
622 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
623 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
624 if (ptr >= end) break;
f337b084 625 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
626 }
627 if (from > end) from = end;
628 }
629 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 630 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
631 WARN_PACK : WARN_UNPACK),
632 "Character(s) in '%c' format wrapped in %s",
fc241834 633 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 634 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
635 }
636 *s = from;
637 return TRUE;
638}
639
640STATIC bool
641next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
642{
643 UV val;
644 STRLEN retlen;
f337b084 645 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
646 if (val >= 0x100 || !ISUUCHAR(val) ||
647 retlen == (STRLEN) -1 || retlen == 0) {
648 *out = 0;
649 return FALSE;
650 }
651 *out = PL_uudmap[val] & 077;
f337b084 652 *s += retlen;
08ca2aa3
TH
653 return TRUE;
654}
78d46eaa 655
f337b084
TH
656STATIC void
657bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
658 U8 buffer[UTF8_MAXLEN];
659 U8 *end = start + len;
660 char *d = *dest;
661 while (start < end) {
662 int length =
663 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
664 switch(length) {
665 case 1:
666 *d++ = buffer[0];
667 break;
668 case 2:
669 *d++ = buffer[0];
670 *d++ = buffer[1];
671 break;
672 default:
673 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
674 *start, length);
675 }
676 start++;
677 }
678 *dest = d;
679}
680
230e1fce
NC
681#define PUSH_BYTES(utf8, cur, buf, len) \
682STMT_START { \
683 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
684 else { \
685 Copy(buf, cur, len, char); \
686 (cur) += (len); \
687 } \
f337b084
TH
688} STMT_END
689
690#define GROWING(utf8, cat, start, cur, in_len) \
691STMT_START { \
692 STRLEN glen = (in_len); \
3473cf63 693 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
694 if ((cur) + glen >= (start) + SvLEN(cat)) { \
695 (start) = sv_exp_grow(aTHX_ cat, glen); \
696 (cur) = (start) + SvCUR(cat); \
697 } \
698} STMT_END
699
700#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
701STMT_START { \
702 STRLEN glen = (in_len); \
703 STRLEN gl = glen; \
3473cf63 704 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
705 if ((cur) + gl >= (start) + SvLEN(cat)) { \
706 *cur = '\0'; \
707 SvCUR(cat) = (cur) - (start); \
708 (start) = sv_exp_grow(aTHX_ cat, gl); \
709 (cur) = (start) + SvCUR(cat); \
710 } \
711 PUSH_BYTES(utf8, cur, buf, glen); \
712} STMT_END
713
714#define PUSH_BYTE(utf8, s, byte) \
715STMT_START { \
716 if (utf8) { \
717 U8 au8 = (byte); \
718 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
719 } else *(U8 *)(s)++ = (byte); \
720} STMT_END
721
722/* Only to be used inside a loop (see the break) */
723#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
724STMT_START { \
725 STRLEN retlen; \
726 if (str >= end) break; \
727 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
728 if (retlen == (STRLEN) -1 || retlen == 0) { \
729 *cur = '\0'; \
730 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
731 } \
732 str += retlen; \
733} STMT_END
734
206947d2 735/* Returns the sizeof() struct described by pat */
028d1f6d 736STATIC I32
f337b084 737S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 738{
f337b084 739 I32 total = 0;
206947d2 740
49704364 741 while (next_symbol(symptr)) {
f337b084
TH
742 I32 len;
743 int star, size;
f337b084
TH
744
745 switch (symptr->howlen) {
fc241834 746 case e_star:
49704364
WL
747 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
748 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
749 break;
f337b084
TH
750 default:
751 /* e_no_len and e_number */
752 len = symptr->length;
753 break;
49704364
WL
754 }
755
a7a3cfaa 756 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697
NC
757 if (!size) {
758 /* endianness doesn't influence the size of a type */
759 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
760 default:
761 Perl_croak(aTHX_ "Invalid type '%c' in %s",
762 (int)TYPE_NO_MODIFIERS(symptr->code),
763 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
764 case '@':
765 case '/':
766 case 'U': /* XXXX Is it correct? */
767 case 'w':
768 case 'u':
769 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
770 (int)symptr->code,
771 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
772 case '%':
773 size = 0;
774 break;
775 case '(':
fc241834
RGS
776 {
777 tempsym_t savsym = *symptr;
778 symptr->patptr = savsym.grpbeg;
779 symptr->patend = savsym.grpend;
780 /* XXXX Theoretically, we need to measure many times at
781 different positions, since the subexpression may contain
782 alignment commands, but be not of aligned length.
783 Need to detect this and croak(). */
784 size = measure_struct(symptr);
785 *symptr = savsym;
786 break;
787 }
80a13697
NC
788 case 'X' | TYPE_IS_SHRIEKING:
789 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
790 */
791 if (!len) /* Avoid division by 0 */
792 len = 1;
793 len = total % len; /* Assumed: the start is aligned. */
794 /* FALL THROUGH */
795 case 'X':
796 size = -1;
797 if (total < len)
798 Perl_croak(aTHX_ "'X' outside of string in %s",
799 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
800 break;
801 case 'x' | TYPE_IS_SHRIEKING:
802 if (!len) /* Avoid division by 0 */
803 len = 1;
804 star = total % len; /* Assumed: the start is aligned. */
805 if (star) /* Other portable ways? */
806 len = len - star;
807 else
808 len = 0;
809 /* FALL THROUGH */
810 case 'x':
811 case 'A':
812 case 'Z':
813 case 'a':
80a13697
NC
814 size = 1;
815 break;
816 case 'B':
817 case 'b':
818 len = (len + 7)/8;
819 size = 1;
820 break;
821 case 'H':
822 case 'h':
823 len = (len + 1)/2;
824 size = 1;
825 break;
78d46eaa 826
80a13697
NC
827 case 'P':
828 len = 1;
829 size = sizeof(char*);
78d46eaa
NC
830 break;
831 }
206947d2
IZ
832 }
833 total += len * size;
834 }
835 return total;
836}
837
49704364
WL
838
839/* locate matching closing parenthesis or bracket
840 * returns char pointer to char after match, or NULL
841 */
842STATIC char *
843S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 844{
49704364
WL
845 while (patptr < patend) {
846 char c = *patptr++;
847
848 if (isSPACE(c))
849 continue;
850 else if (c == ender)
851 return patptr-1;
852 else if (c == '#') {
853 while (patptr < patend && *patptr != '\n')
854 patptr++;
855 continue;
856 } else if (c == '(')
857 patptr = group_end(patptr, patend, ')') + 1;
858 else if (c == '[')
859 patptr = group_end(patptr, patend, ']') + 1;
18529408 860 }
49704364
WL
861 Perl_croak(aTHX_ "No group ending character '%c' found in template",
862 ender);
863 return 0;
18529408
IZ
864}
865
49704364
WL
866
867/* Convert unsigned decimal number to binary.
868 * Expects a pointer to the first digit and address of length variable
869 * Advances char pointer to 1st non-digit char and returns number
fc241834 870 */
18529408 871STATIC char *
49704364
WL
872S_get_num(pTHX_ register char *patptr, I32 *lenptr )
873{
874 I32 len = *patptr++ - '0';
875 while (isDIGIT(*patptr)) {
876 if (len >= 0x7FFFFFFF/10)
877 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
878 len = (len * 10) + (*patptr++ - '0');
879 }
880 *lenptr = len;
881 return patptr;
882}
883
884/* The marvellous template parsing routine: Using state stored in *symptr,
885 * locates next template code and count
886 */
887STATIC bool
f337b084 888S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 889{
fc241834
RGS
890 char* patptr = symptr->patptr;
891 char* patend = symptr->patend;
f337b084 892 const char *allowed = "";
49704364
WL
893
894 symptr->flags &= ~FLAG_SLASH;
895
896 while (patptr < patend) {
897 if (isSPACE(*patptr))
898 patptr++;
899 else if (*patptr == '#') {
900 patptr++;
901 while (patptr < patend && *patptr != '\n')
902 patptr++;
903 if (patptr < patend)
904 patptr++;
905 } else {
fc241834 906 /* We should have found a template code */
49704364 907 I32 code = *patptr++ & 0xFF;
66c611c5 908 U32 inherited_modifiers = 0;
49704364
WL
909
910 if (code == ','){ /* grandfather in commas but with a warning */
911 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
912 symptr->flags |= FLAG_COMMA;
913 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
914 "Invalid type ',' in %s",
915 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
916 }
917 continue;
918 }
fc241834 919
49704364 920 /* for '(', skip to ')' */
fc241834 921 if (code == '(') {
49704364
WL
922 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
923 Perl_croak(aTHX_ "()-group starts with a count in %s",
924 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
925 symptr->grpbeg = patptr;
926 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
927 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
928 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
929 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
930 }
931
66c611c5
MHM
932 /* look for group modifiers to inherit */
933 if (TYPE_ENDIANNESS(symptr->flags)) {
934 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
935 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
936 }
937
1109a392
MHM
938 /* look for modifiers */
939 while (patptr < patend) {
1109a392
MHM
940 I32 modifier = 0;
941 switch (*patptr) {
942 case '!':
943 modifier = TYPE_IS_SHRIEKING;
7212898e 944 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 945 break;
7212898e 946#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
947 case '>':
948 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 949 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
950 break;
951 case '<':
952 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 953 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 954 break;
f337b084 955#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392
MHM
956 default:
957 break;
958 }
66c611c5 959
1109a392
MHM
960 if (modifier == 0)
961 break;
66c611c5 962
1109a392
MHM
963 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
964 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
965 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
966
967 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392
MHM
968 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
969 (int) TYPE_NO_MODIFIERS(code),
970 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
971 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
972 TYPE_ENDIANNESS_MASK)
973 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
974 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
975
1109a392
MHM
976 if (ckWARN(WARN_UNPACK)) {
977 if (code & modifier)
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Duplicate modifier '%c' after '%c' in %s",
980 *patptr, (int) TYPE_NO_MODIFIERS(code),
981 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
982 }
66c611c5 983
1109a392
MHM
984 code |= modifier;
985 patptr++;
49704364
WL
986 }
987
66c611c5
MHM
988 /* inherit modifiers */
989 code |= inherited_modifiers;
990
fc241834 991 /* look for count and/or / */
49704364
WL
992 if (patptr < patend) {
993 if (isDIGIT(*patptr)) {
994 patptr = get_num( patptr, &symptr->length );
995 symptr->howlen = e_number;
996
997 } else if (*patptr == '*') {
998 patptr++;
999 symptr->howlen = e_star;
1000
1001 } else if (*patptr == '[') {
fc241834 1002 char* lenptr = ++patptr;
49704364
WL
1003 symptr->howlen = e_number;
1004 patptr = group_end( patptr, patend, ']' ) + 1;
1005 /* what kind of [] is it? */
1006 if (isDIGIT(*lenptr)) {
1007 lenptr = get_num( lenptr, &symptr->length );
1008 if( *lenptr != ']' )
1009 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1010 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1011 } else {
1012 tempsym_t savsym = *symptr;
1013 symptr->patend = patptr-1;
1014 symptr->patptr = lenptr;
1015 savsym.length = measure_struct(symptr);
1016 *symptr = savsym;
1017 }
1018 } else {
1019 symptr->howlen = e_no_len;
1020 symptr->length = 1;
1021 }
1022
1023 /* try to find / */
1024 while (patptr < patend) {
1025 if (isSPACE(*patptr))
1026 patptr++;
1027 else if (*patptr == '#') {
1028 patptr++;
1029 while (patptr < patend && *patptr != '\n')
1030 patptr++;
1031 if (patptr < patend)
1032 patptr++;
1033 } else {
66c611c5 1034 if (*patptr == '/') {
49704364
WL
1035 symptr->flags |= FLAG_SLASH;
1036 patptr++;
66c611c5
MHM
1037 if (patptr < patend &&
1038 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364
WL
1039 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1040 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1041 }
1042 break;
1043 }
18529408 1044 }
49704364
WL
1045 } else {
1046 /* at end - no count, no / */
1047 symptr->howlen = e_no_len;
1048 symptr->length = 1;
1049 }
1050
1051 symptr->code = code;
fc241834 1052 symptr->patptr = patptr;
49704364 1053 return TRUE;
18529408 1054 }
49704364 1055 }
fc241834 1056 symptr->patptr = patptr;
49704364 1057 return FALSE;
18529408
IZ
1058}
1059
18529408 1060/*
fc241834 1061 There is no way to cleanly handle the case where we should process the
08ca2aa3 1062 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1063 (e.g. estimates like strend-s as an upper bound for the number of
1064 characters left wouldn't work). So if we foresee the need of this
1065 (pattern starts with U or contains U0), we want to work on the encoded
1066 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1067 themselves if they need to do a lot of unpacks like this on it
1068*/
fc241834 1069STATIC bool
08ca2aa3
TH
1070need_utf8(const char *pat, const char *patend)
1071{
1072 bool first = TRUE;
1073 while (pat < patend) {
1074 if (pat[0] == '#') {
1075 pat++;
1076 pat = memchr(pat, '\n', patend-pat);
1077 if (!pat) return FALSE;
1078 } else if (pat[0] == 'U') {
1079 if (first || pat[1] == '0') return TRUE;
1080 } else first = FALSE;
1081 pat++;
1082 }
1083 return FALSE;
1084}
1085
1086STATIC char
1087first_symbol(const char *pat, const char *patend) {
1088 while (pat < patend) {
1089 if (pat[0] != '#') return pat[0];
1090 pat++;
1091 pat = memchr(pat, '\n', patend-pat);
1092 if (!pat) return 0;
1093 pat++;
1094 }
1095 return 0;
1096}
1097
1098/*
18529408
IZ
1099=for apidoc unpack_str
1100
7accc089
JH
1101The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1102and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1103
1104=cut */
1105
1106I32
f337b084 1107Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1108{
49704364 1109 tempsym_t sym = { 0 };
08ca2aa3 1110
f337b084 1111 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1112 else if (need_utf8(pat, patend)) {
1113 /* We probably should try to avoid this in case a scalar context call
1114 wouldn't get to the "U0" */
1115 STRLEN len = strend - s;
230e1fce 1116 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1117 SAVEFREEPV(s);
1118 strend = s + len;
f337b084 1119 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1120 }
1121
f337b084
TH
1122 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1123 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1124
49704364
WL
1125 sym.patptr = pat;
1126 sym.patend = patend;
1127 sym.flags = flags;
1128
1129 return unpack_rec(&sym, s, s, strend, NULL );
1130}
1131
7accc089
JH
1132/*
1133=for apidoc unpackstring
1134
608d3aed
WL
1135The engine implementing unpack() Perl function. C<unpackstring> puts the
1136extracted list items on the stack and returns the number of elements.
1137Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1138
1139=cut */
1140
1141I32
f337b084 1142Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
7accc089
JH
1143{
1144 tempsym_t sym = { 0 };
08ca2aa3 1145
f337b084 1146 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1147 else if (need_utf8(pat, patend)) {
1148 /* We probably should try to avoid this in case a scalar context call
1149 wouldn't get to the "U0" */
1150 STRLEN len = strend - s;
230e1fce 1151 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1152 SAVEFREEPV(s);
1153 strend = s + len;
f337b084 1154 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1155 }
1156
f337b084
TH
1157 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1158 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1159
7accc089
JH
1160 sym.patptr = pat;
1161 sym.patend = patend;
1162 sym.flags = flags;
1163
1164 return unpack_rec(&sym, s, s, strend, NULL );
1165}
1166
49704364
WL
1167STATIC
1168I32
08ca2aa3 1169S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
49704364 1170{
a6ec74c1 1171 dSP;
18529408
IZ
1172 SV *sv;
1173 I32 start_sp_offset = SP - PL_stack_base;
49704364 1174 howlen_t howlen;
a6ec74c1 1175
a6ec74c1 1176 I32 checksum = 0;
92d41999 1177 UV cuv = 0;
a6ec74c1 1178 NV cdouble = 0.0;
f337b084 1179 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364
WL
1180 char* strrelbeg = s;
1181 bool beyond = FALSE;
21c16052 1182 bool explicit_length;
49704364 1183 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1184 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
49704364 1185
49704364 1186 while (next_symbol(symptr)) {
a7a3cfaa 1187 packprops_t props;
f337b084
TH
1188 I32 len, ai32;
1189 I32 datumtype = symptr->code;
206947d2 1190 /* do first one only unless in list context
08ca2aa3 1191 / is implemented by unpacking the count, then popping it from the
206947d2 1192 stack, so must check that we're not in the middle of a / */
49704364 1193 if ( unpack_only_one
206947d2 1194 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1195 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1196 break;
49704364 1197
f337b084 1198 switch (howlen = symptr->howlen) {
fc241834
RGS
1199 case e_star:
1200 len = strend - strbeg; /* long enough */
49704364 1201 break;
f337b084
TH
1202 default:
1203 /* e_no_len and e_number */
1204 len = symptr->length;
1205 break;
49704364 1206 }
18529408 1207
21c16052 1208 explicit_length = TRUE;
a6ec74c1 1209 redo_switch:
49704364 1210 beyond = s >= strend;
a7a3cfaa
TH
1211
1212 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1213 if (props) {
1214 /* props nonzero means we can process this letter. */
1215 long size = props & PACK_SIZE_MASK;
1216 long howmany = (strend - s) / size;
1217 if (len > howmany)
1218 len = howmany;
1219
1220 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1221 if (len && unpack_only_one) len = 1;
1222 EXTEND(SP, len);
1223 EXTEND_MORTAL(len);
78d46eaa
NC
1224 }
1225 }
a7a3cfaa 1226
1109a392 1227 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1228 default:
1109a392 1229 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1230
a6ec74c1 1231 case '%':
49704364 1232 if (howlen == e_no_len)
18529408 1233 len = 16; /* len is not specified */
a6ec74c1 1234 checksum = len;
92d41999 1235 cuv = 0;
a6ec74c1 1236 cdouble = 0;
18529408 1237 continue;
a6ec74c1 1238 break;
18529408
IZ
1239 case '(':
1240 {
49704364 1241 tempsym_t savsym = *symptr;
66c611c5
MHM
1242 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1243 symptr->flags |= group_modifiers;
49704364
WL
1244 symptr->patend = savsym.grpend;
1245 symptr->level++;
18529408
IZ
1246 PUTBACK;
1247 while (len--) {
49704364 1248 symptr->patptr = savsym.grpbeg;
f337b084
TH
1249 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1250 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1251 unpack_rec(symptr, s, strbeg, strend, &s);
1252 if (s == strend && savsym.howlen == e_star)
49704364 1253 break; /* No way to continue */
18529408
IZ
1254 }
1255 SPAGAIN;
66c611c5 1256 symptr->flags &= ~group_modifiers;
49704364
WL
1257 savsym.flags = symptr->flags;
1258 *symptr = savsym;
18529408
IZ
1259 break;
1260 }
a6ec74c1 1261 case '@':
08ca2aa3
TH
1262 if (utf8) {
1263 s = strrelbeg;
1264 while (len > 0) {
1265 if (s >= strend)
1266 Perl_croak(aTHX_ "'@' outside of string in unpack");
1267 s += UTF8SKIP(s);
1268 len--;
1269 }
1270 if (s > strend)
1271 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1272 } else {
fc241834
RGS
1273 if (len > strend - strrelbeg)
1274 Perl_croak(aTHX_ "'@' outside of string in unpack");
1275 s = strrelbeg + len;
08ca2aa3 1276 }
a6ec74c1 1277 break;
62f95557
IZ
1278 case 'X' | TYPE_IS_SHRIEKING:
1279 if (!len) /* Avoid division by 0 */
1280 len = 1;
08ca2aa3
TH
1281 if (utf8) {
1282 char *hop, *last;
f337b084
TH
1283 I32 l = len;
1284 hop = last = strbeg;
1285 while (hop < s) {
1286 hop += UTF8SKIP(hop);
1287 if (--l == 0) {
08ca2aa3 1288 last = hop;
f337b084
TH
1289 l = len;
1290 }
fc241834 1291 }
f337b084
TH
1292 if (last > s)
1293 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1294 s = last;
1295 break;
f337b084
TH
1296 }
1297 len = (s - strbeg) % len;
62f95557 1298 /* FALL THROUGH */
a6ec74c1 1299 case 'X':
08ca2aa3
TH
1300 if (utf8) {
1301 while (len > 0) {
1302 if (s <= strbeg)
1303 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1304 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1305 if (s <= strbeg)
1306 Perl_croak(aTHX_ "'X' outside of string in unpack");
1307 }
1308 len--;
1309 }
1310 } else {
fc241834
RGS
1311 if (len > s - strbeg)
1312 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1313 s -= len;
08ca2aa3 1314 }
a6ec74c1 1315 break;
62f95557
IZ
1316 case 'x' | TYPE_IS_SHRIEKING:
1317 if (!len) /* Avoid division by 0 */
1318 len = 1;
230e1fce
NC
1319 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1320 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1321 if (ai32 == 0) break;
1322 len -= ai32;
62f95557 1323 /* FALL THROUGH */
a6ec74c1 1324 case 'x':
08ca2aa3
TH
1325 if (utf8) {
1326 while (len>0) {
1327 if (s >= strend)
1328 Perl_croak(aTHX_ "'x' outside of string in unpack");
1329 s += UTF8SKIP(s);
1330 len--;
1331 }
1332 } else {
fc241834
RGS
1333 if (len > strend - s)
1334 Perl_croak(aTHX_ "'x' outside of string in unpack");
1335 s += len;
f337b084 1336 }
a6ec74c1
JH
1337 break;
1338 case '/':
49704364
WL
1339 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1340 break;
a6ec74c1
JH
1341 case 'A':
1342 case 'Z':
1343 case 'a':
08ca2aa3
TH
1344 if (checksum) {
1345 /* Preliminary length estimate is assumed done in 'W' */
1346 if (len > strend - s) len = strend - s;
1347 goto W_checksum;
1348 }
1349 if (utf8) {
1350 I32 l;
1351 char *hop;
1352 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1353 if (hop >= strend) {
1354 if (hop > strend)
1355 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1356 break;
fc241834 1357 }
a6ec74c1 1358 }
08ca2aa3
TH
1359 if (hop > strend)
1360 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1361 len = hop - s;
1362 } else if (len > strend - s)
1363 len = strend - s;
1364
1365 if (datumtype == 'Z') {
1366 /* 'Z' strips stuff after first null */
f337b084
TH
1367 char *ptr, *end;
1368 end = s + len;
1369 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1370 sv = newSVpvn(s, ptr-s);
1371 if (howlen == e_star) /* exact for 'Z*' */
1372 len = ptr-s + (ptr != strend ? 1 : 0);
1373 } else if (datumtype == 'A') {
1374 /* 'A' strips both nulls and spaces */
1375 char *ptr;
18bdf90a
TH
1376 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1377 for (ptr = s+len-1; ptr >= s; ptr--)
1378 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1379 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1380 if (ptr >= s) ptr += UTF8SKIP(ptr);
1381 else ptr++;
1382 if (ptr > s+len)
1383 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1384 } else {
1385 for (ptr = s+len-1; ptr >= s; ptr--)
1386 if (*ptr != 0 && !isSPACE(*ptr)) break;
1387 ptr++;
1388 }
08ca2aa3
TH
1389 sv = newSVpvn(s, ptr-s);
1390 } else sv = newSVpvn(s, len);
1391
1392 if (utf8) {
1393 SvUTF8_on(sv);
1394 /* Undo any upgrade done due to need_utf8() */
f337b084 1395 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1396 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1397 }
1398 XPUSHs(sv_2mortal(sv));
08ca2aa3 1399 s += len;
a6ec74c1
JH
1400 break;
1401 case 'B':
08ca2aa3
TH
1402 case 'b': {
1403 char *str;
49704364 1404 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1405 len = (strend - s) * 8;
1406 if (checksum) {
1407 if (!PL_bitcount) {
08ca2aa3 1408 int bits;
a6ec74c1
JH
1409 Newz(601, PL_bitcount, 256, char);
1410 for (bits = 1; bits < 256; bits++) {
1411 if (bits & 1) PL_bitcount[bits]++;
1412 if (bits & 2) PL_bitcount[bits]++;
1413 if (bits & 4) PL_bitcount[bits]++;
1414 if (bits & 8) PL_bitcount[bits]++;
1415 if (bits & 16) PL_bitcount[bits]++;
1416 if (bits & 32) PL_bitcount[bits]++;
1417 if (bits & 64) PL_bitcount[bits]++;
1418 if (bits & 128) PL_bitcount[bits]++;
1419 }
1420 }
f337b084 1421 if (utf8)
08ca2aa3 1422 while (len >= 8 && s < strend) {
f337b084 1423 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1424 len -= 8;
1425 }
f337b084 1426 else
fc241834 1427 while (len >= 8) {
08ca2aa3 1428 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1429 len -= 8;
1430 }
08ca2aa3
TH
1431 if (len && s < strend) {
1432 U8 bits;
f337b084
TH
1433 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1434 if (datumtype == 'b')
a6ec74c1 1435 while (len-- > 0) {
92d41999 1436 if (bits & 1) cuv++;
a6ec74c1
JH
1437 bits >>= 1;
1438 }
f337b084 1439 else
a6ec74c1 1440 while (len-- > 0) {
08ca2aa3 1441 if (bits & 0x80) cuv++;
a6ec74c1
JH
1442 bits <<= 1;
1443 }
fc241834 1444 }
a6ec74c1
JH
1445 break;
1446 }
08ca2aa3
TH
1447
1448 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1449 SvPOK_on(sv);
1450 str = SvPVX(sv);
1451 if (datumtype == 'b') {
f337b084 1452 U8 bits = 0;
08ca2aa3
TH
1453 ai32 = len;
1454 for (len = 0; len < ai32; len++) {
1455 if (len & 7) bits >>= 1;
1456 else if (utf8) {
1457 if (s >= strend) break;
f337b084 1458 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1459 } else bits = *(U8 *) s++;
1460 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1461 }
08ca2aa3 1462 } else {
f337b084 1463 U8 bits = 0;
08ca2aa3
TH
1464 ai32 = len;
1465 for (len = 0; len < ai32; len++) {
1466 if (len & 7) bits <<= 1;
1467 else if (utf8) {
1468 if (s >= strend) break;
f337b084 1469 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1470 } else bits = *(U8 *) s++;
1471 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1472 }
1473 }
1474 *str = '\0';
08ca2aa3
TH
1475 SvCUR_set(sv, str - SvPVX(sv));
1476 XPUSHs(sv);
a6ec74c1 1477 break;
08ca2aa3 1478 }
a6ec74c1 1479 case 'H':
08ca2aa3
TH
1480 case 'h': {
1481 char *str;
fc241834 1482 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1483 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1484 len = (strend - s) * 2;
fc241834 1485 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1486 SvPOK_on(sv);
1487 str = SvPVX(sv);
1488 if (datumtype == 'h') {
f337b084 1489 U8 bits = 0;
fc241834
RGS
1490 ai32 = len;
1491 for (len = 0; len < ai32; len++) {
1492 if (len & 1) bits >>= 4;
1493 else if (utf8) {
1494 if (s >= strend) break;
f337b084 1495 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1496 } else bits = * (U8 *) s++;
a6ec74c1
JH
1497 *str++ = PL_hexdigit[bits & 15];
1498 }
08ca2aa3 1499 } else {
f337b084 1500 U8 bits = 0;
08ca2aa3
TH
1501 ai32 = len;
1502 for (len = 0; len < ai32; len++) {
1503 if (len & 1) bits <<= 4;
1504 else if (utf8) {
1505 if (s >= strend) break;
f337b084 1506 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1507 } else bits = *(U8 *) s++;
a6ec74c1
JH
1508 *str++ = PL_hexdigit[(bits >> 4) & 15];
1509 }
1510 }
1511 *str = '\0';
08ca2aa3
TH
1512 SvCUR_set(sv, str - SvPVX(sv));
1513 XPUSHs(sv);
a6ec74c1 1514 break;
08ca2aa3 1515 }
a6ec74c1 1516 case 'c':
73cb7263 1517 while (len-- > 0) {
f337b084 1518 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1519 if (aint >= 128) /* fake up signed chars */
1520 aint -= 256;
08ca2aa3 1521 if (!checksum)
ac7f3b1b 1522 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1523 else if (checksum > bits_in_uv)
1524 cdouble += (NV)aint;
1525 else
1526 cuv += aint;
a6ec74c1
JH
1527 }
1528 break;
1529 case 'C':
08ca2aa3
TH
1530 case 'W':
1531 W_checksum:
35bcd338 1532 if (len == 0) {
fc241834 1533 if (explicit_length && datumtype == 'C')
08ca2aa3 1534 /* Switch to "character" mode */
f337b084 1535 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1536 break;
1537 }
fc241834 1538 if (datumtype == 'C' ?
f337b084
TH
1539 (symptr->flags & FLAG_DO_UTF8) &&
1540 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3
TH
1541 while (len-- > 0 && s < strend) {
1542 UV val;
1543 STRLEN retlen;
f337b084
TH
1544 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1545 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1546 if (retlen == (STRLEN) -1 || retlen == 0)
1547 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1548 s += retlen;
1549 if (!checksum)
1550 PUSHs(sv_2mortal(newSVuv((UV) val)));
1551 else if (checksum > bits_in_uv)
1552 cdouble += (NV) val;
d6d3e8bd 1553 else
08ca2aa3 1554 cuv += val;
fc241834 1555 }
08ca2aa3 1556 } else if (!checksum)
a6ec74c1 1557 while (len-- > 0) {
08ca2aa3
TH
1558 U8 ch = *(U8 *) s++;
1559 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1560 }
08ca2aa3
TH
1561 else if (checksum > bits_in_uv)
1562 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1563 else
1564 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1565 break;
1566 case 'U':
35bcd338 1567 if (len == 0) {
08ca2aa3
TH
1568 if (explicit_length) {
1569 /* Switch to "bytes in UTF-8" mode */
f337b084 1570 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1571 else
1572 /* Should be impossible due to the need_utf8() test */
1573 Perl_croak(aTHX_ "U0 mode on a byte string");
1574 }
35bcd338
JH
1575 break;
1576 }
08ca2aa3 1577 if (len > strend - s) len = strend - s;
fc241834 1578 if (!checksum) {
08ca2aa3
TH
1579 if (len && unpack_only_one) len = 1;
1580 EXTEND(SP, len);
1581 EXTEND_MORTAL(len);
fc241834 1582 }
08ca2aa3
TH
1583 while (len-- > 0 && s < strend) {
1584 STRLEN retlen;
1585 UV auv;
1586 if (utf8) {
1587 U8 result[UTF8_MAXLEN];
1588 char *ptr;
1589 STRLEN len;
1590 ptr = s;
1591 /* Bug: warns about bad utf8 even if we are short on bytes
1592 and will break out of the loop */
230e1fce
NC
1593 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1594 'U'))
08ca2aa3
TH
1595 break;
1596 len = UTF8SKIP(result);
fc241834 1597 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1598 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1599 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1600 s = ptr;
1601 } else {
1602 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1603 if (retlen == (STRLEN) -1 || retlen == 0)
1604 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1605 s += retlen;
1606 }
1607 if (!checksum)
1608 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1609 else if (checksum > bits_in_uv)
08ca2aa3 1610 cdouble += (NV) auv;
73cb7263 1611 else
08ca2aa3 1612 cuv += auv;
a6ec74c1
JH
1613 }
1614 break;
49704364
WL
1615 case 's' | TYPE_IS_SHRIEKING:
1616#if SHORTSIZE != SIZE16
73cb7263 1617 while (len-- > 0) {
08ca2aa3 1618 short ashort;
f337b084
TH
1619 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1620 DO_BO_UNPACK(ashort, s);
08ca2aa3 1621 if (!checksum)
ac7f3b1b 1622 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1623 else if (checksum > bits_in_uv)
1624 cdouble += (NV)ashort;
1625 else
1626 cuv += ashort;
49704364
WL
1627 }
1628 break;
1629#else
1630 /* Fallthrough! */
a6ec74c1 1631#endif
49704364 1632 case 's':
73cb7263 1633 while (len-- > 0) {
08ca2aa3
TH
1634 I16 ai16;
1635
1636#if U16SIZE > SIZE16
1637 ai16 = 0;
1638#endif
f337b084 1639 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1640 DO_BO_UNPACK(ai16, 16);
1109a392 1641#if U16SIZE > SIZE16
73cb7263
NC
1642 if (ai16 > 32767)
1643 ai16 -= 65536;
a6ec74c1 1644#endif
08ca2aa3 1645 if (!checksum)
ac7f3b1b 1646 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1647 else if (checksum > bits_in_uv)
1648 cdouble += (NV)ai16;
1649 else
1650 cuv += ai16;
a6ec74c1
JH
1651 }
1652 break;
49704364
WL
1653 case 'S' | TYPE_IS_SHRIEKING:
1654#if SHORTSIZE != SIZE16
73cb7263 1655 while (len-- > 0) {
08ca2aa3 1656 unsigned short aushort;
f337b084
TH
1657 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1658 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1659 if (!checksum)
1660 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1661 else if (checksum > bits_in_uv)
1662 cdouble += (NV)aushort;
1663 else
1664 cuv += aushort;
49704364
WL
1665 }
1666 break;
1667#else
1668 /* Fallhrough! */
1669#endif
a6ec74c1
JH
1670 case 'v':
1671 case 'n':
1672 case 'S':
73cb7263 1673 while (len-- > 0) {
08ca2aa3
TH
1674 U16 au16;
1675#if U16SIZE > SIZE16
1676 au16 = 0;
1677#endif
f337b084 1678 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1679 DO_BO_UNPACK(au16, 16);
a6ec74c1 1680#ifdef HAS_NTOHS
73cb7263
NC
1681 if (datumtype == 'n')
1682 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1683#endif
1684#ifdef HAS_VTOHS
73cb7263
NC
1685 if (datumtype == 'v')
1686 au16 = vtohs(au16);
a6ec74c1 1687#endif
08ca2aa3
TH
1688 if (!checksum)
1689 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1690 else if (checksum > bits_in_uv)
f337b084 1691 cdouble += (NV) au16;
73cb7263
NC
1692 else
1693 cuv += au16;
a6ec74c1
JH
1694 }
1695 break;
7212898e 1696#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1697 case 'v' | TYPE_IS_SHRIEKING:
1698 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1699 while (len-- > 0) {
08ca2aa3
TH
1700 I16 ai16;
1701# if U16SIZE > SIZE16
1702 ai16 = 0;
1703# endif
f337b084 1704 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1705# ifdef HAS_NTOHS
73cb7263 1706 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1707 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1708# endif /* HAS_NTOHS */
1709# ifdef HAS_VTOHS
73cb7263 1710 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1711 ai16 = (I16) vtohs((U16) ai16);
1712# endif /* HAS_VTOHS */
1713 if (!checksum)
ac7f3b1b 1714 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1715 else if (checksum > bits_in_uv)
08ca2aa3 1716 cdouble += (NV) ai16;
73cb7263
NC
1717 else
1718 cuv += ai16;
068bd2e7
MHM
1719 }
1720 break;
08ca2aa3 1721#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1722 case 'i':
49704364 1723 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1724 while (len-- > 0) {
08ca2aa3 1725 int aint;
f337b084
TH
1726 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1727 DO_BO_UNPACK(aint, i);
08ca2aa3 1728 if (!checksum)
ac7f3b1b 1729 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1730 else if (checksum > bits_in_uv)
1731 cdouble += (NV)aint;
1732 else
1733 cuv += aint;
a6ec74c1
JH
1734 }
1735 break;
1736 case 'I':
49704364 1737 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1738 while (len-- > 0) {
08ca2aa3 1739 unsigned int auint;
f337b084
TH
1740 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1741 DO_BO_UNPACK(auint, i);
08ca2aa3 1742 if (!checksum)
ac7f3b1b 1743 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1744 else if (checksum > bits_in_uv)
1745 cdouble += (NV)auint;
1746 else
1747 cuv += auint;
a6ec74c1
JH
1748 }
1749 break;
92d41999 1750 case 'j':
73cb7263 1751 while (len-- > 0) {
08ca2aa3 1752 IV aiv;
f337b084 1753 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1754#if IVSIZE == INTSIZE
f337b084 1755 DO_BO_UNPACK(aiv, i);
1109a392 1756#elif IVSIZE == LONGSIZE
f337b084 1757 DO_BO_UNPACK(aiv, l);
1109a392 1758#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1759 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1760#else
1761 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1762#endif
08ca2aa3 1763 if (!checksum)
ac7f3b1b 1764 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1765 else if (checksum > bits_in_uv)
1766 cdouble += (NV)aiv;
1767 else
1768 cuv += aiv;
92d41999
JH
1769 }
1770 break;
1771 case 'J':
73cb7263 1772 while (len-- > 0) {
08ca2aa3 1773 UV auv;
f337b084 1774 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1775#if IVSIZE == INTSIZE
f337b084 1776 DO_BO_UNPACK(auv, i);
08ca2aa3 1777#elif IVSIZE == LONGSIZE
f337b084 1778 DO_BO_UNPACK(auv, l);
08ca2aa3 1779#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1780 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1781#else
1782 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1783#endif
08ca2aa3 1784 if (!checksum)
ac7f3b1b 1785 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1786 else if (checksum > bits_in_uv)
1787 cdouble += (NV)auv;
1788 else
1789 cuv += auv;
92d41999
JH
1790 }
1791 break;
49704364
WL
1792 case 'l' | TYPE_IS_SHRIEKING:
1793#if LONGSIZE != SIZE32
73cb7263 1794 while (len-- > 0) {
08ca2aa3 1795 long along;
f337b084
TH
1796 SHIFT_VAR(utf8, s, strend, along, datumtype);
1797 DO_BO_UNPACK(along, l);
08ca2aa3 1798 if (!checksum)
ac7f3b1b 1799 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1800 else if (checksum > bits_in_uv)
1801 cdouble += (NV)along;
1802 else
1803 cuv += along;
49704364
WL
1804 }
1805 break;
1806#else
1807 /* Fallthrough! */
a6ec74c1 1808#endif
49704364 1809 case 'l':
73cb7263 1810 while (len-- > 0) {
08ca2aa3
TH
1811 I32 ai32;
1812#if U32SIZE > SIZE32
1813 ai32 = 0;
1814#endif
f337b084 1815 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1816 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1817#if U32SIZE > SIZE32
08ca2aa3 1818 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1819#endif
08ca2aa3 1820 if (!checksum)
ac7f3b1b 1821 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1822 else if (checksum > bits_in_uv)
1823 cdouble += (NV)ai32;
1824 else
1825 cuv += ai32;
a6ec74c1
JH
1826 }
1827 break;
49704364
WL
1828 case 'L' | TYPE_IS_SHRIEKING:
1829#if LONGSIZE != SIZE32
73cb7263 1830 while (len-- > 0) {
08ca2aa3 1831 unsigned long aulong;
f337b084
TH
1832 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1833 DO_BO_UNPACK(aulong, l);
08ca2aa3 1834 if (!checksum)
ac7f3b1b 1835 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1836 else if (checksum > bits_in_uv)
1837 cdouble += (NV)aulong;
1838 else
1839 cuv += aulong;
49704364
WL
1840 }
1841 break;
1842#else
1843 /* Fall through! */
1844#endif
a6ec74c1
JH
1845 case 'V':
1846 case 'N':
1847 case 'L':
73cb7263 1848 while (len-- > 0) {
08ca2aa3
TH
1849 U32 au32;
1850#if U32SIZE > SIZE32
1851 au32 = 0;
1852#endif
f337b084 1853 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1854 DO_BO_UNPACK(au32, 32);
a6ec74c1 1855#ifdef HAS_NTOHL
73cb7263
NC
1856 if (datumtype == 'N')
1857 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1858#endif
1859#ifdef HAS_VTOHL
73cb7263
NC
1860 if (datumtype == 'V')
1861 au32 = vtohl(au32);
a6ec74c1 1862#endif
08ca2aa3 1863 if (!checksum)
fc241834
RGS
1864 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1865 else if (checksum > bits_in_uv)
1866 cdouble += (NV)au32;
1867 else
1868 cuv += au32;
a6ec74c1
JH
1869 }
1870 break;
7212898e 1871#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1872 case 'V' | TYPE_IS_SHRIEKING:
1873 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1874 while (len-- > 0) {
08ca2aa3
TH
1875 I32 ai32;
1876# if U32SIZE > SIZE32
1877 ai32 = 0;
1878# endif
f337b084 1879 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1880# ifdef HAS_NTOHL
73cb7263
NC
1881 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1882 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1883# endif
1884# ifdef HAS_VTOHL
73cb7263
NC
1885 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1886 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1887# endif
1888 if (!checksum)
ac7f3b1b 1889 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1890 else if (checksum > bits_in_uv)
1891 cdouble += (NV)ai32;
1892 else
1893 cuv += ai32;
068bd2e7
MHM
1894 }
1895 break;
08ca2aa3 1896#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1897 case 'p':
a6ec74c1 1898 while (len-- > 0) {
08ca2aa3 1899 char *aptr;
f337b084 1900 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
5512a2f9 1901 DO_BO_UNPACK_P(aptr);
c4c5f44a
NC
1902 /* newSVpv generates undef if aptr is NULL */
1903 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1904 }
1905 break;
1906 case 'w':
a6ec74c1
JH
1907 {
1908 UV auv = 0;
1909 U32 bytes = 0;
fc241834 1910
08ca2aa3
TH
1911 while (len > 0 && s < strend) {
1912 U8 ch;
f337b084 1913 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1914 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1915 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1916 if (ch < 0x80) {
a6ec74c1 1917 bytes = 0;
ac7f3b1b 1918 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1919 len--;
1920 auv = 0;
08ca2aa3 1921 continue;
a6ec74c1 1922 }
08ca2aa3 1923 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
1924 char *t;
1925 STRLEN n_a;
1926
1927 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1928 while (s < strend) {
f337b084 1929 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1930 sv = mul128(sv, (U8)(ch & 0x7f));
1931 if (!(ch & 0x80)) {
a6ec74c1
JH
1932 bytes = 0;
1933 break;
1934 }
1935 }
1936 t = SvPV(sv, n_a);
1937 while (*t == '0')
1938 t++;
1939 sv_chop(sv, t);
1940 PUSHs(sv_2mortal(sv));
1941 len--;
1942 auv = 0;
1943 }
1944 }
1945 if ((s >= strend) && bytes)
49704364 1946 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1947 }
1948 break;
1949 case 'P':
49704364
WL
1950 if (symptr->howlen == e_star)
1951 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1952 EXTEND(SP, 1);
08ca2aa3
TH
1953 if (sizeof(char*) <= strend - s) {
1954 char *aptr;
f337b084 1955 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
08ca2aa3 1956 DO_BO_UNPACK_P(aptr);
fc241834
RGS
1957 /* newSVpvn generates undef if aptr is NULL */
1958 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 1959 }
a6ec74c1
JH
1960 break;
1961#ifdef HAS_QUAD
1962 case 'q':
73cb7263 1963 while (len-- > 0) {
08ca2aa3 1964 Quad_t aquad;
f337b084
TH
1965 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1966 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
1967 if (!checksum)
1968 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 1969 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
1970 else if (checksum > bits_in_uv)
1971 cdouble += (NV)aquad;
1972 else
1973 cuv += aquad;
1974 }
a6ec74c1
JH
1975 break;
1976 case 'Q':
73cb7263 1977 while (len-- > 0) {
08ca2aa3 1978 Uquad_t auquad;
f337b084
TH
1979 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1980 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
1981 if (!checksum)
1982 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1983 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
1984 else if (checksum > bits_in_uv)
1985 cdouble += (NV)auquad;
1986 else
1987 cuv += auquad;
a6ec74c1
JH
1988 }
1989 break;
08ca2aa3 1990#endif /* HAS_QUAD */
a6ec74c1
JH
1991 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1992 case 'f':
73cb7263 1993 while (len-- > 0) {
08ca2aa3 1994 float afloat;
f337b084 1995 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1996 DO_BO_UNPACK_N(afloat, float);
1997 if (!checksum)
1998 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
1999 else
73cb7263 2000 cdouble += afloat;
fc241834 2001 }
a6ec74c1
JH
2002 break;
2003 case 'd':
73cb7263 2004 while (len-- > 0) {
08ca2aa3 2005 double adouble;
f337b084 2006 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2007 DO_BO_UNPACK_N(adouble, double);
2008 if (!checksum)
2009 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2010 else
73cb7263 2011 cdouble += adouble;
fc241834 2012 }
a6ec74c1 2013 break;
92d41999 2014 case 'F':
73cb7263 2015 while (len-- > 0) {
08ca2aa3 2016 NV anv;
f337b084 2017 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2018 DO_BO_UNPACK_N(anv, NV);
2019 if (!checksum)
2020 PUSHs(sv_2mortal(newSVnv(anv)));
2021 else
73cb7263 2022 cdouble += anv;
fc241834 2023 }
92d41999
JH
2024 break;
2025#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2026 case 'D':
73cb7263 2027 while (len-- > 0) {
08ca2aa3 2028 long double aldouble;
f337b084 2029 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2030 DO_BO_UNPACK_N(aldouble, long double);
2031 if (!checksum)
2032 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2033 else
2034 cdouble += aldouble;
92d41999
JH
2035 }
2036 break;
2037#endif
a6ec74c1
JH
2038 case 'u':
2039 /* MKS:
2040 * Initialise the decode mapping. By using a table driven
2041 * algorithm, the code will be character-set independent
2042 * (and just as fast as doing character arithmetic)
2043 */
2044 if (PL_uudmap['M'] == 0) {
2045 int i;
2046
2047 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2048 PL_uudmap[(U8)PL_uuemap[i]] = i;
2049 /*
2050 * Because ' ' and '`' map to the same value,
2051 * we need to decode them both the same.
2052 */
2053 PL_uudmap[' '] = 0;
2054 }
08ca2aa3
TH
2055 {
2056 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2057 sv = sv_2mortal(NEWSV(42, l));
2058 if (l) SvPOK_on(sv);
2059 }
2060 if (utf8) {
2061 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2062 I32 a, b, c, d;
2063 char hunk[4];
2064
2065 hunk[3] = '\0';
2066 while (len > 0) {
2067 next_uni_uu(aTHX_ &s, strend, &a);
2068 next_uni_uu(aTHX_ &s, strend, &b);
2069 next_uni_uu(aTHX_ &s, strend, &c);
2070 next_uni_uu(aTHX_ &s, strend, &d);
2071 hunk[0] = (char)((a << 2) | (b >> 4));
2072 hunk[1] = (char)((b << 4) | (c >> 2));
2073 hunk[2] = (char)((c << 6) | d);
2074 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2075 len -= 3;
2076 }
2077 if (s < strend) {
2078 if (*s == '\n') s++;
2079 else {
2080 /* possible checksum byte */
2081 char *skip = s+UTF8SKIP(s);
2082 if (skip < strend && *skip == '\n') s = skip+1;
2083 }
2084 }
2085 }
2086 } else {
fc241834
RGS
2087 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2088 I32 a, b, c, d;
2089 char hunk[4];
a6ec74c1 2090
fc241834
RGS
2091 hunk[3] = '\0';
2092 len = PL_uudmap[*(U8*)s++] & 077;
2093 while (len > 0) {
2094 if (s < strend && ISUUCHAR(*s))
2095 a = PL_uudmap[*(U8*)s++] & 077;
2096 else
2097 a = 0;
2098 if (s < strend && ISUUCHAR(*s))
2099 b = PL_uudmap[*(U8*)s++] & 077;
2100 else
2101 b = 0;
2102 if (s < strend && ISUUCHAR(*s))
2103 c = PL_uudmap[*(U8*)s++] & 077;
2104 else
2105 c = 0;
2106 if (s < strend && ISUUCHAR(*s))
2107 d = PL_uudmap[*(U8*)s++] & 077;
2108 else
2109 d = 0;
2110 hunk[0] = (char)((a << 2) | (b >> 4));
2111 hunk[1] = (char)((b << 4) | (c >> 2));
2112 hunk[2] = (char)((c << 6) | d);
2113 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2114 len -= 3;
2115 }
2116 if (*s == '\n')
2117 s++;
2118 else /* possible checksum byte */
2119 if (s + 1 < strend && s[1] == '\n')
2120 s += 2;
a6ec74c1 2121 }
08ca2aa3
TH
2122 }
2123 XPUSHs(sv);
a6ec74c1
JH
2124 break;
2125 }
49704364 2126
a6ec74c1 2127 if (checksum) {
1109a392 2128 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2129 (checksum > bits_in_uv &&
08ca2aa3
TH
2130 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2131 NV trouble, anv;
a6ec74c1 2132
08ca2aa3 2133 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2134 while (checksum >= 16) {
2135 checksum -= 16;
08ca2aa3 2136 anv *= 65536.0;
a6ec74c1 2137 }
a6ec74c1 2138 while (cdouble < 0.0)
08ca2aa3
TH
2139 cdouble += anv;
2140 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2141 sv = newSVnv(cdouble);
a6ec74c1
JH
2142 }
2143 else {
fa8ec7c1
NC
2144 if (checksum < bits_in_uv) {
2145 UV mask = ((UV)1 << checksum) - 1;
92d41999 2146 cuv &= mask;
a6ec74c1 2147 }
c4c5f44a 2148 sv = newSVuv(cuv);
a6ec74c1
JH
2149 }
2150 XPUSHs(sv_2mortal(sv));
2151 checksum = 0;
2152 }
fc241834 2153
49704364
WL
2154 if (symptr->flags & FLAG_SLASH){
2155 if (SP - PL_stack_base - start_sp_offset <= 0)
2156 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2157 if( next_symbol(symptr) ){
2158 if( symptr->howlen == e_number )
2159 Perl_croak(aTHX_ "Count after length/code in unpack" );
2160 if( beyond ){
2161 /* ...end of char buffer then no decent length available */
2162 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2163 } else {
2164 /* take top of stack (hope it's numeric) */
2165 len = POPi;
2166 if( len < 0 )
2167 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2168 }
2169 } else {
2170 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2171 }
2172 datumtype = symptr->code;
21c16052 2173 explicit_length = FALSE;
49704364
WL
2174 goto redo_switch;
2175 }
a6ec74c1 2176 }
49704364 2177
18529408
IZ
2178 if (new_s)
2179 *new_s = s;
2180 PUTBACK;
2181 return SP - PL_stack_base - start_sp_offset;
2182}
2183
2184PP(pp_unpack)
2185{
2186 dSP;
bab9c0ac 2187 dPOPPOPssrl;
18529408
IZ
2188 I32 gimme = GIMME_V;
2189 STRLEN llen;
2190 STRLEN rlen;
08ca2aa3
TH
2191 char *pat = SvPV(left, llen);
2192 char *s = SvPV(right, rlen);
18529408 2193 char *strend = s + rlen;
08ca2aa3
TH
2194 char *patend = pat + llen;
2195 I32 cnt;
18529408
IZ
2196
2197 PUTBACK;
7accc089 2198 cnt = unpackstring(pat, patend, s, strend,
49704364 2199 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2200 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2201
18529408
IZ
2202 SPAGAIN;
2203 if ( !cnt && gimme == G_SCALAR )
2204 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2205 RETURN;
2206}
2207
f337b084
TH
2208STATIC U8 *
2209doencodes(U8 *h, char *s, I32 len)
a6ec74c1 2210{
f337b084 2211 *h++ = PL_uuemap[len];
a6ec74c1 2212 while (len > 2) {
f337b084
TH
2213 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2214 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2215 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2216 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2217 s += 3;
2218 len -= 3;
2219 }
2220 if (len > 0) {
2221 char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2222 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2223 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2224 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2225 *h++ = PL_uuemap[0];
a6ec74c1 2226 }
f337b084
TH
2227 *h++ = '\n';
2228 return h;
a6ec74c1
JH
2229}
2230
2231STATIC SV *
2232S_is_an_int(pTHX_ char *s, STRLEN l)
2233{
2234 STRLEN n_a;
2235 SV *result = newSVpvn(s, l);
2236 char *result_c = SvPV(result, n_a); /* convenience */
2237 char *out = result_c;
2238 bool skip = 1;
2239 bool ignore = 0;
2240
2241 while (*s) {
2242 switch (*s) {
2243 case ' ':
2244 break;
2245 case '+':
2246 if (!skip) {
2247 SvREFCNT_dec(result);
2248 return (NULL);
2249 }
2250 break;
2251 case '0':
2252 case '1':
2253 case '2':
2254 case '3':
2255 case '4':
2256 case '5':
2257 case '6':
2258 case '7':
2259 case '8':
2260 case '9':
2261 skip = 0;
2262 if (!ignore) {
2263 *(out++) = *s;
2264 }
2265 break;
2266 case '.':
2267 ignore = 1;
2268 break;
2269 default:
2270 SvREFCNT_dec(result);
2271 return (NULL);
2272 }
2273 s++;
2274 }
2275 *(out++) = '\0';
2276 SvCUR_set(result, out - result_c);
2277 return (result);
2278}
2279
2280/* pnum must be '\0' terminated */
2281STATIC int
2282S_div128(pTHX_ SV *pnum, bool *done)
2283{
2284 STRLEN len;
2285 char *s = SvPV(pnum, len);
2286 int m = 0;
2287 int r = 0;
2288 char *t = s;
2289
2290 *done = 1;
2291 while (*t) {
2292 int i;
2293
2294 i = m * 10 + (*t - '0');
2295 m = i & 0x7F;
2296 r = (i >> 7); /* r < 10 */
2297 if (r) {
2298 *done = 0;
2299 }
2300 *(t++) = '0' + r;
2301 }
2302 *(t++) = '\0';
2303 SvCUR_set(pnum, (STRLEN) (t - s));
2304 return (m);
2305}
2306
49704364 2307
a6ec74c1 2308
18529408
IZ
2309/*
2310=for apidoc pack_cat
2311
7accc089
JH
2312The engine implementing pack() Perl function. Note: parameters next_in_list and
2313flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2314
2315=cut */
2316
49704364 2317
18529408
IZ
2318void
2319Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2320{
49704364
WL
2321 tempsym_t sym = { 0 };
2322 sym.patptr = pat;
2323 sym.patend = patend;
7accc089
JH
2324 sym.flags = FLAG_PACK;
2325
2326 (void)pack_rec( cat, &sym, beglist, endlist );
2327}
2328
2329
2330/*
2331=for apidoc packlist
2332
2333The engine implementing pack() Perl function.
2334
2335=cut */
2336
2337
2338void
2339Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2340{
f337b084 2341 STRLEN no_len;
7accc089 2342 tempsym_t sym = { 0 };
f337b084 2343
7accc089
JH
2344 sym.patptr = pat;
2345 sym.patend = patend;
2346 sym.flags = FLAG_PACK;
49704364 2347
f337b084
TH
2348 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2349 Also make sure any UTF8 flag is loaded */
2350 SvPV_force(cat, no_len);
2351 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2352
49704364
WL
2353 (void)pack_rec( cat, &sym, beglist, endlist );
2354}
2355
f337b084
TH
2356/* like sv_utf8_upgrade, but also repoint the group start markers */
2357STATIC void
2358marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2359 STRLEN len;
2360 tempsym_t *group;
2361 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2362
2363 if (SvUTF8(sv)) return;
2364
2365 from_start = SvPVX(sv);
2366 from_end = from_start + SvCUR(sv);
2367 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2368 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2369 if (from_ptr == from_end) {
2370 /* Simple case: no character needs to be changed */
2371 SvUTF8_on(sv);
2372 return;
2373 }
2374
3473cf63 2375 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
f337b084
TH
2376 New('U', to_start, len, char);
2377 Copy(from_start, to_start, from_ptr-from_start, char);
2378 to_ptr = to_start + (from_ptr-from_start);
2379
2380 New('U', marks, sym_ptr->level+2, char *);
2381 for (group=sym_ptr; group; group = group->previous)
2382 marks[group->level] = from_start + group->strbeg;
2383 marks[sym_ptr->level+1] = from_end+1;
2384 for (m = marks; *m < from_ptr; m++)
2385 *m = to_start + (*m-from_start);
2386
2387 for (;from_ptr < from_end; from_ptr++) {
2388 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2389 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2390 }
2391 *to_ptr = 0;
2392
2393 while (*m == from_ptr) *m++ = to_ptr;
2394 if (m != marks + sym_ptr->level+1) {
2395 Safefree(marks);
2396 Safefree(to_start);
2397 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2398 }
2399 for (group=sym_ptr; group; group = group->previous)
2400 group->strbeg = marks[group->level] - to_start;
2401 Safefree(marks);
2402
2403 if (SvOOK(sv)) {
2404 if (SvIVX(sv)) {
2405 SvLEN(sv) += SvIVX(sv);
2406 from_start -= SvIVX(sv);
2407 SvIV_set(sv, 0);
2408 }
2409 SvFLAGS(sv) &= ~SVf_OOK;
2410 }
2411 if (SvLEN(sv) != 0)
2412 Safefree(from_start);
2413 SvPVX(sv) = to_start;
2414 SvCUR(sv) = to_ptr - to_start;
2415 SvLEN(sv) = len;
2416 SvUTF8_on(sv);
2417}
2418
2419/* Exponential string grower. Makes string extension effectively O(n)
2420 needed says how many extra bytes we need (not counting the final '\0')
2421 Only grows the string if there is an actual lack of space
2422*/
2423STATIC char *
2424sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2425 STRLEN cur = SvCUR(sv);
2426 STRLEN len = SvLEN(sv);
2427 STRLEN extend;
2428 if (len - cur > needed) return SvPVX(sv);
2429 extend = needed > len ? needed : len;
2430 return SvGROW(sv, len+extend+1);
2431}
49704364
WL
2432
2433STATIC
2434SV **
f337b084 2435S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2436{
49704364 2437 tempsym_t lookahead;
f337b084
TH
2438 I32 items = endlist - beglist;
2439 bool found = next_symbol(symptr);
2440 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2441
2442 if (symptr->level == 0 && found && symptr->code == 'U') {
2443 marked_upgrade(aTHX_ cat, symptr);
2444 symptr->flags |= FLAG_DO_UTF8;
2445 utf8 = 0;
49704364 2446 }
f337b084 2447 symptr->strbeg = SvCUR(cat);
49704364
WL
2448
2449 while (found) {
f337b084
TH
2450 SV *fromstr;
2451 STRLEN fromlen;
2452 I32 len;
a6ec74c1 2453 SV *lengthcode = Nullsv;
49704364 2454 I32 datumtype = symptr->code;
f337b084
TH
2455 howlen_t howlen = symptr->howlen;
2456 char *start = SvPVX(cat);
2457 char *cur = start + SvCUR(cat);
49704364 2458
f337b084
TH
2459#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2460
2461 switch (howlen) {
fc241834 2462 case e_star:
f337b084
TH
2463 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2464 0 : items;
2465 break;
2466 default:
2467 /* e_no_len and e_number */
2468 len = symptr->length;
49704364
WL
2469 break;
2470 }
2471
f337b084 2472 if (len) {
a7a3cfaa 2473 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2474
a7a3cfaa
TH
2475 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2476 /* We can process this letter. */
2477 STRLEN size = props & PACK_SIZE_MASK;
2478 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2479 }
f337b084
TH
2480 }
2481
49704364
WL
2482 /* Look ahead for next symbol. Do we have code/code? */
2483 lookahead = *symptr;
2484 found = next_symbol(&lookahead);
246f24af
TH
2485 if (symptr->flags & FLAG_SLASH) {
2486 IV count;
f337b084 2487 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2488 if (strchr("aAZ", lookahead.code)) {
2489 if (lookahead.howlen == e_number) count = lookahead.length;
2490 else {
2491 if (items > 0)
2492 count = DO_UTF8(*beglist) ?
2493 sv_len_utf8(*beglist) : sv_len(*beglist);
2494 else count = 0;
2495 if (lookahead.code == 'Z') count++;
2496 }
2497 } else {
2498 if (lookahead.howlen == e_number && lookahead.length < items)
2499 count = lookahead.length;
2500 else count = items;
2501 }
2502 lookahead.howlen = e_number;
2503 lookahead.length = count;
2504 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2505 }
49704364 2506
fc241834
RGS
2507 /* Code inside the switch must take care to properly update
2508 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2509 doesn't simply leave using break */
1109a392 2510 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2511 default:
f337b084
TH
2512 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2513 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2514 case '%':
49704364 2515 Perl_croak(aTHX_ "'%%' may not be used in pack");
a6ec74c1 2516 case '@':
f337b084
TH
2517 if (utf8) {
2518 char *s = start + symptr->strbeg;
2519 while (len > 0 && s < cur) {
2520 s += UTF8SKIP(s);
2521 len--;
2522 }
2523 if (s > cur)
2524 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2525 if (len > 0) {
2526 grow:
2527 GROWING(0, cat, start, cur, len);
2528 Zero(cur, len, char);
2529 cur += len;
2530 } else if (s < cur) cur = s;
2531 else goto no_change;
2532 } else {
2533 len -= cur - (start+symptr->strbeg);
2534 if (len > 0) goto grow;
fc241834 2535 len = -len;
f337b084
TH
2536 if (len > 0) goto shrink;
2537 else goto no_change;
2538 }
a6ec74c1 2539 break;
fc241834 2540 case '(': {
49704364 2541 tempsym_t savsym = *symptr;
66c611c5
MHM
2542 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2543 symptr->flags |= group_modifiers;
49704364
WL
2544 symptr->patend = savsym.grpend;
2545 symptr->level++;
f337b084 2546 symptr->previous = &lookahead;
18529408 2547 while (len--) {
f337b084
TH
2548 U32 was_utf8;
2549 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2550 else symptr->flags &= ~FLAG_PARSE_UTF8;
2551 was_utf8 = SvUTF8(cat);
49704364 2552 symptr->patptr = savsym.grpbeg;
f337b084
TH
2553 beglist = pack_rec(cat, symptr, beglist, endlist);
2554 if (SvUTF8(cat) != was_utf8)
2555 /* This had better be an upgrade while in utf8==0 mode */
2556 utf8 = 1;
2557
49704364 2558 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2559 break; /* No way to continue */
2560 }
f337b084
TH
2561 lookahead.flags = symptr->flags & ~group_modifiers;
2562 goto no_change;
18529408 2563 }
62f95557
IZ
2564 case 'X' | TYPE_IS_SHRIEKING:
2565 if (!len) /* Avoid division by 0 */
2566 len = 1;
f337b084
TH
2567 if (utf8) {
2568 char *hop, *last;
2569 I32 l = len;
2570 hop = last = start;
2571 while (hop < cur) {
2572 hop += UTF8SKIP(hop);
2573 if (--l == 0) {
2574 last = hop;
2575 l = len;
2576 }
2577 }
2578 if (last > cur)
2579 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2580 cur = last;
2581 break;
2582 }
2583 len = (cur-start) % len;
62f95557 2584 /* FALL THROUGH */
a6ec74c1 2585 case 'X':
f337b084
TH
2586 if (utf8) {
2587 if (len < 1) goto no_change;
2588 while (len > 0) {
2589 if (cur <= start)
2590 Perl_croak(aTHX_ "'X' outside of string in pack");
2591 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2592 if (cur <= start)
2593 Perl_croak(aTHX_ "'X' outside of string in pack");
2594 }
2595 len--;
2596 }
2597 } else {
fc241834 2598 shrink:
f337b084 2599 if (cur - start < len)
fc241834 2600 Perl_croak(aTHX_ "'X' outside of string in pack");
f337b084
TH
2601 cur -= len;
2602 }
2603 if (cur < start+symptr->strbeg) {
2604 /* Make sure group starts don't point into the void */
2605 tempsym_t *group;
2606 STRLEN length = cur-start;
2607 for (group = symptr;
2608 group && length < group->strbeg;
2609 group = group->previous) group->strbeg = length;
2610 lookahead.strbeg = length;
2611 }
a6ec74c1 2612 break;
fc241834
RGS
2613 case 'x' | TYPE_IS_SHRIEKING: {
2614 I32 ai32;
62f95557
IZ
2615 if (!len) /* Avoid division by 0 */
2616 len = 1;
230e1fce 2617 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2618 else ai32 = (cur - start) % len;
2619 if (ai32 == 0) goto no_change;
2620 len -= ai32;
2621 }
2622 /* FALL THROUGH */
a6ec74c1 2623 case 'x':
f337b084 2624 goto grow;
a6ec74c1
JH
2625 case 'A':
2626 case 'Z':
f337b084
TH
2627 case 'a': {
2628 char *aptr;
2629
a6ec74c1
JH
2630 fromstr = NEXTFROM;
2631 aptr = SvPV(fromstr, fromlen);
f337b084
TH
2632 if (DO_UTF8(fromstr)) {
2633 char *end, *s;
2634
2635 if (!utf8 && !SvUTF8(cat)) {
2636 marked_upgrade(aTHX_ cat, symptr);
2637 lookahead.flags |= FLAG_DO_UTF8;
2638 lookahead.strbeg = symptr->strbeg;
2639 utf8 = 1;
2640 start = SvPVX(cat);
2641 cur = start + SvCUR(cat);
2642 }
fc241834 2643 if (howlen == e_star) {
f337b084
TH
2644 if (utf8) goto string_copy;
2645 len = fromlen+1;
2646 }
2647 s = aptr;
2648 end = aptr + fromlen;
2649 fromlen = datumtype == 'Z' ? len-1 : len;
2650 while ((I32) fromlen > 0 && s < end) {
2651 s += UTF8SKIP(s);
2652 fromlen--;
2653 }
2654 if (s > end)
2655 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2656 if (utf8) {
fc241834 2657 len = fromlen;
f337b084
TH
2658 if (datumtype == 'Z') len++;
2659 fromlen = s-aptr;
2660 len += fromlen;
fc241834 2661
f337b084 2662 goto string_copy;
fc241834 2663 }
f337b084
TH
2664 fromlen = len - fromlen;
2665 if (datumtype == 'Z') fromlen--;
2666 if (howlen == e_star) {
2667 len = fromlen;
2668 if (datumtype == 'Z') len++;
fc241834 2669 }
f337b084 2670 GROWING(0, cat, start, cur, len);
fc241834 2671 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2672 datumtype | TYPE_IS_PACK))
2673 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2674 cur += fromlen;
a6ec74c1 2675 len -= fromlen;
f337b084
TH
2676 } else if (utf8) {
2677 if (howlen == e_star) {
2678 len = fromlen;
2679 if (datumtype == 'Z') len++;
a6ec74c1 2680 }
f337b084
TH
2681 if (len <= (I32) fromlen) {
2682 fromlen = len;
2683 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2684 }
fc241834 2685 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2686 upgrade, so:
2687 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2688 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2689 len -= fromlen;
2690 while (fromlen > 0) {
230e1fce 2691 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2692 aptr++;
2693 fromlen--;
fc241834 2694 }
f337b084
TH
2695 } else {
2696 string_copy:
2697 if (howlen == e_star) {
2698 len = fromlen;
2699 if (datumtype == 'Z') len++;
2700 }
2701 if (len <= (I32) fromlen) {
2702 fromlen = len;
2703 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2704 }
f337b084
TH
2705 GROWING(0, cat, start, cur, len);
2706 Copy(aptr, cur, fromlen, char);
2707 cur += fromlen;
2708 len -= fromlen;
a6ec74c1 2709 }
f337b084
TH
2710 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2711 cur += len;
a6ec74c1 2712 break;
f337b084 2713 }
a6ec74c1 2714 case 'B':
f337b084
TH
2715 case 'b': {
2716 char *str, *end;
2717 I32 l, field_len;
2718 U8 bits;
2719 bool utf8_source;
2720 U32 utf8_flags;
a6ec74c1 2721
fc241834
RGS
2722 fromstr = NEXTFROM;
2723 str = SvPV(fromstr, fromlen);
f337b084
TH
2724 end = str + fromlen;
2725 if (DO_UTF8(fromstr)) {
2726 utf8_source = TRUE;
2727 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2728 } else {
2729 utf8_source = FALSE;
2730 utf8_flags = 0; /* Unused, but keep compilers happy */
2731 }
2732 if (howlen == e_star) len = fromlen;
2733 field_len = (len+7)/8;
2734 GROWING(utf8, cat, start, cur, field_len);
2735 if (len > (I32)fromlen) len = fromlen;
2736 bits = 0;
2737 l = 0;
2738 if (datumtype == 'B')
2739 while (l++ < len) {
2740 if (utf8_source) {
2741 UV val;
2742 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2743 bits |= val & 1;
2744 } else bits |= *str++ & 1;
2745 if (l & 7) bits <<= 1;
fc241834 2746 else {
f337b084
TH
2747 PUSH_BYTE(utf8, cur, bits);
2748 bits = 0;
a6ec74c1
JH
2749 }
2750 }
f337b084
TH
2751 else
2752 /* datumtype == 'b' */
2753 while (l++ < len) {
2754 if (utf8_source) {
2755 UV val;
2756 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2757 if (val & 1) bits |= 0x80;
2758 } else if (*str++ & 1)
2759 bits |= 0x80;
2760 if (l & 7) bits >>= 1;
fc241834 2761 else {
f337b084
TH
2762 PUSH_BYTE(utf8, cur, bits);
2763 bits = 0;
a6ec74c1
JH
2764 }
2765 }
f337b084
TH
2766 l--;
2767 if (l & 7) {
fc241834 2768 if (datumtype == 'B')
f337b084 2769 bits <<= 7 - (l & 7);
fc241834 2770 else
f337b084
TH
2771 bits >>= 7 - (l & 7);
2772 PUSH_BYTE(utf8, cur, bits);
2773 l += 7;
a6ec74c1 2774 }
f337b084
TH
2775 /* Determine how many chars are left in the requested field */
2776 l /= 8;
2777 if (howlen == e_star) field_len = 0;
2778 else field_len -= l;
2779 Zero(cur, field_len, char);
2780 cur += field_len;
a6ec74c1 2781 break;
f337b084 2782 }
a6ec74c1 2783 case 'H':
f337b084
TH
2784 case 'h': {
2785 char *str, *end;
2786 I32 l, field_len;
2787 U8 bits;
2788 bool utf8_source;
2789 U32 utf8_flags;
a6ec74c1 2790
fc241834
RGS
2791 fromstr = NEXTFROM;
2792 str = SvPV(fromstr, fromlen);
f337b084
TH
2793 end = str + fromlen;
2794 if (DO_UTF8(fromstr)) {
2795 utf8_source = TRUE;
2796 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2797 } else {
2798 utf8_source = FALSE;
2799 utf8_flags = 0; /* Unused, but keep compilers happy */
2800 }
2801 if (howlen == e_star) len = fromlen;
2802 field_len = (len+1)/2;
2803 GROWING(utf8, cat, start, cur, field_len);
2804 if (!utf8 && len > (I32)fromlen) len = fromlen;
2805 bits = 0;
2806 l = 0;
2807 if (datumtype == 'H')
2808 while (l++ < len) {
2809 if (utf8_source) {
2810 UV val;
2811 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2812 if (val < 256 && isALPHA(val))
2813 bits |= (val + 9) & 0xf;
a6ec74c1 2814 else
f337b084
TH
2815 bits |= val & 0xf;
2816 } else if (isALPHA(*str))
2817 bits |= (*str++ + 9) & 0xf;
2818 else
2819 bits |= *str++ & 0xf;
2820 if (l & 1) bits <<= 4;
fc241834 2821 else {
f337b084
TH
2822 PUSH_BYTE(utf8, cur, bits);
2823 bits = 0;
a6ec74c1
JH
2824 }
2825 }
f337b084
TH
2826 else
2827 while (l++ < len) {
2828 if (utf8_source) {
2829 UV val;
2830 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2831 if (val < 256 && isALPHA(val))
2832 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2833 else
f337b084
TH
2834 bits |= (val & 0xf) << 4;
2835 } else if (isALPHA(*str))
2836 bits |= ((*str++ + 9) & 0xf) << 4;
2837 else
2838 bits |= (*str++ & 0xf) << 4;
2839 if (l & 1) bits >>= 4;
fc241834 2840 else {
f337b084
TH
2841 PUSH_BYTE(utf8, cur, bits);
2842 bits = 0;
a6ec74c1 2843 }
fc241834 2844 }
f337b084
TH
2845 l--;
2846 if (l & 1) {
2847 PUSH_BYTE(utf8, cur, bits);
2848 l++;
2849 }
2850 /* Determine how many chars are left in the requested field */
2851 l /= 2;
2852 if (howlen == e_star) field_len = 0;
2853 else field_len -= l;
2854 Zero(cur, field_len, char);
2855 cur += field_len;
2856 break;
fc241834
RGS
2857 }
2858 case 'c':
f337b084
TH
2859 while (len-- > 0) {
2860 IV aiv;
2861 fromstr = NEXTFROM;
2862 aiv = SvIV(fromstr);
2863 if ((-128 > aiv || aiv > 127) &&
2864 ckWARN(WARN_PACK))
2865 Perl_warner(aTHX_ packWARN(WARN_PACK),
2866 "Character in 'c' format wrapped in pack");
2867 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2868 }
2869 break;
2870 case 'C':
f337b084
TH
2871 if (len == 0) {
2872 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2873 break;
2874 }
2875 GROWING(0, cat, start, cur, len);
a6ec74c1 2876 while (len-- > 0) {
f337b084 2877 IV aiv;
a6ec74c1 2878 fromstr = NEXTFROM;
f337b084
TH
2879 aiv = SvIV(fromstr);
2880 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2881 ckWARN(WARN_PACK))
2882 Perl_warner(aTHX_ packWARN(WARN_PACK),
2883 "Character in 'C' format wrapped in pack");
f337b084
TH
2884 *cur++ = aiv & 0xff;
2885 }
fc241834
RGS
2886 break;
2887 case 'W': {
2888 char *end;
2889 U8 in_bytes = IN_BYTES;
2890
2891 end = start+SvLEN(cat)-1;
2892 if (utf8) end -= UTF8_MAXLEN-1;
2893 while (len-- > 0) {
2894 UV auv;
2895 fromstr = NEXTFROM;
2896 auv = SvUV(fromstr);
2897 if (in_bytes) auv = auv % 0x100;
2898 if (utf8) {
2899 W_utf8:
2900 if (cur > end) {
2901 *cur = '\0';
2902 SvCUR(cat) = cur - start;
2903
2904 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2905 end = start+SvLEN(cat)-UTF8_MAXLEN;
2906 }
230e1fce
NC
2907 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2908 NATIVE_TO_UNI(auv),
2909 ckWARN(WARN_UTF8) ?
2910 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2911 } else {
2912 if (auv >= 0x100) {
2913 if (!SvUTF8(cat)) {
2914 *cur = '\0';
2915 SvCUR(cat) = cur - start;
2916 marked_upgrade(aTHX_ cat, symptr);
2917 lookahead.flags |= FLAG_DO_UTF8;
2918 lookahead.strbeg = symptr->strbeg;
2919 utf8 = 1;
2920 start = SvPVX(cat);
2921 cur = start + SvCUR(cat);
2922 end = start+SvLEN(cat)-UTF8_MAXLEN;
2923 goto W_utf8;
2924 }
2925 if (ckWARN(WARN_PACK))
2926 Perl_warner(aTHX_ packWARN(WARN_PACK),
2927 "Character in 'W' format wrapped in pack");
2928 auv &= 0xff;
2929 }
2930 if (cur >= end) {
2931 *cur = '\0';
2932 SvCUR(cat) = cur - start;
2933 GROWING(0, cat, start, cur, len+1);
2934 end = start+SvLEN(cat)-1;
2935 }
fe2774ed 2936 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2937 }
2938 }
2939 break;
fc241834
RGS
2940 }
2941 case 'U': {
2942 char *end;
2943
2944 if (len == 0) {
2945 if (!(symptr->flags & FLAG_DO_UTF8)) {
2946 marked_upgrade(aTHX_ cat, symptr);
2947 lookahead.flags |= FLAG_DO_UTF8;
2948 lookahead.strbeg = symptr->strbeg;
2949 }
2950 utf8 = 0;
2951 goto no_change;
2952 }
2953
2954 end = start+SvLEN(cat);
2955 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2956 while (len-- > 0) {
fc241834 2957 UV auv;
a6ec74c1 2958 fromstr = NEXTFROM;
fc241834
RGS
2959 auv = SvUV(fromstr);
2960 if (utf8) {
230e1fce 2961 U8 buffer[UTF8_MAXLEN], *endb;
fc241834
RGS
2962 endb = uvuni_to_utf8_flags(buffer, auv,
2963 ckWARN(WARN_UTF8) ?
2964 0 : UNICODE_ALLOW_ANY);
2965 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2966 *cur = '\0';
2967 SvCUR(cat) = cur - start;
2968 GROWING(0, cat, start, cur,
2969 len+(endb-buffer)*UTF8_EXPAND);
2970 end = start+SvLEN(cat);
2971 }
2972 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
2973 } else {
2974 if (cur >= end) {
2975 *cur = '\0';
2976 SvCUR(cat) = cur - start;
2977 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2978 end = start+SvLEN(cat)-UTF8_MAXLEN;
2979 }
230e1fce
NC
2980 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2981 ckWARN(WARN_UTF8) ?
2982 0 : UNICODE_ALLOW_ANY);
fc241834 2983 }
a6ec74c1 2984 }
a6ec74c1 2985 break;
fc241834 2986 }
a6ec74c1
JH
2987 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2988 case 'f':
a6ec74c1 2989 while (len-- > 0) {
f337b084
TH
2990 float afloat;
2991 NV anv;
a6ec74c1 2992 fromstr = NEXTFROM;
f337b084 2993 anv = SvNV(fromstr);
5cdb9e01 2994#ifdef __VOS__
f337b084 2995 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
2996 during conversion from double to float into infinity, so we
2997 do it by hand. This code should either be generalized for
2998 any OS that needs it, or removed if and when VOS implements
2999 posix-976 (suggestion to support mapping to infinity).
3000 Paul.Green@stratus.com 02-04-02. */
f337b084 3001 if (anv > FLT_MAX)
fc241834 3002 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3003 else if (anv < -FLT_MAX)
fc241834 3004 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3005 else afloat = (float) anv;
3006#else /* __VOS__ */
baf3cf9c 3007# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3008 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3009 * on Alpha; fake it if we don't have them.
3010 */
f337b084 3011 if (anv > FLT_MAX)
fc241834 3012 afloat = FLT_MAX;
f337b084 3013 else if (anv < -FLT_MAX)
fc241834 3014 afloat = -FLT_MAX;
f337b084 3015 else afloat = (float)anv;
baf3cf9c 3016# else
f337b084 3017 afloat = (float)anv;
baf3cf9c 3018# endif
f337b084 3019#endif /* __VOS__ */
1109a392 3020 DO_BO_PACK_N(afloat, float);
f337b084 3021 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3022 }
3023 break;
3024 case 'd':
a6ec74c1 3025 while (len-- > 0) {
f337b084
TH
3026 double adouble;
3027 NV anv;
a6ec74c1 3028 fromstr = NEXTFROM;
f337b084 3029 anv = SvNV(fromstr);
5cdb9e01 3030#ifdef __VOS__
f337b084 3031 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3032 during conversion from long double to double into infinity,
3033 so we do it by hand. This code should either be generalized
3034 for any OS that needs it, or removed if and when VOS
3035 implements posix-976 (suggestion to support mapping to
3036 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3037 if (anv > DBL_MAX)
fc241834 3038 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3039 else if (anv < -DBL_MAX)
fc241834 3040 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3041 else adouble = (double) anv;
3042#else /* __VOS__ */
baf3cf9c 3043# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3044 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3045 * on Alpha; fake it if we don't have them.
3046 */
f337b084 3047 if (anv > DBL_MAX)
fc241834 3048 adouble = DBL_MAX;
f337b084 3049 else if (anv < -DBL_MAX)
fc241834 3050 adouble = -DBL_MAX;
f337b084 3051 else adouble = (double)anv;
baf3cf9c 3052# else
f337b084 3053 adouble = (double)anv;
baf3cf9c 3054# endif
f337b084 3055#endif /* __VOS__ */
1109a392 3056 DO_BO_PACK_N(adouble, double);
f337b084 3057 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3058 }
3059 break;
fc241834
RGS
3060 case 'F': {
3061 NV anv;
1109a392 3062 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3063 while (len-- > 0) {
3064 fromstr = NEXTFROM;
3065 anv = SvNV(fromstr);
1109a392 3066 DO_BO_PACK_N(anv, NV);
fc241834 3067 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3068 }
3069 break;
fc241834 3070 }
92d41999 3071#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3072 case 'D': {
3073 long double aldouble;
1109a392
MHM
3074 /* long doubles can have unused bits, which may be nonzero */
3075 Zero(&aldouble, 1, long double);
92d41999
JH
3076 while (len-- > 0) {
3077 fromstr = NEXTFROM;
3078 aldouble = (long double)SvNV(fromstr);
1109a392 3079 DO_BO_PACK_N(aldouble, long double);
fc241834 3080 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3081 }
3082 break;
fc241834 3083 }
92d41999 3084#endif
7212898e 3085#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3086 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3087#endif
a6ec74c1
JH
3088 case 'n':
3089 while (len-- > 0) {
f337b084 3090 I16 ai16;
a6ec74c1 3091 fromstr = NEXTFROM;
ef108786 3092 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3093#ifdef HAS_HTONS
ef108786 3094 ai16 = PerlSock_htons(ai16);
a6ec74c1 3095#endif
f337b084 3096 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3097 }
3098 break;
7212898e 3099#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3100 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3101#endif
a6ec74c1
JH
3102 case 'v':
3103 while (len-- > 0) {
f337b084 3104 I16 ai16;
a6ec74c1 3105 fromstr = NEXTFROM;
ef108786 3106 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3107#ifdef HAS_HTOVS
ef108786 3108 ai16 = htovs(ai16);
a6ec74c1 3109#endif
f337b084 3110 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3111 }
3112 break;
49704364 3113 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3114#if SHORTSIZE != SIZE16
fc241834 3115 while (len-- > 0) {
f337b084 3116 unsigned short aushort;
fc241834
RGS
3117 fromstr = NEXTFROM;
3118 aushort = SvUV(fromstr);
3119 DO_BO_PACK(aushort, s);
f337b084 3120 PUSH_VAR(utf8, cur, aushort);
fc241834 3121 }
49704364
WL
3122 break;
3123#else
3124 /* Fall through! */
a6ec74c1 3125#endif
49704364 3126 case 'S':
fc241834 3127 while (len-- > 0) {
f337b084 3128 U16 au16;
fc241834
RGS
3129 fromstr = NEXTFROM;
3130 au16 = (U16)SvUV(fromstr);
3131 DO_BO_PACK(au16, 16);
f337b084 3132 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3133 }
3134 break;
49704364 3135 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3136#if SHORTSIZE != SIZE16
fc241834 3137 while (len-- > 0) {
f337b084 3138 short ashort;
fc241834
RGS
3139 fromstr = NEXTFROM;
3140 ashort = SvIV(fromstr);
3141 DO_BO_PACK(ashort, s);
f337b084 3142 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3143 }
49704364
WL
3144 break;
3145#else
3146 /* Fall through! */
a6ec74c1 3147#endif
49704364
WL
3148 case 's':
3149 while (len-- > 0) {
f337b084 3150 I16 ai16;
49704364 3151 fromstr = NEXTFROM;
ef108786
MHM
3152 ai16 = (I16)SvIV(fromstr);
3153 DO_BO_PACK(ai16, 16);
f337b084 3154 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3155 }
3156 break;
3157 case 'I':
49704364 3158 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3159 while (len-- > 0) {
f337b084 3160 unsigned int auint;
a6ec74c1
JH
3161 fromstr = NEXTFROM;
3162 auint = SvUV(fromstr);
1109a392 3163 DO_BO_PACK(auint, i);
f337b084 3164 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3165 }
3166 break;
92d41999
JH
3167 case 'j':
3168 while (len-- > 0) {
f337b084 3169 IV aiv;
92d41999
JH
3170 fromstr = NEXTFROM;
3171 aiv = SvIV(fromstr);
1109a392
MHM
3172#if IVSIZE == INTSIZE
3173 DO_BO_PACK(aiv, i);
3174#elif IVSIZE == LONGSIZE
3175 DO_BO_PACK(aiv, l);
3176#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3177 DO_BO_PACK(aiv, 64);
f337b084
TH
3178#else
3179 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3180#endif
f337b084 3181 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3182 }
3183 break;
3184 case 'J':
3185 while (len-- > 0) {
f337b084 3186 UV auv;
92d41999
JH
3187 fromstr = NEXTFROM;
3188 auv = SvUV(fromstr);
1109a392
MHM
3189#if UVSIZE == INTSIZE
3190 DO_BO_PACK(auv, i);
3191#elif UVSIZE == LONGSIZE
3192 DO_BO_PACK(auv, l);
3193#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3194 DO_BO_PACK(auv, 64);
f337b084
TH
3195#else
3196 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3197#endif
f337b084 3198 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3199 }
3200 break;
a6ec74c1
JH
3201 case 'w':
3202 while (len-- > 0) {
f337b084 3203 NV anv;
a6ec74c1 3204 fromstr = NEXTFROM;
15e9f109 3205 anv = SvNV(fromstr);
a6ec74c1 3206
f337b084
TH
3207 if (anv < 0) {
3208 *cur = '\0';
3209 SvCUR(cat) = cur - start;
49704364 3210 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3211 }
a6ec74c1 3212
196b62db
NC
3213 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3214 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3215 any negative IVs will have already been got by the croak()
3216 above. IOK is untrue for fractions, so we test them
3217 against UV_MAX_P1. */
f337b084
TH
3218 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3219 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3220 char *in = buf + sizeof(buf);
196b62db 3221 UV auv = SvUV(fromstr);
a6ec74c1
JH
3222
3223 do {
eb160463 3224 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3225 auv >>= 7;
3226 } while (auv);
3227 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3228 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3229 in, (buf + sizeof(buf)) - in);
3230 } else if (SvPOKp(fromstr))
3231 goto w_string;
a6ec74c1 3232 else if (SvNOKp(fromstr)) {
0258719b
NC
3233 /* 10**NV_MAX_10_EXP is the largest power of 10
3234 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3235 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3236 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3237 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3238 Some C compilers are strict about integral constant
3239 expressions so we conservatively divide by a slightly
3240 smaller integer instead of multiplying by the exact
3241 floating-point value.
0258719b
NC
3242 */
3243#ifdef NV_MAX_10_EXP
f337b084 3244 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3245 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3246#else
f337b084 3247 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3248 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3249#endif
a6ec74c1
JH
3250 char *in = buf + sizeof(buf);
3251
15e9f109 3252 anv = Perl_floor(anv);
a6ec74c1 3253 do {
15e9f109 3254 NV next = Perl_floor(anv / 128);
a6ec74c1 3255 if (in <= buf) /* this cannot happen ;-) */
49704364 3256 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3257 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3258 anv = next;
3259 } while (anv > 0);
a6ec74c1 3260 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3261 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3262 in, (buf + sizeof(buf)) - in);
3263 } else {
735b914b
JH
3264 char *from, *result, *in;
3265 SV *norm;
3266 STRLEN len;
3267 bool done;
3268
f337b084 3269 w_string:
735b914b
JH
3270 /* Copy string and check for compliance */
3271 from = SvPV(fromstr, len);
3272 if ((norm = is_an_int(from, len)) == NULL)
49704364 3273 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
3274
3275 New('w', result, len, char);
3276 in = result + len;
3277 done = FALSE;
f337b084 3278 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3279 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3280 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3281 in, (result + len) - in);
735b914b
JH
3282 Safefree(result);
3283 SvREFCNT_dec(norm); /* free norm */
fc241834 3284 }
a6ec74c1
JH
3285 }
3286 break;
3287 case 'i':
49704364 3288 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3289 while (len-- > 0) {
f337b084 3290 int aint;
a6ec74c1
JH
3291 fromstr = NEXTFROM;
3292 aint = SvIV(fromstr);
1109a392 3293 DO_BO_PACK(aint, i);
f337b084 3294 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3295 }
3296 break;
7212898e 3297#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3298 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3299#endif
a6ec74c1
JH
3300 case 'N':
3301 while (len-- > 0) {
f337b084 3302 U32 au32;
a6ec74c1 3303 fromstr = NEXTFROM;
ef108786 3304 au32 = SvUV(fromstr);
a6ec74c1 3305#ifdef HAS_HTONL
ef108786 3306 au32 = PerlSock_htonl(au32);
a6ec74c1 3307#endif
f337b084 3308 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3309 }
3310 break;
7212898e 3311#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3312 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3313#endif
a6ec74c1
JH
3314 case 'V':
3315 while (len-- > 0) {
f337b084 3316 U32 au32;
a6ec74c1 3317 fromstr = NEXTFROM;
ef108786 3318 au32 = SvUV(fromstr);
a6ec74c1 3319#ifdef HAS_HTOVL
ef108786 3320 au32 = htovl(au32);
a6ec74c1 3321#endif
f337b084 3322 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3323 }
3324 break;
49704364 3325 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3326#if LONGSIZE != SIZE32
fc241834 3327 while (len-- > 0) {
f337b084 3328 unsigned long aulong;
fc241834
RGS
3329 fromstr = NEXTFROM;
3330 aulong = SvUV(fromstr);
3331 DO_BO_PACK(aulong, l);
f337b084 3332 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3333 }
49704364
WL
3334 break;
3335#else
3336 /* Fall though! */
a6ec74c1 3337#endif
49704364 3338 case 'L':
fc241834 3339 while (len-- > 0) {
f337b084 3340 U32 au32;
fc241834
RGS
3341 fromstr = NEXTFROM;
3342 au32 = SvUV(fromstr);
3343 DO_BO_PACK(au32, 32);
f337b084 3344 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3345 }
3346 break;
49704364 3347 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3348#if LONGSIZE != SIZE32
fc241834 3349 while (len-- > 0) {
f337b084 3350 long along;
fc241834
RGS
3351 fromstr = NEXTFROM;
3352 along = SvIV(fromstr);
3353 DO_BO_PACK(along, l);
f337b084 3354 PUSH_VAR(utf8, cur, along);
a6ec74c1 3355 }
49704364
WL
3356 break;
3357#else
3358 /* Fall though! */
a6ec74c1 3359#endif
49704364
WL
3360 case 'l':
3361 while (len-- > 0) {
f337b084 3362 I32 ai32;
49704364 3363 fromstr = NEXTFROM;
ef108786
MHM
3364 ai32 = SvIV(fromstr);
3365 DO_BO_PACK(ai32, 32);
f337b084 3366 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3367 }
3368 break;
3369#ifdef HAS_QUAD
3370 case 'Q':
3371 while (len-- > 0) {
f337b084 3372 Uquad_t auquad;
a6ec74c1 3373 fromstr = NEXTFROM;
f337b084 3374 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3375 DO_BO_PACK(auquad, 64);
f337b084 3376 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3377 }
3378 break;
3379 case 'q':
3380 while (len-- > 0) {
f337b084 3381 Quad_t aquad;
a6ec74c1
JH
3382 fromstr = NEXTFROM;
3383 aquad = (Quad_t)SvIV(fromstr);
1109a392 3384 DO_BO_PACK(aquad, 64);
f337b084 3385 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3386 }
3387 break;
f337b084 3388#endif /* HAS_QUAD */
a6ec74c1
JH
3389 case 'P':
3390 len = 1; /* assume SV is correct length */
f337b084 3391 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3392 /* Fall through! */
a6ec74c1
JH
3393 case 'p':
3394 while (len-- > 0) {
f337b084
TH
3395 char *aptr;
3396
a6ec74c1 3397 fromstr = NEXTFROM;
28a4f200
TH
3398 SvGETMAGIC(fromstr);
3399 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3400 else {
3401 STRLEN n_a;
3402 /* XXX better yet, could spirit away the string to
3403 * a safe spot and hang on to it until the result
3404 * of pack() (and all copies of the result) are
3405 * gone.
3406 */
f337b084
TH
3407 if (ckWARN(WARN_PACK) &&
3408 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3409 !SvREADONLY(fromstr)))) {
9014280d 3410 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3411 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3412 }
3413 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3414 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3415 else
28a4f200 3416 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3417 }
1109a392 3418 DO_BO_PACK_P(aptr);
f337b084 3419 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3420 }
3421 break;
fc241834
RGS
3422 case 'u': {
3423 char *aptr, *aend;
3424 bool from_utf8;
f337b084 3425
a6ec74c1 3426 fromstr = NEXTFROM;
fc241834
RGS
3427 if (len <= 2) len = 45;
3428 else len = len / 3 * 3;
3429 if (len >= 64) {
3430 Perl_warner(aTHX_ packWARN(WARN_PACK),
3431 "Field too wide in 'u' format in pack");
3432 len = 63;
3433 }
a6ec74c1 3434 aptr = SvPV(fromstr, fromlen);
fc241834
RGS
3435 from_utf8 = DO_UTF8(fromstr);
3436 if (from_utf8) {
3437 aend = aptr + fromlen;
3438 fromlen = sv_len_utf8(fromstr);
3439 } else aend = NULL; /* Unused, but keep compilers happy */
3440 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3441 while (fromlen > 0) {
fc241834 3442 U8 *end;
a6ec74c1 3443 I32 todo;
fc241834 3444 U8 hunk[1+63/3*4+1];
a6ec74c1 3445
eb160463 3446 if ((I32)fromlen > len)
a6ec74c1
JH
3447 todo = len;
3448 else
3449 todo = fromlen;
fc241834
RGS
3450 if (from_utf8) {
3451 char buffer[64];
3452 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3453 'u' | TYPE_IS_PACK)) {
3454 *cur = '\0';
3455 SvCUR(cat) = cur - start;
3456 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3457 }
3458 end = doencodes(hunk, buffer, todo);
3459 } else {
3460 end = doencodes(hunk, aptr, todo);
3461 aptr += todo;
3462 }
3463 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3464 fromlen -= todo;
3465 }
a6ec74c1
JH
3466 break;
3467 }
f337b084
TH
3468 }
3469 *cur = '\0';
3470 SvCUR(cat) = cur - start;
3471 no_change:
49704364 3472 *symptr = lookahead;
a6ec74c1 3473 }
49704364 3474 return beglist;
18529408
IZ
3475}
3476#undef NEXTFROM
3477
3478
3479PP(pp_pack)
3480{
3481 dSP; dMARK; dORIGMARK; dTARGET;
3482 register SV *cat = TARG;
3483 STRLEN fromlen;
3484 register char *pat = SvPVx(*++MARK, fromlen);
3485 register char *patend = pat + fromlen;
3486
3487 MARK++;
3488 sv_setpvn(cat, "", 0);
f337b084 3489 SvUTF8_off(cat);
18529408 3490
7accc089 3491 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3492
a6ec74c1
JH
3493 SvSETMAGIC(cat);
3494 SP = ORIGMARK;
3495 PUSHs(cat);
3496 RETURN;
3497}
a6ec74c1 3498
73cb7263
NC
3499/*
3500 * Local variables:
3501 * c-indentation-style: bsd
3502 * c-basic-offset: 4
3503 * indent-tabs-mode: t
3504 * End:
3505 *
edf815fd 3506 * vim: shiftwidth=4:
73cb7263 3507*/