This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: regexp trie fails compile on VMS
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e 34#if PERL_VERSION >= 9
f337b084
TH
35# define PERL_PACK_CAN_BYTEORDER
36# define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
39#ifndef CHAR_BIT
40# define CHAR_BIT 8
7212898e 41#endif
3473cf63
RGS
42/* Maximum number of bytes to which a byte can grow due to upgrade */
43#define UTF8_EXPAND 2
7212898e 44
a6ec74c1 45/*
a6ec74c1
JH
46 * Offset for integer pack/unpack.
47 *
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
50 */
51
52/*
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
58 */
59/*
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
62 */
63#define SIZE16 2
64#define SIZE32 4
65
66/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
67 --jhi Feb 1999 */
68
1109a392
MHM
69#if U16SIZE > SIZE16 || U32SIZE > SIZE32
70# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
71# define OFF16(p) ((char*)(p))
72# define OFF32(p) ((char*)(p))
a6ec74c1 73# else
1109a392 74# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
75# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
77# else
08ca2aa3 78 ++++ bad cray byte order
a6ec74c1
JH
79# endif
80# endif
a6ec74c1 81#else
08ca2aa3
TH
82# define OFF16(p) ((char *) (p))
83# define OFF32(p) ((char *) (p))
a6ec74c1
JH
84#endif
85
f337b084
TH
86/* Only to be used inside a loop (see the break) */
87#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
88 if (utf8) { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
90 } else { \
91 Copy(s, OFF16(p), SIZE16, char); \
92 (s) += SIZE16; \
93 } \
94} STMT_END
95
96/* Only to be used inside a loop (see the break) */
97#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
98 if (utf8) { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
100 } else { \
101 Copy(s, OFF32(p), SIZE32, char); \
102 (s) += SIZE32; \
103 } \
104} STMT_END
105
106#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
108
109/* Only to be used inside a loop (see the break) */
f337b084 110#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
111STMT_START { \
112 if (utf8) { \
f337b084
TH
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
115 } else { \
116 Copy(s, (char *) &var, sizeof(var), char); \
117 s += sizeof(var); \
118 } \
08ca2aa3
TH
119} STMT_END
120
f337b084
TH
121#define PUSH_VAR(utf8, aptr, var) \
122 PUSH_BYTES(utf8, aptr, (char *) &(var), sizeof(var))
123
49704364
WL
124/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125#define MAX_SUB_TEMPLATE_LEVEL 100
126
66c611c5 127/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
128#define FLAG_WAS_UTF8 0x40
129#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 130#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 131#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
132#define FLAG_SLASH 0x04
133#define FLAG_COMMA 0x02
134#define FLAG_PACK 0x01
135
a6ec74c1
JH
136STATIC SV *
137S_mul128(pTHX_ SV *sv, U8 m)
138{
139 STRLEN len;
140 char *s = SvPV(sv, len);
141 char *t;
142 U32 i = 0;
143
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
146
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
149 sv = tmpNew;
150 s = SvPV(sv, len);
151 }
152 t = s + len - 1;
153 while (!*t) /* trailing '\0'? */
154 t--;
155 while (t > s) {
156 i = ((*t - '0') << 7) + m;
eb160463
GS
157 *(t--) = '0' + (char)(i % 10);
158 m = (char)(i / 10);
a6ec74c1
JH
159 }
160 return (sv);
161}
162
163/* Explosives and implosives. */
164
165#if 'I' == 73 && 'J' == 74
166/* On an ASCII/ISO kind of system */
167#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
168#else
169/*
170 Some other sort of character set - use memchr() so we don't match
171 the null byte.
172 */
173#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
174#endif
175
66c611c5 176/* type modifiers */
62f95557 177#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
178#define TYPE_IS_BIG_ENDIAN 0x200
179#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 180#define TYPE_IS_PACK 0x800
1109a392 181#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 182#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
183#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
184
7212898e
NC
185#ifdef PERL_PACK_CAN_SHRIEKSIGN
186#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
187#else
188#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
189#endif
190
191#ifndef PERL_PACK_CAN_BYTEORDER
192/* Put "can't" first because it is shorter */
193# define TYPE_ENDIANNESS(t) 0
194# define TYPE_NO_ENDIANNESS(t) (t)
195
196# define ENDIANNESS_ALLOWED_TYPES ""
197
198# define DO_BO_UNPACK(var, type)
199# define DO_BO_PACK(var, type)
200# define DO_BO_UNPACK_PTR(var, type, pre_cast)
201# define DO_BO_PACK_PTR(var, type, pre_cast)
202# define DO_BO_UNPACK_N(var, type)
203# define DO_BO_PACK_N(var, type)
204# define DO_BO_UNPACK_P(var)
205# define DO_BO_PACK_P(var)
66c611c5 206
f337b084 207#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
208
209# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
211
212# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
213
214# define DO_BO_UNPACK(var, type) \
1109a392 215 STMT_START { \
66c611c5 216 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
219 default: break; \
220 } \
221 } STMT_END
222
7212898e 223# define DO_BO_PACK(var, type) \
1109a392 224 STMT_START { \
66c611c5 225 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
228 default: break; \
229 } \
230 } STMT_END
231
7212898e 232# define DO_BO_UNPACK_PTR(var, type, pre_cast) \
1109a392 233 STMT_START { \
66c611c5 234 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (void *) my_betoh ## type ((pre_cast) var); \
237 break; \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (void *) my_letoh ## type ((pre_cast) var); \
240 break; \
241 default: \
242 break; \
243 } \
244 } STMT_END
245
7212898e 246# define DO_BO_PACK_PTR(var, type, pre_cast) \
1109a392 247 STMT_START { \
66c611c5 248 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (void *) my_htobe ## type ((pre_cast) var); \
251 break; \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (void *) my_htole ## type ((pre_cast) var); \
254 break; \
255 default: \
256 break; \
257 } \
258 } STMT_END
259
7212898e 260# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
266 break; \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
270 break; \
271 default: \
272 break; \
273 } \
274 } STMT_END
275
7212898e
NC
276# if PTRSIZE == INTSIZE
277# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
278# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
279# elif PTRSIZE == LONGSIZE
280# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
281# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
282# else
283# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
284# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
285# endif
1109a392 286
7212898e 287# if defined(my_htolen) && defined(my_letohn) && \
1109a392 288 defined(my_htoben) && defined(my_betohn)
7212898e 289# define DO_BO_UNPACK_N(var, type) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
292 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
293 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
294 default: break; \
295 } \
296 } STMT_END
297
7212898e 298# define DO_BO_PACK_N(var, type) \
1109a392 299 STMT_START { \
66c611c5 300 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
301 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
302 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
303 default: break; \
304 } \
305 } STMT_END
7212898e
NC
306# else
307# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
308# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
309# endif
310
f337b084 311#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 312
78d46eaa 313#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 314#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
315#define PACK_SIZE_MASK 0x3F
316
78d46eaa
NC
317/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
318 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
319
320#if TYPE_IS_SHRIEKING != 0x100
321 ++++shriek offset should be 256
322#endif
323
324typedef U8 packprops_t;
78d46eaa
NC
325#if 'J'-'I' == 1
326/* ASCII */
a7a3cfaa
TH
327const packprops_t packprops[512] = {
328 /* normal */
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
333 0, 0, 0,
334 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 335#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 336 /* D */ LONG_DOUBLESIZE,
80a13697 337#else
a7a3cfaa 338 0,
80a13697 339#endif
a7a3cfaa
TH
340 0,
341 /* F */ NVSIZE,
342 0, 0,
343 /* I */ sizeof(unsigned int),
344 /* J */ UVSIZE,
345 0,
346 /* L */ SIZE32,
347 0,
348 /* N */ SIZE32,
349 0, 0,
80a13697 350#if defined(HAS_QUAD)
a7a3cfaa 351 /* Q */ sizeof(Uquad_t),
80a13697 352#else
a7a3cfaa 353 0,
80a13697 354#endif
a7a3cfaa
TH
355 0,
356 /* S */ SIZE16,
357 0,
358 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
359 /* V */ SIZE32,
360 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 /* c */ sizeof(char),
363 /* d */ sizeof(double),
364 0,
365 /* f */ sizeof(float),
366 0, 0,
367 /* i */ sizeof(int),
368 /* j */ IVSIZE,
369 0,
370 /* l */ SIZE32,
371 0,
372 /* n */ SIZE16,
373 0,
374 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 375#if defined(HAS_QUAD)
a7a3cfaa 376 /* q */ sizeof(Quad_t),
80a13697 377#else
a7a3cfaa 378 0,
80a13697 379#endif
a7a3cfaa
TH
380 0,
381 /* s */ SIZE16,
382 0, 0,
383 /* v */ SIZE16,
384 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0,
394 /* shrieking */
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
398 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
399 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 /* I */ sizeof(unsigned int),
401 0, 0,
402 /* L */ sizeof(unsigned long),
403 0,
7212898e 404#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 405 /* N */ SIZE32,
7212898e 406#else
a7a3cfaa 407 0,
7212898e 408#endif
a7a3cfaa
TH
409 0, 0, 0, 0,
410 /* S */ sizeof(unsigned short),
411 0, 0,
7212898e 412#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 413 /* V */ SIZE32,
7212898e 414#else
a7a3cfaa 415 0,
7212898e 416#endif
a7a3cfaa
TH
417 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
418 0, 0,
419 /* i */ sizeof(int),
420 0, 0,
421 /* l */ sizeof(long),
422 0,
7212898e 423#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 424 /* n */ SIZE16,
7212898e 425#else
a7a3cfaa 426 0,
7212898e 427#endif
a7a3cfaa
TH
428 0, 0, 0, 0,
429 /* s */ sizeof(short),
430 0, 0,
7212898e 431#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 432 /* v */ SIZE16,
7212898e 433#else
a7a3cfaa 434 0,
7212898e 435#endif
a7a3cfaa
TH
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
445};
446#else
447/* EBCDIC (or bust) */
a7a3cfaa
TH
448const packprops_t packprops[512] = {
449 /* normal */
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0,
459 /* c */ sizeof(char),
460 /* d */ sizeof(double),
461 0,
462 /* f */ sizeof(float),
463 0, 0,
464 /* i */ sizeof(int),
465 0, 0, 0, 0, 0, 0, 0,
466 /* j */ IVSIZE,
467 0,
468 /* l */ SIZE32,
469 0,
470 /* n */ SIZE16,
471 0,
472 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 473#if defined(HAS_QUAD)
a7a3cfaa 474 /* q */ sizeof(Quad_t),
80a13697 475#else
a7a3cfaa 476 0,
80a13697 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 /* s */ SIZE16,
480 0, 0,
481 /* v */ SIZE16,
482 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 486#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 487 /* D */ LONG_DOUBLESIZE,
80a13697 488#else
a7a3cfaa 489 0,
80a13697 490#endif
a7a3cfaa
TH
491 0,
492 /* F */ NVSIZE,
493 0, 0,
494 /* I */ sizeof(unsigned int),
495 0, 0, 0, 0, 0, 0, 0,
496 /* J */ UVSIZE,
497 0,
498 /* L */ SIZE32,
499 0,
500 /* N */ SIZE32,
501 0, 0,
80a13697 502#if defined(HAS_QUAD)
a7a3cfaa 503 /* Q */ sizeof(Uquad_t),
80a13697 504#else
a7a3cfaa 505 0,
80a13697 506#endif
a7a3cfaa
TH
507 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 /* S */ SIZE16,
509 0,
510 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
511 /* V */ SIZE32,
512 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 /* shrieking */
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 /* i */ sizeof(int),
526 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 /* l */ sizeof(long),
528 0,
7212898e 529#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 530 /* n */ SIZE16,
7212898e 531#else
a7a3cfaa 532 0,
7212898e 533#endif
a7a3cfaa
TH
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* s */ sizeof(short),
536 0, 0,
7212898e 537#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 538 /* v */ SIZE16,
7212898e 539#else
a7a3cfaa 540 0,
7212898e 541#endif
a7a3cfaa
TH
542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544 0, 0, 0,
545 /* I */ sizeof(unsigned int),
546 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 /* L */ sizeof(unsigned long),
548 0,
7212898e 549#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 550 /* N */ SIZE32,
7212898e 551#else
a7a3cfaa 552 0,
7212898e 553#endif
a7a3cfaa
TH
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 /* S */ sizeof(unsigned short),
556 0, 0,
7212898e 557#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 558 /* V */ SIZE32,
7212898e 559#else
a7a3cfaa 560 0,
7212898e 561#endif
a7a3cfaa
TH
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
564};
565#endif
566
08ca2aa3 567STATIC U8
f337b084 568uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
08ca2aa3
TH
569{
570 UV val;
571 STRLEN retlen;
f337b084
TH
572 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
573 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
574 /* We try to process malformed UTF-8 as much as possible (preferrably with
575 warnings), but these two mean we make no progress in the string and
576 might enter an infinite loop */
577 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
578 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
579 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 580 if (val >= 0x100) {
f337b084 581 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
582 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
583 "Character in '%c' format wrapped in unpack",
f337b084 584 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
585 val &= 0xff;
586 }
587 *s += retlen;
588 return val;
589}
590
f337b084
TH
591#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
592 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
593 *(U8 *)(s)++)
594
595STATIC bool
f337b084 596uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
597{
598 UV val;
599 STRLEN retlen;
600 char *from = *s;
601 int bad = 0;
602 U32 flags = ckWARN(WARN_UTF8) ?
603 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
604 for (;buf_len > 0; buf_len--) {
605 if (from >= end) return FALSE;
f337b084 606 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
607 if (retlen == (STRLEN) -1 || retlen == 0) {
608 from += UTF8SKIP(from);
609 bad |= 1;
610 } else from += retlen;
611 if (val >= 0x100) {
612 bad |= 2;
613 val &= 0xff;
614 }
615 *(U8 *)buf++ = val;
616 }
617 /* We have enough characters for the buffer. Did we have problems ? */
618 if (bad) {
619 if (bad & 1) {
620 /* Rewalk the string fragment while warning */
621 char *ptr;
622 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
623 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
624 if (ptr >= end) break;
f337b084 625 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
626 }
627 if (from > end) from = end;
628 }
629 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 630 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
631 WARN_PACK : WARN_UNPACK),
632 "Character(s) in '%c' format wrapped in %s",
fc241834 633 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 634 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
635 }
636 *s = from;
637 return TRUE;
638}
639
640STATIC bool
641next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
642{
643 UV val;
644 STRLEN retlen;
f337b084 645 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
646 if (val >= 0x100 || !ISUUCHAR(val) ||
647 retlen == (STRLEN) -1 || retlen == 0) {
648 *out = 0;
649 return FALSE;
650 }
651 *out = PL_uudmap[val] & 077;
f337b084 652 *s += retlen;
08ca2aa3
TH
653 return TRUE;
654}
78d46eaa 655
f337b084
TH
656STATIC void
657bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
658 U8 buffer[UTF8_MAXLEN];
659 U8 *end = start + len;
660 char *d = *dest;
661 while (start < end) {
662 int length =
663 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
664 switch(length) {
665 case 1:
666 *d++ = buffer[0];
667 break;
668 case 2:
669 *d++ = buffer[0];
670 *d++ = buffer[1];
671 break;
672 default:
673 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
674 *start, length);
675 }
676 start++;
677 }
678 *dest = d;
679}
680
681#define PUSH_BYTES(utf8, cur, buf, len) \
682STMT_START { \
683 if (utf8) bytes_to_uni(aTHX_ buf, len, &(cur)); \
684 else { \
685 Copy(buf, cur, len, char); \
686 (cur) += (len); \
687 } \
688} STMT_END
689
690#define GROWING(utf8, cat, start, cur, in_len) \
691STMT_START { \
692 STRLEN glen = (in_len); \
3473cf63 693 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
694 if ((cur) + glen >= (start) + SvLEN(cat)) { \
695 (start) = sv_exp_grow(aTHX_ cat, glen); \
696 (cur) = (start) + SvCUR(cat); \
697 } \
698} STMT_END
699
700#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
701STMT_START { \
702 STRLEN glen = (in_len); \
703 STRLEN gl = glen; \
3473cf63 704 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
705 if ((cur) + gl >= (start) + SvLEN(cat)) { \
706 *cur = '\0'; \
707 SvCUR(cat) = (cur) - (start); \
708 (start) = sv_exp_grow(aTHX_ cat, gl); \
709 (cur) = (start) + SvCUR(cat); \
710 } \
711 PUSH_BYTES(utf8, cur, buf, glen); \
712} STMT_END
713
714#define PUSH_BYTE(utf8, s, byte) \
715STMT_START { \
716 if (utf8) { \
717 U8 au8 = (byte); \
718 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
719 } else *(U8 *)(s)++ = (byte); \
720} STMT_END
721
722/* Only to be used inside a loop (see the break) */
723#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
724STMT_START { \
725 STRLEN retlen; \
726 if (str >= end) break; \
727 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
728 if (retlen == (STRLEN) -1 || retlen == 0) { \
729 *cur = '\0'; \
730 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
731 } \
732 str += retlen; \
733} STMT_END
734
206947d2 735/* Returns the sizeof() struct described by pat */
028d1f6d 736STATIC I32
f337b084 737S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 738{
f337b084 739 I32 total = 0;
206947d2 740
49704364 741 while (next_symbol(symptr)) {
f337b084
TH
742 I32 len;
743 int star, size;
f337b084
TH
744
745 switch (symptr->howlen) {
fc241834 746 case e_star:
49704364
WL
747 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
748 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
749 break;
f337b084
TH
750 default:
751 /* e_no_len and e_number */
752 len = symptr->length;
753 break;
49704364
WL
754 }
755
a7a3cfaa 756 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697
NC
757 if (!size) {
758 /* endianness doesn't influence the size of a type */
759 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
760 default:
761 Perl_croak(aTHX_ "Invalid type '%c' in %s",
762 (int)TYPE_NO_MODIFIERS(symptr->code),
763 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
764 case '@':
765 case '/':
766 case 'U': /* XXXX Is it correct? */
767 case 'w':
768 case 'u':
769 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
770 (int)symptr->code,
771 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
772 case '%':
773 size = 0;
774 break;
775 case '(':
fc241834
RGS
776 {
777 tempsym_t savsym = *symptr;
778 symptr->patptr = savsym.grpbeg;
779 symptr->patend = savsym.grpend;
780 /* XXXX Theoretically, we need to measure many times at
781 different positions, since the subexpression may contain
782 alignment commands, but be not of aligned length.
783 Need to detect this and croak(). */
784 size = measure_struct(symptr);
785 *symptr = savsym;
786 break;
787 }
80a13697
NC
788 case 'X' | TYPE_IS_SHRIEKING:
789 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
790 */
791 if (!len) /* Avoid division by 0 */
792 len = 1;
793 len = total % len; /* Assumed: the start is aligned. */
794 /* FALL THROUGH */
795 case 'X':
796 size = -1;
797 if (total < len)
798 Perl_croak(aTHX_ "'X' outside of string in %s",
799 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
800 break;
801 case 'x' | TYPE_IS_SHRIEKING:
802 if (!len) /* Avoid division by 0 */
803 len = 1;
804 star = total % len; /* Assumed: the start is aligned. */
805 if (star) /* Other portable ways? */
806 len = len - star;
807 else
808 len = 0;
809 /* FALL THROUGH */
810 case 'x':
811 case 'A':
812 case 'Z':
813 case 'a':
80a13697
NC
814 size = 1;
815 break;
816 case 'B':
817 case 'b':
818 len = (len + 7)/8;
819 size = 1;
820 break;
821 case 'H':
822 case 'h':
823 len = (len + 1)/2;
824 size = 1;
825 break;
78d46eaa 826
80a13697
NC
827 case 'P':
828 len = 1;
829 size = sizeof(char*);
78d46eaa
NC
830 break;
831 }
206947d2
IZ
832 }
833 total += len * size;
834 }
835 return total;
836}
837
49704364
WL
838
839/* locate matching closing parenthesis or bracket
840 * returns char pointer to char after match, or NULL
841 */
842STATIC char *
843S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 844{
49704364
WL
845 while (patptr < patend) {
846 char c = *patptr++;
847
848 if (isSPACE(c))
849 continue;
850 else if (c == ender)
851 return patptr-1;
852 else if (c == '#') {
853 while (patptr < patend && *patptr != '\n')
854 patptr++;
855 continue;
856 } else if (c == '(')
857 patptr = group_end(patptr, patend, ')') + 1;
858 else if (c == '[')
859 patptr = group_end(patptr, patend, ']') + 1;
18529408 860 }
49704364
WL
861 Perl_croak(aTHX_ "No group ending character '%c' found in template",
862 ender);
863 return 0;
18529408
IZ
864}
865
49704364
WL
866
867/* Convert unsigned decimal number to binary.
868 * Expects a pointer to the first digit and address of length variable
869 * Advances char pointer to 1st non-digit char and returns number
fc241834 870 */
18529408 871STATIC char *
49704364
WL
872S_get_num(pTHX_ register char *patptr, I32 *lenptr )
873{
874 I32 len = *patptr++ - '0';
875 while (isDIGIT(*patptr)) {
876 if (len >= 0x7FFFFFFF/10)
877 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
878 len = (len * 10) + (*patptr++ - '0');
879 }
880 *lenptr = len;
881 return patptr;
882}
883
884/* The marvellous template parsing routine: Using state stored in *symptr,
885 * locates next template code and count
886 */
887STATIC bool
f337b084 888S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 889{
fc241834
RGS
890 char* patptr = symptr->patptr;
891 char* patend = symptr->patend;
f337b084 892 const char *allowed = "";
49704364
WL
893
894 symptr->flags &= ~FLAG_SLASH;
895
896 while (patptr < patend) {
897 if (isSPACE(*patptr))
898 patptr++;
899 else if (*patptr == '#') {
900 patptr++;
901 while (patptr < patend && *patptr != '\n')
902 patptr++;
903 if (patptr < patend)
904 patptr++;
905 } else {
fc241834 906 /* We should have found a template code */
49704364 907 I32 code = *patptr++ & 0xFF;
66c611c5 908 U32 inherited_modifiers = 0;
49704364
WL
909
910 if (code == ','){ /* grandfather in commas but with a warning */
911 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
912 symptr->flags |= FLAG_COMMA;
913 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
914 "Invalid type ',' in %s",
915 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
916 }
917 continue;
918 }
fc241834 919
49704364 920 /* for '(', skip to ')' */
fc241834 921 if (code == '(') {
49704364
WL
922 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
923 Perl_croak(aTHX_ "()-group starts with a count in %s",
924 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
925 symptr->grpbeg = patptr;
926 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
927 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
928 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
929 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
930 }
931
66c611c5
MHM
932 /* look for group modifiers to inherit */
933 if (TYPE_ENDIANNESS(symptr->flags)) {
934 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
935 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
936 }
937
1109a392
MHM
938 /* look for modifiers */
939 while (patptr < patend) {
1109a392
MHM
940 I32 modifier = 0;
941 switch (*patptr) {
942 case '!':
943 modifier = TYPE_IS_SHRIEKING;
7212898e 944 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 945 break;
7212898e 946#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
947 case '>':
948 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 949 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
950 break;
951 case '<':
952 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 953 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 954 break;
f337b084 955#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392
MHM
956 default:
957 break;
958 }
66c611c5 959
1109a392
MHM
960 if (modifier == 0)
961 break;
66c611c5 962
1109a392
MHM
963 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
964 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
965 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
966
967 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392
MHM
968 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
969 (int) TYPE_NO_MODIFIERS(code),
970 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
971 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
972 TYPE_ENDIANNESS_MASK)
973 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
974 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
975
1109a392
MHM
976 if (ckWARN(WARN_UNPACK)) {
977 if (code & modifier)
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Duplicate modifier '%c' after '%c' in %s",
980 *patptr, (int) TYPE_NO_MODIFIERS(code),
981 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
982 }
66c611c5 983
1109a392
MHM
984 code |= modifier;
985 patptr++;
49704364
WL
986 }
987
66c611c5
MHM
988 /* inherit modifiers */
989 code |= inherited_modifiers;
990
fc241834 991 /* look for count and/or / */
49704364
WL
992 if (patptr < patend) {
993 if (isDIGIT(*patptr)) {
994 patptr = get_num( patptr, &symptr->length );
995 symptr->howlen = e_number;
996
997 } else if (*patptr == '*') {
998 patptr++;
999 symptr->howlen = e_star;
1000
1001 } else if (*patptr == '[') {
fc241834 1002 char* lenptr = ++patptr;
49704364
WL
1003 symptr->howlen = e_number;
1004 patptr = group_end( patptr, patend, ']' ) + 1;
1005 /* what kind of [] is it? */
1006 if (isDIGIT(*lenptr)) {
1007 lenptr = get_num( lenptr, &symptr->length );
1008 if( *lenptr != ']' )
1009 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1010 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1011 } else {
1012 tempsym_t savsym = *symptr;
1013 symptr->patend = patptr-1;
1014 symptr->patptr = lenptr;
1015 savsym.length = measure_struct(symptr);
1016 *symptr = savsym;
1017 }
1018 } else {
1019 symptr->howlen = e_no_len;
1020 symptr->length = 1;
1021 }
1022
1023 /* try to find / */
1024 while (patptr < patend) {
1025 if (isSPACE(*patptr))
1026 patptr++;
1027 else if (*patptr == '#') {
1028 patptr++;
1029 while (patptr < patend && *patptr != '\n')
1030 patptr++;
1031 if (patptr < patend)
1032 patptr++;
1033 } else {
66c611c5 1034 if (*patptr == '/') {
49704364
WL
1035 symptr->flags |= FLAG_SLASH;
1036 patptr++;
66c611c5
MHM
1037 if (patptr < patend &&
1038 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364
WL
1039 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1040 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1041 }
1042 break;
1043 }
18529408 1044 }
49704364
WL
1045 } else {
1046 /* at end - no count, no / */
1047 symptr->howlen = e_no_len;
1048 symptr->length = 1;
1049 }
1050
1051 symptr->code = code;
fc241834 1052 symptr->patptr = patptr;
49704364 1053 return TRUE;
18529408 1054 }
49704364 1055 }
fc241834 1056 symptr->patptr = patptr;
49704364 1057 return FALSE;
18529408
IZ
1058}
1059
18529408 1060/*
fc241834 1061 There is no way to cleanly handle the case where we should process the
08ca2aa3 1062 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1063 (e.g. estimates like strend-s as an upper bound for the number of
1064 characters left wouldn't work). So if we foresee the need of this
1065 (pattern starts with U or contains U0), we want to work on the encoded
1066 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1067 themselves if they need to do a lot of unpacks like this on it
1068*/
fc241834 1069STATIC bool
08ca2aa3
TH
1070need_utf8(const char *pat, const char *patend)
1071{
1072 bool first = TRUE;
1073 while (pat < patend) {
1074 if (pat[0] == '#') {
1075 pat++;
1076 pat = memchr(pat, '\n', patend-pat);
1077 if (!pat) return FALSE;
1078 } else if (pat[0] == 'U') {
1079 if (first || pat[1] == '0') return TRUE;
1080 } else first = FALSE;
1081 pat++;
1082 }
1083 return FALSE;
1084}
1085
1086STATIC char
1087first_symbol(const char *pat, const char *patend) {
1088 while (pat < patend) {
1089 if (pat[0] != '#') return pat[0];
1090 pat++;
1091 pat = memchr(pat, '\n', patend-pat);
1092 if (!pat) return 0;
1093 pat++;
1094 }
1095 return 0;
1096}
1097
1098/*
18529408
IZ
1099=for apidoc unpack_str
1100
7accc089
JH
1101The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1102and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1103
1104=cut */
1105
1106I32
f337b084 1107Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1108{
49704364 1109 tempsym_t sym = { 0 };
08ca2aa3 1110
f337b084 1111 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1112 else if (need_utf8(pat, patend)) {
1113 /* We probably should try to avoid this in case a scalar context call
1114 wouldn't get to the "U0" */
1115 STRLEN len = strend - s;
f337b084 1116 s = (char *) bytes_to_utf8(s, &len);
08ca2aa3
TH
1117 SAVEFREEPV(s);
1118 strend = s + len;
f337b084 1119 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1120 }
1121
f337b084
TH
1122 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1123 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1124
49704364
WL
1125 sym.patptr = pat;
1126 sym.patend = patend;
1127 sym.flags = flags;
1128
1129 return unpack_rec(&sym, s, s, strend, NULL );
1130}
1131
7accc089
JH
1132/*
1133=for apidoc unpackstring
1134
608d3aed
WL
1135The engine implementing unpack() Perl function. C<unpackstring> puts the
1136extracted list items on the stack and returns the number of elements.
1137Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1138
1139=cut */
1140
1141I32
f337b084 1142Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
7accc089
JH
1143{
1144 tempsym_t sym = { 0 };
08ca2aa3 1145
f337b084 1146 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1147 else if (need_utf8(pat, patend)) {
1148 /* We probably should try to avoid this in case a scalar context call
1149 wouldn't get to the "U0" */
1150 STRLEN len = strend - s;
f337b084 1151 s = (char *) bytes_to_utf8(s, &len);
08ca2aa3
TH
1152 SAVEFREEPV(s);
1153 strend = s + len;
f337b084 1154 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1155 }
1156
f337b084
TH
1157 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1158 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1159
7accc089
JH
1160 sym.patptr = pat;
1161 sym.patend = patend;
1162 sym.flags = flags;
1163
1164 return unpack_rec(&sym, s, s, strend, NULL );
1165}
1166
49704364
WL
1167STATIC
1168I32
08ca2aa3 1169S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
49704364 1170{
a6ec74c1 1171 dSP;
18529408
IZ
1172 SV *sv;
1173 I32 start_sp_offset = SP - PL_stack_base;
49704364 1174 howlen_t howlen;
a6ec74c1 1175
a6ec74c1 1176 I32 checksum = 0;
92d41999 1177 UV cuv = 0;
a6ec74c1 1178 NV cdouble = 0.0;
f337b084 1179 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364
WL
1180 char* strrelbeg = s;
1181 bool beyond = FALSE;
21c16052 1182 bool explicit_length;
49704364 1183 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1184 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
49704364 1185
49704364 1186 while (next_symbol(symptr)) {
a7a3cfaa 1187 packprops_t props;
f337b084
TH
1188 I32 len, ai32;
1189 I32 datumtype = symptr->code;
206947d2 1190 /* do first one only unless in list context
08ca2aa3 1191 / is implemented by unpacking the count, then popping it from the
206947d2 1192 stack, so must check that we're not in the middle of a / */
49704364 1193 if ( unpack_only_one
206947d2 1194 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1195 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1196 break;
49704364 1197
f337b084 1198 switch (howlen = symptr->howlen) {
fc241834
RGS
1199 case e_star:
1200 len = strend - strbeg; /* long enough */
49704364 1201 break;
f337b084
TH
1202 default:
1203 /* e_no_len and e_number */
1204 len = symptr->length;
1205 break;
49704364 1206 }
18529408 1207
21c16052 1208 explicit_length = TRUE;
a6ec74c1 1209 redo_switch:
49704364 1210 beyond = s >= strend;
a7a3cfaa
TH
1211
1212 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1213 if (props) {
1214 /* props nonzero means we can process this letter. */
1215 long size = props & PACK_SIZE_MASK;
1216 long howmany = (strend - s) / size;
1217 if (len > howmany)
1218 len = howmany;
1219
1220 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1221 if (len && unpack_only_one) len = 1;
1222 EXTEND(SP, len);
1223 EXTEND_MORTAL(len);
78d46eaa
NC
1224 }
1225 }
a7a3cfaa 1226
1109a392 1227 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1228 default:
1109a392 1229 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1230
a6ec74c1 1231 case '%':
49704364 1232 if (howlen == e_no_len)
18529408 1233 len = 16; /* len is not specified */
a6ec74c1 1234 checksum = len;
92d41999 1235 cuv = 0;
a6ec74c1 1236 cdouble = 0;
18529408 1237 continue;
a6ec74c1 1238 break;
18529408
IZ
1239 case '(':
1240 {
49704364 1241 tempsym_t savsym = *symptr;
66c611c5
MHM
1242 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1243 symptr->flags |= group_modifiers;
49704364
WL
1244 symptr->patend = savsym.grpend;
1245 symptr->level++;
18529408
IZ
1246 PUTBACK;
1247 while (len--) {
49704364 1248 symptr->patptr = savsym.grpbeg;
f337b084
TH
1249 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1250 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1251 unpack_rec(symptr, s, strbeg, strend, &s);
1252 if (s == strend && savsym.howlen == e_star)
49704364 1253 break; /* No way to continue */
18529408
IZ
1254 }
1255 SPAGAIN;
66c611c5 1256 symptr->flags &= ~group_modifiers;
49704364
WL
1257 savsym.flags = symptr->flags;
1258 *symptr = savsym;
18529408
IZ
1259 break;
1260 }
a6ec74c1 1261 case '@':
08ca2aa3
TH
1262 if (utf8) {
1263 s = strrelbeg;
1264 while (len > 0) {
1265 if (s >= strend)
1266 Perl_croak(aTHX_ "'@' outside of string in unpack");
1267 s += UTF8SKIP(s);
1268 len--;
1269 }
1270 if (s > strend)
1271 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1272 } else {
fc241834
RGS
1273 if (len > strend - strrelbeg)
1274 Perl_croak(aTHX_ "'@' outside of string in unpack");
1275 s = strrelbeg + len;
08ca2aa3 1276 }
a6ec74c1 1277 break;
62f95557
IZ
1278 case 'X' | TYPE_IS_SHRIEKING:
1279 if (!len) /* Avoid division by 0 */
1280 len = 1;
08ca2aa3
TH
1281 if (utf8) {
1282 char *hop, *last;
f337b084
TH
1283 I32 l = len;
1284 hop = last = strbeg;
1285 while (hop < s) {
1286 hop += UTF8SKIP(hop);
1287 if (--l == 0) {
08ca2aa3 1288 last = hop;
f337b084
TH
1289 l = len;
1290 }
fc241834 1291 }
f337b084
TH
1292 if (last > s)
1293 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1294 s = last;
1295 break;
f337b084
TH
1296 }
1297 len = (s - strbeg) % len;
62f95557 1298 /* FALL THROUGH */
a6ec74c1 1299 case 'X':
08ca2aa3
TH
1300 if (utf8) {
1301 while (len > 0) {
1302 if (s <= strbeg)
1303 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1304 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1305 if (s <= strbeg)
1306 Perl_croak(aTHX_ "'X' outside of string in unpack");
1307 }
1308 len--;
1309 }
1310 } else {
fc241834
RGS
1311 if (len > s - strbeg)
1312 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1313 s -= len;
08ca2aa3 1314 }
a6ec74c1 1315 break;
62f95557
IZ
1316 case 'x' | TYPE_IS_SHRIEKING:
1317 if (!len) /* Avoid division by 0 */
1318 len = 1;
f337b084
TH
1319 if (utf8) ai32 = utf8_length(strbeg, s) % len;
1320 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1321 if (ai32 == 0) break;
1322 len -= ai32;
62f95557 1323 /* FALL THROUGH */
a6ec74c1 1324 case 'x':
08ca2aa3
TH
1325 if (utf8) {
1326 while (len>0) {
1327 if (s >= strend)
1328 Perl_croak(aTHX_ "'x' outside of string in unpack");
1329 s += UTF8SKIP(s);
1330 len--;
1331 }
1332 } else {
fc241834
RGS
1333 if (len > strend - s)
1334 Perl_croak(aTHX_ "'x' outside of string in unpack");
1335 s += len;
f337b084 1336 }
a6ec74c1
JH
1337 break;
1338 case '/':
49704364
WL
1339 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1340 break;
a6ec74c1
JH
1341 case 'A':
1342 case 'Z':
1343 case 'a':
08ca2aa3
TH
1344 if (checksum) {
1345 /* Preliminary length estimate is assumed done in 'W' */
1346 if (len > strend - s) len = strend - s;
1347 goto W_checksum;
1348 }
1349 if (utf8) {
1350 I32 l;
1351 char *hop;
1352 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1353 if (hop >= strend) {
1354 if (hop > strend)
1355 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1356 break;
fc241834 1357 }
a6ec74c1 1358 }
08ca2aa3
TH
1359 if (hop > strend)
1360 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1361 len = hop - s;
1362 } else if (len > strend - s)
1363 len = strend - s;
1364
1365 if (datumtype == 'Z') {
1366 /* 'Z' strips stuff after first null */
f337b084
TH
1367 char *ptr, *end;
1368 end = s + len;
1369 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1370 sv = newSVpvn(s, ptr-s);
1371 if (howlen == e_star) /* exact for 'Z*' */
1372 len = ptr-s + (ptr != strend ? 1 : 0);
1373 } else if (datumtype == 'A') {
1374 /* 'A' strips both nulls and spaces */
1375 char *ptr;
1376 for (ptr = s+len-1; ptr >= s; ptr--)
1377 if (*ptr != 0 && !isSPACE(*ptr)) break;
1378 ptr++;
1379 sv = newSVpvn(s, ptr-s);
1380 } else sv = newSVpvn(s, len);
1381
1382 if (utf8) {
1383 SvUTF8_on(sv);
1384 /* Undo any upgrade done due to need_utf8() */
f337b084 1385 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1386 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1387 }
1388 XPUSHs(sv_2mortal(sv));
08ca2aa3 1389 s += len;
a6ec74c1
JH
1390 break;
1391 case 'B':
08ca2aa3
TH
1392 case 'b': {
1393 char *str;
49704364 1394 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1395 len = (strend - s) * 8;
1396 if (checksum) {
1397 if (!PL_bitcount) {
08ca2aa3 1398 int bits;
a6ec74c1
JH
1399 Newz(601, PL_bitcount, 256, char);
1400 for (bits = 1; bits < 256; bits++) {
1401 if (bits & 1) PL_bitcount[bits]++;
1402 if (bits & 2) PL_bitcount[bits]++;
1403 if (bits & 4) PL_bitcount[bits]++;
1404 if (bits & 8) PL_bitcount[bits]++;
1405 if (bits & 16) PL_bitcount[bits]++;
1406 if (bits & 32) PL_bitcount[bits]++;
1407 if (bits & 64) PL_bitcount[bits]++;
1408 if (bits & 128) PL_bitcount[bits]++;
1409 }
1410 }
f337b084 1411 if (utf8)
08ca2aa3 1412 while (len >= 8 && s < strend) {
f337b084 1413 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1414 len -= 8;
1415 }
f337b084 1416 else
fc241834 1417 while (len >= 8) {
08ca2aa3 1418 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1419 len -= 8;
1420 }
08ca2aa3
TH
1421 if (len && s < strend) {
1422 U8 bits;
f337b084
TH
1423 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1424 if (datumtype == 'b')
a6ec74c1 1425 while (len-- > 0) {
92d41999 1426 if (bits & 1) cuv++;
a6ec74c1
JH
1427 bits >>= 1;
1428 }
f337b084 1429 else
a6ec74c1 1430 while (len-- > 0) {
08ca2aa3 1431 if (bits & 0x80) cuv++;
a6ec74c1
JH
1432 bits <<= 1;
1433 }
fc241834 1434 }
a6ec74c1
JH
1435 break;
1436 }
08ca2aa3
TH
1437
1438 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1439 SvPOK_on(sv);
1440 str = SvPVX(sv);
1441 if (datumtype == 'b') {
f337b084 1442 U8 bits = 0;
08ca2aa3
TH
1443 ai32 = len;
1444 for (len = 0; len < ai32; len++) {
1445 if (len & 7) bits >>= 1;
1446 else if (utf8) {
1447 if (s >= strend) break;
f337b084 1448 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1449 } else bits = *(U8 *) s++;
1450 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1451 }
08ca2aa3 1452 } else {
f337b084 1453 U8 bits = 0;
08ca2aa3
TH
1454 ai32 = len;
1455 for (len = 0; len < ai32; len++) {
1456 if (len & 7) bits <<= 1;
1457 else if (utf8) {
1458 if (s >= strend) break;
f337b084 1459 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1460 } else bits = *(U8 *) s++;
1461 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1462 }
1463 }
1464 *str = '\0';
08ca2aa3
TH
1465 SvCUR_set(sv, str - SvPVX(sv));
1466 XPUSHs(sv);
a6ec74c1 1467 break;
08ca2aa3 1468 }
a6ec74c1 1469 case 'H':
08ca2aa3
TH
1470 case 'h': {
1471 char *str;
fc241834 1472 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1473 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1474 len = (strend - s) * 2;
fc241834 1475 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1476 SvPOK_on(sv);
1477 str = SvPVX(sv);
1478 if (datumtype == 'h') {
f337b084 1479 U8 bits = 0;
fc241834
RGS
1480 ai32 = len;
1481 for (len = 0; len < ai32; len++) {
1482 if (len & 1) bits >>= 4;
1483 else if (utf8) {
1484 if (s >= strend) break;
f337b084 1485 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1486 } else bits = * (U8 *) s++;
a6ec74c1
JH
1487 *str++ = PL_hexdigit[bits & 15];
1488 }
08ca2aa3 1489 } else {
f337b084 1490 U8 bits = 0;
08ca2aa3
TH
1491 ai32 = len;
1492 for (len = 0; len < ai32; len++) {
1493 if (len & 1) bits <<= 4;
1494 else if (utf8) {
1495 if (s >= strend) break;
f337b084 1496 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1497 } else bits = *(U8 *) s++;
a6ec74c1
JH
1498 *str++ = PL_hexdigit[(bits >> 4) & 15];
1499 }
1500 }
1501 *str = '\0';
08ca2aa3
TH
1502 SvCUR_set(sv, str - SvPVX(sv));
1503 XPUSHs(sv);
a6ec74c1 1504 break;
08ca2aa3 1505 }
a6ec74c1 1506 case 'c':
73cb7263 1507 while (len-- > 0) {
f337b084 1508 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1509 if (aint >= 128) /* fake up signed chars */
1510 aint -= 256;
08ca2aa3 1511 if (!checksum)
ac7f3b1b 1512 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1513 else if (checksum > bits_in_uv)
1514 cdouble += (NV)aint;
1515 else
1516 cuv += aint;
a6ec74c1
JH
1517 }
1518 break;
1519 case 'C':
08ca2aa3
TH
1520 case 'W':
1521 W_checksum:
35bcd338 1522 if (len == 0) {
fc241834 1523 if (explicit_length && datumtype == 'C')
08ca2aa3 1524 /* Switch to "character" mode */
f337b084 1525 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1526 break;
1527 }
fc241834 1528 if (datumtype == 'C' ?
f337b084
TH
1529 (symptr->flags & FLAG_DO_UTF8) &&
1530 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3
TH
1531 while (len-- > 0 && s < strend) {
1532 UV val;
1533 STRLEN retlen;
f337b084
TH
1534 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1535 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1536 if (retlen == (STRLEN) -1 || retlen == 0)
1537 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1538 s += retlen;
1539 if (!checksum)
1540 PUSHs(sv_2mortal(newSVuv((UV) val)));
1541 else if (checksum > bits_in_uv)
1542 cdouble += (NV) val;
d6d3e8bd 1543 else
08ca2aa3 1544 cuv += val;
fc241834 1545 }
08ca2aa3 1546 } else if (!checksum)
a6ec74c1 1547 while (len-- > 0) {
08ca2aa3
TH
1548 U8 ch = *(U8 *) s++;
1549 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1550 }
08ca2aa3
TH
1551 else if (checksum > bits_in_uv)
1552 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1553 else
1554 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1555 break;
1556 case 'U':
35bcd338 1557 if (len == 0) {
08ca2aa3
TH
1558 if (explicit_length) {
1559 /* Switch to "bytes in UTF-8" mode */
f337b084 1560 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1561 else
1562 /* Should be impossible due to the need_utf8() test */
1563 Perl_croak(aTHX_ "U0 mode on a byte string");
1564 }
35bcd338
JH
1565 break;
1566 }
08ca2aa3 1567 if (len > strend - s) len = strend - s;
fc241834 1568 if (!checksum) {
08ca2aa3
TH
1569 if (len && unpack_only_one) len = 1;
1570 EXTEND(SP, len);
1571 EXTEND_MORTAL(len);
fc241834 1572 }
08ca2aa3
TH
1573 while (len-- > 0 && s < strend) {
1574 STRLEN retlen;
1575 UV auv;
1576 if (utf8) {
1577 U8 result[UTF8_MAXLEN];
1578 char *ptr;
1579 STRLEN len;
1580 ptr = s;
1581 /* Bug: warns about bad utf8 even if we are short on bytes
1582 and will break out of the loop */
f337b084 1583 if (!uni_to_bytes(aTHX_ &ptr, strend, result, 1, 'U'))
08ca2aa3
TH
1584 break;
1585 len = UTF8SKIP(result);
fc241834 1586 if (!uni_to_bytes(aTHX_ &ptr, strend,
f337b084 1587 &result[1], len-1, 'U')) break;
08ca2aa3
TH
1588 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1589 s = ptr;
1590 } else {
1591 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1592 if (retlen == (STRLEN) -1 || retlen == 0)
1593 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1594 s += retlen;
1595 }
1596 if (!checksum)
1597 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1598 else if (checksum > bits_in_uv)
08ca2aa3 1599 cdouble += (NV) auv;
73cb7263 1600 else
08ca2aa3 1601 cuv += auv;
a6ec74c1
JH
1602 }
1603 break;
49704364
WL
1604 case 's' | TYPE_IS_SHRIEKING:
1605#if SHORTSIZE != SIZE16
73cb7263 1606 while (len-- > 0) {
08ca2aa3 1607 short ashort;
f337b084
TH
1608 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1609 DO_BO_UNPACK(ashort, s);
08ca2aa3 1610 if (!checksum)
ac7f3b1b 1611 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1612 else if (checksum > bits_in_uv)
1613 cdouble += (NV)ashort;
1614 else
1615 cuv += ashort;
49704364
WL
1616 }
1617 break;
1618#else
1619 /* Fallthrough! */
a6ec74c1 1620#endif
49704364 1621 case 's':
73cb7263 1622 while (len-- > 0) {
08ca2aa3
TH
1623 I16 ai16;
1624
1625#if U16SIZE > SIZE16
1626 ai16 = 0;
1627#endif
f337b084 1628 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1629 DO_BO_UNPACK(ai16, 16);
1109a392 1630#if U16SIZE > SIZE16
73cb7263
NC
1631 if (ai16 > 32767)
1632 ai16 -= 65536;
a6ec74c1 1633#endif
08ca2aa3 1634 if (!checksum)
ac7f3b1b 1635 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1636 else if (checksum > bits_in_uv)
1637 cdouble += (NV)ai16;
1638 else
1639 cuv += ai16;
a6ec74c1
JH
1640 }
1641 break;
49704364
WL
1642 case 'S' | TYPE_IS_SHRIEKING:
1643#if SHORTSIZE != SIZE16
73cb7263 1644 while (len-- > 0) {
08ca2aa3 1645 unsigned short aushort;
f337b084
TH
1646 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1647 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1648 if (!checksum)
1649 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)aushort;
1652 else
1653 cuv += aushort;
49704364
WL
1654 }
1655 break;
1656#else
1657 /* Fallhrough! */
1658#endif
a6ec74c1
JH
1659 case 'v':
1660 case 'n':
1661 case 'S':
73cb7263 1662 while (len-- > 0) {
08ca2aa3
TH
1663 U16 au16;
1664#if U16SIZE > SIZE16
1665 au16 = 0;
1666#endif
f337b084 1667 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1668 DO_BO_UNPACK(au16, 16);
a6ec74c1 1669#ifdef HAS_NTOHS
73cb7263
NC
1670 if (datumtype == 'n')
1671 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1672#endif
1673#ifdef HAS_VTOHS
73cb7263
NC
1674 if (datumtype == 'v')
1675 au16 = vtohs(au16);
a6ec74c1 1676#endif
08ca2aa3
TH
1677 if (!checksum)
1678 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1679 else if (checksum > bits_in_uv)
f337b084 1680 cdouble += (NV) au16;
73cb7263
NC
1681 else
1682 cuv += au16;
a6ec74c1
JH
1683 }
1684 break;
7212898e 1685#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1686 case 'v' | TYPE_IS_SHRIEKING:
1687 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1688 while (len-- > 0) {
08ca2aa3
TH
1689 I16 ai16;
1690# if U16SIZE > SIZE16
1691 ai16 = 0;
1692# endif
f337b084 1693 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1694# ifdef HAS_NTOHS
73cb7263 1695 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1696 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1697# endif /* HAS_NTOHS */
1698# ifdef HAS_VTOHS
73cb7263 1699 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1700 ai16 = (I16) vtohs((U16) ai16);
1701# endif /* HAS_VTOHS */
1702 if (!checksum)
ac7f3b1b 1703 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1704 else if (checksum > bits_in_uv)
08ca2aa3 1705 cdouble += (NV) ai16;
73cb7263
NC
1706 else
1707 cuv += ai16;
068bd2e7
MHM
1708 }
1709 break;
08ca2aa3 1710#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1711 case 'i':
49704364 1712 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1713 while (len-- > 0) {
08ca2aa3 1714 int aint;
f337b084
TH
1715 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1716 DO_BO_UNPACK(aint, i);
08ca2aa3 1717 if (!checksum)
ac7f3b1b 1718 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1719 else if (checksum > bits_in_uv)
1720 cdouble += (NV)aint;
1721 else
1722 cuv += aint;
a6ec74c1
JH
1723 }
1724 break;
1725 case 'I':
49704364 1726 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1727 while (len-- > 0) {
08ca2aa3 1728 unsigned int auint;
f337b084
TH
1729 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1730 DO_BO_UNPACK(auint, i);
08ca2aa3 1731 if (!checksum)
ac7f3b1b 1732 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1733 else if (checksum > bits_in_uv)
1734 cdouble += (NV)auint;
1735 else
1736 cuv += auint;
a6ec74c1
JH
1737 }
1738 break;
92d41999 1739 case 'j':
73cb7263 1740 while (len-- > 0) {
08ca2aa3 1741 IV aiv;
f337b084 1742 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1743#if IVSIZE == INTSIZE
f337b084 1744 DO_BO_UNPACK(aiv, i);
1109a392 1745#elif IVSIZE == LONGSIZE
f337b084 1746 DO_BO_UNPACK(aiv, l);
1109a392 1747#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1748 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1749#else
1750 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1751#endif
08ca2aa3 1752 if (!checksum)
ac7f3b1b 1753 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1754 else if (checksum > bits_in_uv)
1755 cdouble += (NV)aiv;
1756 else
1757 cuv += aiv;
92d41999
JH
1758 }
1759 break;
1760 case 'J':
73cb7263 1761 while (len-- > 0) {
08ca2aa3 1762 UV auv;
f337b084 1763 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1764#if IVSIZE == INTSIZE
f337b084 1765 DO_BO_UNPACK(auv, i);
08ca2aa3 1766#elif IVSIZE == LONGSIZE
f337b084 1767 DO_BO_UNPACK(auv, l);
08ca2aa3 1768#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1769 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1770#else
1771 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1772#endif
08ca2aa3 1773 if (!checksum)
ac7f3b1b 1774 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1775 else if (checksum > bits_in_uv)
1776 cdouble += (NV)auv;
1777 else
1778 cuv += auv;
92d41999
JH
1779 }
1780 break;
49704364
WL
1781 case 'l' | TYPE_IS_SHRIEKING:
1782#if LONGSIZE != SIZE32
73cb7263 1783 while (len-- > 0) {
08ca2aa3 1784 long along;
f337b084
TH
1785 SHIFT_VAR(utf8, s, strend, along, datumtype);
1786 DO_BO_UNPACK(along, l);
08ca2aa3 1787 if (!checksum)
ac7f3b1b 1788 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1789 else if (checksum > bits_in_uv)
1790 cdouble += (NV)along;
1791 else
1792 cuv += along;
49704364
WL
1793 }
1794 break;
1795#else
1796 /* Fallthrough! */
a6ec74c1 1797#endif
49704364 1798 case 'l':
73cb7263 1799 while (len-- > 0) {
08ca2aa3
TH
1800 I32 ai32;
1801#if U32SIZE > SIZE32
1802 ai32 = 0;
1803#endif
f337b084 1804 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1805 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1806#if U32SIZE > SIZE32
08ca2aa3 1807 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1808#endif
08ca2aa3 1809 if (!checksum)
ac7f3b1b 1810 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV)ai32;
1813 else
1814 cuv += ai32;
a6ec74c1
JH
1815 }
1816 break;
49704364
WL
1817 case 'L' | TYPE_IS_SHRIEKING:
1818#if LONGSIZE != SIZE32
73cb7263 1819 while (len-- > 0) {
08ca2aa3 1820 unsigned long aulong;
f337b084
TH
1821 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1822 DO_BO_UNPACK(aulong, l);
08ca2aa3 1823 if (!checksum)
ac7f3b1b 1824 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1825 else if (checksum > bits_in_uv)
1826 cdouble += (NV)aulong;
1827 else
1828 cuv += aulong;
49704364
WL
1829 }
1830 break;
1831#else
1832 /* Fall through! */
1833#endif
a6ec74c1
JH
1834 case 'V':
1835 case 'N':
1836 case 'L':
73cb7263 1837 while (len-- > 0) {
08ca2aa3
TH
1838 U32 au32;
1839#if U32SIZE > SIZE32
1840 au32 = 0;
1841#endif
f337b084 1842 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1843 DO_BO_UNPACK(au32, 32);
a6ec74c1 1844#ifdef HAS_NTOHL
73cb7263
NC
1845 if (datumtype == 'N')
1846 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1847#endif
1848#ifdef HAS_VTOHL
73cb7263
NC
1849 if (datumtype == 'V')
1850 au32 = vtohl(au32);
a6ec74c1 1851#endif
08ca2aa3 1852 if (!checksum)
fc241834
RGS
1853 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1854 else if (checksum > bits_in_uv)
1855 cdouble += (NV)au32;
1856 else
1857 cuv += au32;
a6ec74c1
JH
1858 }
1859 break;
7212898e 1860#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1861 case 'V' | TYPE_IS_SHRIEKING:
1862 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1863 while (len-- > 0) {
08ca2aa3
TH
1864 I32 ai32;
1865# if U32SIZE > SIZE32
1866 ai32 = 0;
1867# endif
f337b084 1868 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1869# ifdef HAS_NTOHL
73cb7263
NC
1870 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1871 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1872# endif
1873# ifdef HAS_VTOHL
73cb7263
NC
1874 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1875 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1876# endif
1877 if (!checksum)
ac7f3b1b 1878 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1879 else if (checksum > bits_in_uv)
1880 cdouble += (NV)ai32;
1881 else
1882 cuv += ai32;
068bd2e7
MHM
1883 }
1884 break;
08ca2aa3 1885#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1886 case 'p':
a6ec74c1 1887 while (len-- > 0) {
08ca2aa3 1888 char *aptr;
f337b084 1889 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
5512a2f9 1890 DO_BO_UNPACK_P(aptr);
c4c5f44a
NC
1891 /* newSVpv generates undef if aptr is NULL */
1892 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1893 }
1894 break;
1895 case 'w':
a6ec74c1
JH
1896 {
1897 UV auv = 0;
1898 U32 bytes = 0;
fc241834 1899
08ca2aa3
TH
1900 while (len > 0 && s < strend) {
1901 U8 ch;
f337b084 1902 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1903 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1904 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1905 if (ch < 0x80) {
a6ec74c1 1906 bytes = 0;
ac7f3b1b 1907 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1908 len--;
1909 auv = 0;
08ca2aa3 1910 continue;
a6ec74c1 1911 }
08ca2aa3 1912 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
1913 char *t;
1914 STRLEN n_a;
1915
1916 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1917 while (s < strend) {
f337b084 1918 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1919 sv = mul128(sv, (U8)(ch & 0x7f));
1920 if (!(ch & 0x80)) {
a6ec74c1
JH
1921 bytes = 0;
1922 break;
1923 }
1924 }
1925 t = SvPV(sv, n_a);
1926 while (*t == '0')
1927 t++;
1928 sv_chop(sv, t);
1929 PUSHs(sv_2mortal(sv));
1930 len--;
1931 auv = 0;
1932 }
1933 }
1934 if ((s >= strend) && bytes)
49704364 1935 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1936 }
1937 break;
1938 case 'P':
49704364
WL
1939 if (symptr->howlen == e_star)
1940 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1941 EXTEND(SP, 1);
08ca2aa3
TH
1942 if (sizeof(char*) <= strend - s) {
1943 char *aptr;
f337b084 1944 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
08ca2aa3 1945 DO_BO_UNPACK_P(aptr);
fc241834
RGS
1946 /* newSVpvn generates undef if aptr is NULL */
1947 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 1948 }
a6ec74c1
JH
1949 break;
1950#ifdef HAS_QUAD
1951 case 'q':
73cb7263 1952 while (len-- > 0) {
08ca2aa3 1953 Quad_t aquad;
f337b084
TH
1954 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1955 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
1956 if (!checksum)
1957 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 1958 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
1959 else if (checksum > bits_in_uv)
1960 cdouble += (NV)aquad;
1961 else
1962 cuv += aquad;
1963 }
a6ec74c1
JH
1964 break;
1965 case 'Q':
73cb7263 1966 while (len-- > 0) {
08ca2aa3 1967 Uquad_t auquad;
f337b084
TH
1968 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1969 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
1970 if (!checksum)
1971 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1972 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
1973 else if (checksum > bits_in_uv)
1974 cdouble += (NV)auquad;
1975 else
1976 cuv += auquad;
a6ec74c1
JH
1977 }
1978 break;
08ca2aa3 1979#endif /* HAS_QUAD */
a6ec74c1
JH
1980 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1981 case 'f':
73cb7263 1982 while (len-- > 0) {
08ca2aa3 1983 float afloat;
f337b084 1984 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1985 DO_BO_UNPACK_N(afloat, float);
1986 if (!checksum)
1987 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
1988 else
73cb7263 1989 cdouble += afloat;
fc241834 1990 }
a6ec74c1
JH
1991 break;
1992 case 'd':
73cb7263 1993 while (len-- > 0) {
08ca2aa3 1994 double adouble;
f337b084 1995 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
1996 DO_BO_UNPACK_N(adouble, double);
1997 if (!checksum)
1998 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
1999 else
73cb7263 2000 cdouble += adouble;
fc241834 2001 }
a6ec74c1 2002 break;
92d41999 2003 case 'F':
73cb7263 2004 while (len-- > 0) {
08ca2aa3 2005 NV anv;
f337b084 2006 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2007 DO_BO_UNPACK_N(anv, NV);
2008 if (!checksum)
2009 PUSHs(sv_2mortal(newSVnv(anv)));
2010 else
73cb7263 2011 cdouble += anv;
fc241834 2012 }
92d41999
JH
2013 break;
2014#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2015 case 'D':
73cb7263 2016 while (len-- > 0) {
08ca2aa3 2017 long double aldouble;
f337b084 2018 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2019 DO_BO_UNPACK_N(aldouble, long double);
2020 if (!checksum)
2021 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2022 else
2023 cdouble += aldouble;
92d41999
JH
2024 }
2025 break;
2026#endif
a6ec74c1
JH
2027 case 'u':
2028 /* MKS:
2029 * Initialise the decode mapping. By using a table driven
2030 * algorithm, the code will be character-set independent
2031 * (and just as fast as doing character arithmetic)
2032 */
2033 if (PL_uudmap['M'] == 0) {
2034 int i;
2035
2036 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2037 PL_uudmap[(U8)PL_uuemap[i]] = i;
2038 /*
2039 * Because ' ' and '`' map to the same value,
2040 * we need to decode them both the same.
2041 */
2042 PL_uudmap[' '] = 0;
2043 }
08ca2aa3
TH
2044 {
2045 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2046 sv = sv_2mortal(NEWSV(42, l));
2047 if (l) SvPOK_on(sv);
2048 }
2049 if (utf8) {
2050 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2051 I32 a, b, c, d;
2052 char hunk[4];
2053
2054 hunk[3] = '\0';
2055 while (len > 0) {
2056 next_uni_uu(aTHX_ &s, strend, &a);
2057 next_uni_uu(aTHX_ &s, strend, &b);
2058 next_uni_uu(aTHX_ &s, strend, &c);
2059 next_uni_uu(aTHX_ &s, strend, &d);
2060 hunk[0] = (char)((a << 2) | (b >> 4));
2061 hunk[1] = (char)((b << 4) | (c >> 2));
2062 hunk[2] = (char)((c << 6) | d);
2063 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2064 len -= 3;
2065 }
2066 if (s < strend) {
2067 if (*s == '\n') s++;
2068 else {
2069 /* possible checksum byte */
2070 char *skip = s+UTF8SKIP(s);
2071 if (skip < strend && *skip == '\n') s = skip+1;
2072 }
2073 }
2074 }
2075 } else {
fc241834
RGS
2076 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2077 I32 a, b, c, d;
2078 char hunk[4];
a6ec74c1 2079
fc241834
RGS
2080 hunk[3] = '\0';
2081 len = PL_uudmap[*(U8*)s++] & 077;
2082 while (len > 0) {
2083 if (s < strend && ISUUCHAR(*s))
2084 a = PL_uudmap[*(U8*)s++] & 077;
2085 else
2086 a = 0;
2087 if (s < strend && ISUUCHAR(*s))
2088 b = PL_uudmap[*(U8*)s++] & 077;
2089 else
2090 b = 0;
2091 if (s < strend && ISUUCHAR(*s))
2092 c = PL_uudmap[*(U8*)s++] & 077;
2093 else
2094 c = 0;
2095 if (s < strend && ISUUCHAR(*s))
2096 d = PL_uudmap[*(U8*)s++] & 077;
2097 else
2098 d = 0;
2099 hunk[0] = (char)((a << 2) | (b >> 4));
2100 hunk[1] = (char)((b << 4) | (c >> 2));
2101 hunk[2] = (char)((c << 6) | d);
2102 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2103 len -= 3;
2104 }
2105 if (*s == '\n')
2106 s++;
2107 else /* possible checksum byte */
2108 if (s + 1 < strend && s[1] == '\n')
2109 s += 2;
a6ec74c1 2110 }
08ca2aa3
TH
2111 }
2112 XPUSHs(sv);
a6ec74c1
JH
2113 break;
2114 }
49704364 2115
a6ec74c1 2116 if (checksum) {
1109a392 2117 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2118 (checksum > bits_in_uv &&
08ca2aa3
TH
2119 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2120 NV trouble, anv;
a6ec74c1 2121
08ca2aa3 2122 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2123 while (checksum >= 16) {
2124 checksum -= 16;
08ca2aa3 2125 anv *= 65536.0;
a6ec74c1 2126 }
a6ec74c1 2127 while (cdouble < 0.0)
08ca2aa3
TH
2128 cdouble += anv;
2129 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2130 sv = newSVnv(cdouble);
a6ec74c1
JH
2131 }
2132 else {
fa8ec7c1
NC
2133 if (checksum < bits_in_uv) {
2134 UV mask = ((UV)1 << checksum) - 1;
92d41999 2135 cuv &= mask;
a6ec74c1 2136 }
c4c5f44a 2137 sv = newSVuv(cuv);
a6ec74c1
JH
2138 }
2139 XPUSHs(sv_2mortal(sv));
2140 checksum = 0;
2141 }
fc241834 2142
49704364
WL
2143 if (symptr->flags & FLAG_SLASH){
2144 if (SP - PL_stack_base - start_sp_offset <= 0)
2145 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2146 if( next_symbol(symptr) ){
2147 if( symptr->howlen == e_number )
2148 Perl_croak(aTHX_ "Count after length/code in unpack" );
2149 if( beyond ){
2150 /* ...end of char buffer then no decent length available */
2151 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2152 } else {
2153 /* take top of stack (hope it's numeric) */
2154 len = POPi;
2155 if( len < 0 )
2156 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2157 }
2158 } else {
2159 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2160 }
2161 datumtype = symptr->code;
21c16052 2162 explicit_length = FALSE;
49704364
WL
2163 goto redo_switch;
2164 }
a6ec74c1 2165 }
49704364 2166
18529408
IZ
2167 if (new_s)
2168 *new_s = s;
2169 PUTBACK;
2170 return SP - PL_stack_base - start_sp_offset;
2171}
2172
2173PP(pp_unpack)
2174{
2175 dSP;
bab9c0ac 2176 dPOPPOPssrl;
18529408
IZ
2177 I32 gimme = GIMME_V;
2178 STRLEN llen;
2179 STRLEN rlen;
08ca2aa3
TH
2180 char *pat = SvPV(left, llen);
2181 char *s = SvPV(right, rlen);
18529408 2182 char *strend = s + rlen;
08ca2aa3
TH
2183 char *patend = pat + llen;
2184 I32 cnt;
18529408
IZ
2185
2186 PUTBACK;
7accc089 2187 cnt = unpackstring(pat, patend, s, strend,
49704364 2188 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2189 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2190
18529408
IZ
2191 SPAGAIN;
2192 if ( !cnt && gimme == G_SCALAR )
2193 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2194 RETURN;
2195}
2196
f337b084
TH
2197STATIC U8 *
2198doencodes(U8 *h, char *s, I32 len)
a6ec74c1 2199{
f337b084 2200 *h++ = PL_uuemap[len];
a6ec74c1 2201 while (len > 2) {
f337b084
TH
2202 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2203 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2204 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2205 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2206 s += 3;
2207 len -= 3;
2208 }
2209 if (len > 0) {
2210 char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2211 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2212 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2213 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2214 *h++ = PL_uuemap[0];
a6ec74c1 2215 }
f337b084
TH
2216 *h++ = '\n';
2217 return h;
a6ec74c1
JH
2218}
2219
2220STATIC SV *
2221S_is_an_int(pTHX_ char *s, STRLEN l)
2222{
2223 STRLEN n_a;
2224 SV *result = newSVpvn(s, l);
2225 char *result_c = SvPV(result, n_a); /* convenience */
2226 char *out = result_c;
2227 bool skip = 1;
2228 bool ignore = 0;
2229
2230 while (*s) {
2231 switch (*s) {
2232 case ' ':
2233 break;
2234 case '+':
2235 if (!skip) {
2236 SvREFCNT_dec(result);
2237 return (NULL);
2238 }
2239 break;
2240 case '0':
2241 case '1':
2242 case '2':
2243 case '3':
2244 case '4':
2245 case '5':
2246 case '6':
2247 case '7':
2248 case '8':
2249 case '9':
2250 skip = 0;
2251 if (!ignore) {
2252 *(out++) = *s;
2253 }
2254 break;
2255 case '.':
2256 ignore = 1;
2257 break;
2258 default:
2259 SvREFCNT_dec(result);
2260 return (NULL);
2261 }
2262 s++;
2263 }
2264 *(out++) = '\0';
2265 SvCUR_set(result, out - result_c);
2266 return (result);
2267}
2268
2269/* pnum must be '\0' terminated */
2270STATIC int
2271S_div128(pTHX_ SV *pnum, bool *done)
2272{
2273 STRLEN len;
2274 char *s = SvPV(pnum, len);
2275 int m = 0;
2276 int r = 0;
2277 char *t = s;
2278
2279 *done = 1;
2280 while (*t) {
2281 int i;
2282
2283 i = m * 10 + (*t - '0');
2284 m = i & 0x7F;
2285 r = (i >> 7); /* r < 10 */
2286 if (r) {
2287 *done = 0;
2288 }
2289 *(t++) = '0' + r;
2290 }
2291 *(t++) = '\0';
2292 SvCUR_set(pnum, (STRLEN) (t - s));
2293 return (m);
2294}
2295
49704364 2296
a6ec74c1 2297
18529408
IZ
2298/*
2299=for apidoc pack_cat
2300
7accc089
JH
2301The engine implementing pack() Perl function. Note: parameters next_in_list and
2302flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2303
2304=cut */
2305
49704364 2306
18529408
IZ
2307void
2308Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2309{
49704364
WL
2310 tempsym_t sym = { 0 };
2311 sym.patptr = pat;
2312 sym.patend = patend;
7accc089
JH
2313 sym.flags = FLAG_PACK;
2314
2315 (void)pack_rec( cat, &sym, beglist, endlist );
2316}
2317
2318
2319/*
2320=for apidoc packlist
2321
2322The engine implementing pack() Perl function.
2323
2324=cut */
2325
2326
2327void
2328Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2329{
f337b084 2330 STRLEN no_len;
7accc089 2331 tempsym_t sym = { 0 };
f337b084 2332
7accc089
JH
2333 sym.patptr = pat;
2334 sym.patend = patend;
2335 sym.flags = FLAG_PACK;
49704364 2336
f337b084
TH
2337 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2338 Also make sure any UTF8 flag is loaded */
2339 SvPV_force(cat, no_len);
2340 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2341
49704364
WL
2342 (void)pack_rec( cat, &sym, beglist, endlist );
2343}
2344
f337b084
TH
2345/* like sv_utf8_upgrade, but also repoint the group start markers */
2346STATIC void
2347marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2348 STRLEN len;
2349 tempsym_t *group;
2350 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2351
2352 if (SvUTF8(sv)) return;
2353
2354 from_start = SvPVX(sv);
2355 from_end = from_start + SvCUR(sv);
2356 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2357 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2358 if (from_ptr == from_end) {
2359 /* Simple case: no character needs to be changed */
2360 SvUTF8_on(sv);
2361 return;
2362 }
2363
3473cf63 2364 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
f337b084
TH
2365 New('U', to_start, len, char);
2366 Copy(from_start, to_start, from_ptr-from_start, char);
2367 to_ptr = to_start + (from_ptr-from_start);
2368
2369 New('U', marks, sym_ptr->level+2, char *);
2370 for (group=sym_ptr; group; group = group->previous)
2371 marks[group->level] = from_start + group->strbeg;
2372 marks[sym_ptr->level+1] = from_end+1;
2373 for (m = marks; *m < from_ptr; m++)
2374 *m = to_start + (*m-from_start);
2375
2376 for (;from_ptr < from_end; from_ptr++) {
2377 while (*m == from_ptr) *m++ = to_ptr;
2378 to_ptr = uvchr_to_utf8(to_ptr, *(U8 *) from_ptr);
2379 }
2380 *to_ptr = 0;
2381
2382 while (*m == from_ptr) *m++ = to_ptr;
2383 if (m != marks + sym_ptr->level+1) {
2384 Safefree(marks);
2385 Safefree(to_start);
2386 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2387 }
2388 for (group=sym_ptr; group; group = group->previous)
2389 group->strbeg = marks[group->level] - to_start;
2390 Safefree(marks);
2391
2392 if (SvOOK(sv)) {
2393 if (SvIVX(sv)) {
2394 SvLEN(sv) += SvIVX(sv);
2395 from_start -= SvIVX(sv);
2396 SvIV_set(sv, 0);
2397 }
2398 SvFLAGS(sv) &= ~SVf_OOK;
2399 }
2400 if (SvLEN(sv) != 0)
2401 Safefree(from_start);
2402 SvPVX(sv) = to_start;
2403 SvCUR(sv) = to_ptr - to_start;
2404 SvLEN(sv) = len;
2405 SvUTF8_on(sv);
2406}
2407
2408/* Exponential string grower. Makes string extension effectively O(n)
2409 needed says how many extra bytes we need (not counting the final '\0')
2410 Only grows the string if there is an actual lack of space
2411*/
2412STATIC char *
2413sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2414 STRLEN cur = SvCUR(sv);
2415 STRLEN len = SvLEN(sv);
2416 STRLEN extend;
2417 if (len - cur > needed) return SvPVX(sv);
2418 extend = needed > len ? needed : len;
2419 return SvGROW(sv, len+extend+1);
2420}
49704364
WL
2421
2422STATIC
2423SV **
f337b084 2424S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2425{
49704364 2426 tempsym_t lookahead;
f337b084
TH
2427 I32 items = endlist - beglist;
2428 bool found = next_symbol(symptr);
2429 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2430
2431 if (symptr->level == 0 && found && symptr->code == 'U') {
2432 marked_upgrade(aTHX_ cat, symptr);
2433 symptr->flags |= FLAG_DO_UTF8;
2434 utf8 = 0;
49704364 2435 }
f337b084 2436 symptr->strbeg = SvCUR(cat);
49704364
WL
2437
2438 while (found) {
f337b084
TH
2439 SV *fromstr;
2440 STRLEN fromlen;
2441 I32 len;
a6ec74c1 2442 SV *lengthcode = Nullsv;
49704364 2443 I32 datumtype = symptr->code;
f337b084
TH
2444 howlen_t howlen = symptr->howlen;
2445 char *start = SvPVX(cat);
2446 char *cur = start + SvCUR(cat);
49704364 2447
f337b084
TH
2448#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2449
2450 switch (howlen) {
fc241834 2451 case e_star:
f337b084
TH
2452 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2453 0 : items;
2454 break;
2455 default:
2456 /* e_no_len and e_number */
2457 len = symptr->length;
49704364
WL
2458 break;
2459 }
2460
f337b084 2461 if (len) {
a7a3cfaa 2462 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2463
a7a3cfaa
TH
2464 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2465 /* We can process this letter. */
2466 STRLEN size = props & PACK_SIZE_MASK;
2467 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2468 }
f337b084
TH
2469 }
2470
49704364
WL
2471 /* Look ahead for next symbol. Do we have code/code? */
2472 lookahead = *symptr;
2473 found = next_symbol(&lookahead);
246f24af
TH
2474 if (symptr->flags & FLAG_SLASH) {
2475 IV count;
f337b084 2476 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2477 if (strchr("aAZ", lookahead.code)) {
2478 if (lookahead.howlen == e_number) count = lookahead.length;
2479 else {
2480 if (items > 0)
2481 count = DO_UTF8(*beglist) ?
2482 sv_len_utf8(*beglist) : sv_len(*beglist);
2483 else count = 0;
2484 if (lookahead.code == 'Z') count++;
2485 }
2486 } else {
2487 if (lookahead.howlen == e_number && lookahead.length < items)
2488 count = lookahead.length;
2489 else count = items;
2490 }
2491 lookahead.howlen = e_number;
2492 lookahead.length = count;
2493 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2494 }
49704364 2495
fc241834
RGS
2496 /* Code inside the switch must take care to properly update
2497 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2498 doesn't simply leave using break */
1109a392 2499 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2500 default:
f337b084
TH
2501 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2502 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2503 case '%':
49704364 2504 Perl_croak(aTHX_ "'%%' may not be used in pack");
a6ec74c1 2505 case '@':
f337b084
TH
2506 if (utf8) {
2507 char *s = start + symptr->strbeg;
2508 while (len > 0 && s < cur) {
2509 s += UTF8SKIP(s);
2510 len--;
2511 }
2512 if (s > cur)
2513 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2514 if (len > 0) {
2515 grow:
2516 GROWING(0, cat, start, cur, len);
2517 Zero(cur, len, char);
2518 cur += len;
2519 } else if (s < cur) cur = s;
2520 else goto no_change;
2521 } else {
2522 len -= cur - (start+symptr->strbeg);
2523 if (len > 0) goto grow;
fc241834 2524 len = -len;
f337b084
TH
2525 if (len > 0) goto shrink;
2526 else goto no_change;
2527 }
a6ec74c1 2528 break;
fc241834 2529 case '(': {
49704364 2530 tempsym_t savsym = *symptr;
66c611c5
MHM
2531 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2532 symptr->flags |= group_modifiers;
49704364
WL
2533 symptr->patend = savsym.grpend;
2534 symptr->level++;
f337b084 2535 symptr->previous = &lookahead;
18529408 2536 while (len--) {
f337b084
TH
2537 U32 was_utf8;
2538 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2539 else symptr->flags &= ~FLAG_PARSE_UTF8;
2540 was_utf8 = SvUTF8(cat);
49704364 2541 symptr->patptr = savsym.grpbeg;
f337b084
TH
2542 beglist = pack_rec(cat, symptr, beglist, endlist);
2543 if (SvUTF8(cat) != was_utf8)
2544 /* This had better be an upgrade while in utf8==0 mode */
2545 utf8 = 1;
2546
49704364 2547 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2548 break; /* No way to continue */
2549 }
f337b084
TH
2550 lookahead.flags = symptr->flags & ~group_modifiers;
2551 goto no_change;
18529408 2552 }
62f95557
IZ
2553 case 'X' | TYPE_IS_SHRIEKING:
2554 if (!len) /* Avoid division by 0 */
2555 len = 1;
f337b084
TH
2556 if (utf8) {
2557 char *hop, *last;
2558 I32 l = len;
2559 hop = last = start;
2560 while (hop < cur) {
2561 hop += UTF8SKIP(hop);
2562 if (--l == 0) {
2563 last = hop;
2564 l = len;
2565 }
2566 }
2567 if (last > cur)
2568 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2569 cur = last;
2570 break;
2571 }
2572 len = (cur-start) % len;
62f95557 2573 /* FALL THROUGH */
a6ec74c1 2574 case 'X':
f337b084
TH
2575 if (utf8) {
2576 if (len < 1) goto no_change;
2577 while (len > 0) {
2578 if (cur <= start)
2579 Perl_croak(aTHX_ "'X' outside of string in pack");
2580 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2581 if (cur <= start)
2582 Perl_croak(aTHX_ "'X' outside of string in pack");
2583 }
2584 len--;
2585 }
2586 } else {
fc241834 2587 shrink:
f337b084 2588 if (cur - start < len)
fc241834 2589 Perl_croak(aTHX_ "'X' outside of string in pack");
f337b084
TH
2590 cur -= len;
2591 }
2592 if (cur < start+symptr->strbeg) {
2593 /* Make sure group starts don't point into the void */
2594 tempsym_t *group;
2595 STRLEN length = cur-start;
2596 for (group = symptr;
2597 group && length < group->strbeg;
2598 group = group->previous) group->strbeg = length;
2599 lookahead.strbeg = length;
2600 }
a6ec74c1 2601 break;
fc241834
RGS
2602 case 'x' | TYPE_IS_SHRIEKING: {
2603 I32 ai32;
62f95557
IZ
2604 if (!len) /* Avoid division by 0 */
2605 len = 1;
fc241834
RGS
2606 if (utf8) ai32 = utf8_length(start, cur) % len;
2607 else ai32 = (cur - start) % len;
2608 if (ai32 == 0) goto no_change;
2609 len -= ai32;
2610 }
2611 /* FALL THROUGH */
a6ec74c1 2612 case 'x':
f337b084 2613 goto grow;
a6ec74c1
JH
2614 case 'A':
2615 case 'Z':
f337b084
TH
2616 case 'a': {
2617 char *aptr;
2618
a6ec74c1
JH
2619 fromstr = NEXTFROM;
2620 aptr = SvPV(fromstr, fromlen);
f337b084
TH
2621 if (DO_UTF8(fromstr)) {
2622 char *end, *s;
2623
2624 if (!utf8 && !SvUTF8(cat)) {
2625 marked_upgrade(aTHX_ cat, symptr);
2626 lookahead.flags |= FLAG_DO_UTF8;
2627 lookahead.strbeg = symptr->strbeg;
2628 utf8 = 1;
2629 start = SvPVX(cat);
2630 cur = start + SvCUR(cat);
2631 }
fc241834 2632 if (howlen == e_star) {
f337b084
TH
2633 if (utf8) goto string_copy;
2634 len = fromlen+1;
2635 }
2636 s = aptr;
2637 end = aptr + fromlen;
2638 fromlen = datumtype == 'Z' ? len-1 : len;
2639 while ((I32) fromlen > 0 && s < end) {
2640 s += UTF8SKIP(s);
2641 fromlen--;
2642 }
2643 if (s > end)
2644 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2645 if (utf8) {
fc241834 2646 len = fromlen;
f337b084
TH
2647 if (datumtype == 'Z') len++;
2648 fromlen = s-aptr;
2649 len += fromlen;
fc241834 2650
f337b084 2651 goto string_copy;
fc241834 2652 }
f337b084
TH
2653 fromlen = len - fromlen;
2654 if (datumtype == 'Z') fromlen--;
2655 if (howlen == e_star) {
2656 len = fromlen;
2657 if (datumtype == 'Z') len++;
fc241834 2658 }
f337b084 2659 GROWING(0, cat, start, cur, len);
fc241834 2660 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2661 datumtype | TYPE_IS_PACK))
2662 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2663 cur += fromlen;
a6ec74c1 2664 len -= fromlen;
f337b084
TH
2665 } else if (utf8) {
2666 if (howlen == e_star) {
2667 len = fromlen;
2668 if (datumtype == 'Z') len++;
a6ec74c1 2669 }
f337b084
TH
2670 if (len <= (I32) fromlen) {
2671 fromlen = len;
2672 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2673 }
fc241834 2674 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2675 upgrade, so:
2676 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2677 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2678 len -= fromlen;
2679 while (fromlen > 0) {
2680 cur = uvchr_to_utf8(cur, * (U8 *) aptr);
2681 aptr++;
2682 fromlen--;
fc241834 2683 }
f337b084
TH
2684 } else {
2685 string_copy:
2686 if (howlen == e_star) {
2687 len = fromlen;
2688 if (datumtype == 'Z') len++;
2689 }
2690 if (len <= (I32) fromlen) {
2691 fromlen = len;
2692 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2693 }
f337b084
TH
2694 GROWING(0, cat, start, cur, len);
2695 Copy(aptr, cur, fromlen, char);
2696 cur += fromlen;
2697 len -= fromlen;
a6ec74c1 2698 }
f337b084
TH
2699 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2700 cur += len;
a6ec74c1 2701 break;
f337b084 2702 }
a6ec74c1 2703 case 'B':
f337b084
TH
2704 case 'b': {
2705 char *str, *end;
2706 I32 l, field_len;
2707 U8 bits;
2708 bool utf8_source;
2709 U32 utf8_flags;
a6ec74c1 2710
fc241834
RGS
2711 fromstr = NEXTFROM;
2712 str = SvPV(fromstr, fromlen);
f337b084
TH
2713 end = str + fromlen;
2714 if (DO_UTF8(fromstr)) {
2715 utf8_source = TRUE;
2716 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2717 } else {
2718 utf8_source = FALSE;
2719 utf8_flags = 0; /* Unused, but keep compilers happy */
2720 }
2721 if (howlen == e_star) len = fromlen;
2722 field_len = (len+7)/8;
2723 GROWING(utf8, cat, start, cur, field_len);
2724 if (len > (I32)fromlen) len = fromlen;
2725 bits = 0;
2726 l = 0;
2727 if (datumtype == 'B')
2728 while (l++ < len) {
2729 if (utf8_source) {
2730 UV val;
2731 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2732 bits |= val & 1;
2733 } else bits |= *str++ & 1;
2734 if (l & 7) bits <<= 1;
fc241834 2735 else {
f337b084
TH
2736 PUSH_BYTE(utf8, cur, bits);
2737 bits = 0;
a6ec74c1
JH
2738 }
2739 }
f337b084
TH
2740 else
2741 /* datumtype == 'b' */
2742 while (l++ < len) {
2743 if (utf8_source) {
2744 UV val;
2745 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2746 if (val & 1) bits |= 0x80;
2747 } else if (*str++ & 1)
2748 bits |= 0x80;
2749 if (l & 7) bits >>= 1;
fc241834 2750 else {
f337b084
TH
2751 PUSH_BYTE(utf8, cur, bits);
2752 bits = 0;
a6ec74c1
JH
2753 }
2754 }
f337b084
TH
2755 l--;
2756 if (l & 7) {
fc241834 2757 if (datumtype == 'B')
f337b084 2758 bits <<= 7 - (l & 7);
fc241834 2759 else
f337b084
TH
2760 bits >>= 7 - (l & 7);
2761 PUSH_BYTE(utf8, cur, bits);
2762 l += 7;
a6ec74c1 2763 }
f337b084
TH
2764 /* Determine how many chars are left in the requested field */
2765 l /= 8;
2766 if (howlen == e_star) field_len = 0;
2767 else field_len -= l;
2768 Zero(cur, field_len, char);
2769 cur += field_len;
a6ec74c1 2770 break;
f337b084 2771 }
a6ec74c1 2772 case 'H':
f337b084
TH
2773 case 'h': {
2774 char *str, *end;
2775 I32 l, field_len;
2776 U8 bits;
2777 bool utf8_source;
2778 U32 utf8_flags;
a6ec74c1 2779
fc241834
RGS
2780 fromstr = NEXTFROM;
2781 str = SvPV(fromstr, fromlen);
f337b084
TH
2782 end = str + fromlen;
2783 if (DO_UTF8(fromstr)) {
2784 utf8_source = TRUE;
2785 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2786 } else {
2787 utf8_source = FALSE;
2788 utf8_flags = 0; /* Unused, but keep compilers happy */
2789 }
2790 if (howlen == e_star) len = fromlen;
2791 field_len = (len+1)/2;
2792 GROWING(utf8, cat, start, cur, field_len);
2793 if (!utf8 && len > (I32)fromlen) len = fromlen;
2794 bits = 0;
2795 l = 0;
2796 if (datumtype == 'H')
2797 while (l++ < len) {
2798 if (utf8_source) {
2799 UV val;
2800 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2801 if (val < 256 && isALPHA(val))
2802 bits |= (val + 9) & 0xf;
a6ec74c1 2803 else
f337b084
TH
2804 bits |= val & 0xf;
2805 } else if (isALPHA(*str))
2806 bits |= (*str++ + 9) & 0xf;
2807 else
2808 bits |= *str++ & 0xf;
2809 if (l & 1) bits <<= 4;
fc241834 2810 else {
f337b084
TH
2811 PUSH_BYTE(utf8, cur, bits);
2812 bits = 0;
a6ec74c1
JH
2813 }
2814 }
f337b084
TH
2815 else
2816 while (l++ < len) {
2817 if (utf8_source) {
2818 UV val;
2819 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2820 if (val < 256 && isALPHA(val))
2821 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2822 else
f337b084
TH
2823 bits |= (val & 0xf) << 4;
2824 } else if (isALPHA(*str))
2825 bits |= ((*str++ + 9) & 0xf) << 4;
2826 else
2827 bits |= (*str++ & 0xf) << 4;
2828 if (l & 1) bits >>= 4;
fc241834 2829 else {
f337b084
TH
2830 PUSH_BYTE(utf8, cur, bits);
2831 bits = 0;
a6ec74c1 2832 }
fc241834 2833 }
f337b084
TH
2834 l--;
2835 if (l & 1) {
2836 PUSH_BYTE(utf8, cur, bits);
2837 l++;
2838 }
2839 /* Determine how many chars are left in the requested field */
2840 l /= 2;
2841 if (howlen == e_star) field_len = 0;
2842 else field_len -= l;
2843 Zero(cur, field_len, char);
2844 cur += field_len;
2845 break;
fc241834
RGS
2846 }
2847 case 'c':
f337b084
TH
2848 while (len-- > 0) {
2849 IV aiv;
2850 fromstr = NEXTFROM;
2851 aiv = SvIV(fromstr);
2852 if ((-128 > aiv || aiv > 127) &&
2853 ckWARN(WARN_PACK))
2854 Perl_warner(aTHX_ packWARN(WARN_PACK),
2855 "Character in 'c' format wrapped in pack");
2856 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2857 }
2858 break;
2859 case 'C':
f337b084
TH
2860 if (len == 0) {
2861 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2862 break;
2863 }
2864 GROWING(0, cat, start, cur, len);
a6ec74c1 2865 while (len-- > 0) {
f337b084 2866 IV aiv;
a6ec74c1 2867 fromstr = NEXTFROM;
f337b084
TH
2868 aiv = SvIV(fromstr);
2869 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2870 ckWARN(WARN_PACK))
2871 Perl_warner(aTHX_ packWARN(WARN_PACK),
2872 "Character in 'C' format wrapped in pack");
f337b084
TH
2873 *cur++ = aiv & 0xff;
2874 }
fc241834
RGS
2875 break;
2876 case 'W': {
2877 char *end;
2878 U8 in_bytes = IN_BYTES;
2879
2880 end = start+SvLEN(cat)-1;
2881 if (utf8) end -= UTF8_MAXLEN-1;
2882 while (len-- > 0) {
2883 UV auv;
2884 fromstr = NEXTFROM;
2885 auv = SvUV(fromstr);
2886 if (in_bytes) auv = auv % 0x100;
2887 if (utf8) {
2888 W_utf8:
2889 if (cur > end) {
2890 *cur = '\0';
2891 SvCUR(cat) = cur - start;
2892
2893 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2894 end = start+SvLEN(cat)-UTF8_MAXLEN;
2895 }
2896 cur = uvuni_to_utf8_flags(cur, NATIVE_TO_UNI(auv),
2897 ckWARN(WARN_UTF8) ?
2898 0 : UNICODE_ALLOW_ANY);
2899 } else {
2900 if (auv >= 0x100) {
2901 if (!SvUTF8(cat)) {
2902 *cur = '\0';
2903 SvCUR(cat) = cur - start;
2904 marked_upgrade(aTHX_ cat, symptr);
2905 lookahead.flags |= FLAG_DO_UTF8;
2906 lookahead.strbeg = symptr->strbeg;
2907 utf8 = 1;
2908 start = SvPVX(cat);
2909 cur = start + SvCUR(cat);
2910 end = start+SvLEN(cat)-UTF8_MAXLEN;
2911 goto W_utf8;
2912 }
2913 if (ckWARN(WARN_PACK))
2914 Perl_warner(aTHX_ packWARN(WARN_PACK),
2915 "Character in 'W' format wrapped in pack");
2916 auv &= 0xff;
2917 }
2918 if (cur >= end) {
2919 *cur = '\0';
2920 SvCUR(cat) = cur - start;
2921 GROWING(0, cat, start, cur, len+1);
2922 end = start+SvLEN(cat)-1;
2923 }
2924 *(U8 *) cur++ = auv;
a6ec74c1
JH
2925 }
2926 }
2927 break;
fc241834
RGS
2928 }
2929 case 'U': {
2930 char *end;
2931
2932 if (len == 0) {
2933 if (!(symptr->flags & FLAG_DO_UTF8)) {
2934 marked_upgrade(aTHX_ cat, symptr);
2935 lookahead.flags |= FLAG_DO_UTF8;
2936 lookahead.strbeg = symptr->strbeg;
2937 }
2938 utf8 = 0;
2939 goto no_change;
2940 }
2941
2942 end = start+SvLEN(cat);
2943 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2944 while (len-- > 0) {
fc241834 2945 UV auv;
a6ec74c1 2946 fromstr = NEXTFROM;
fc241834
RGS
2947 auv = SvUV(fromstr);
2948 if (utf8) {
2949 char buffer[UTF8_MAXLEN], *endb;
2950 endb = uvuni_to_utf8_flags(buffer, auv,
2951 ckWARN(WARN_UTF8) ?
2952 0 : UNICODE_ALLOW_ANY);
2953 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2954 *cur = '\0';
2955 SvCUR(cat) = cur - start;
2956 GROWING(0, cat, start, cur,
2957 len+(endb-buffer)*UTF8_EXPAND);
2958 end = start+SvLEN(cat);
2959 }
2960 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
2961 } else {
2962 if (cur >= end) {
2963 *cur = '\0';
2964 SvCUR(cat) = cur - start;
2965 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2966 end = start+SvLEN(cat)-UTF8_MAXLEN;
2967 }
2968 cur = uvuni_to_utf8_flags(cur, auv,
2969 ckWARN(WARN_UTF8) ?
2970 0 : UNICODE_ALLOW_ANY);
2971 }
a6ec74c1 2972 }
a6ec74c1 2973 break;
fc241834 2974 }
a6ec74c1
JH
2975 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2976 case 'f':
a6ec74c1 2977 while (len-- > 0) {
f337b084
TH
2978 float afloat;
2979 NV anv;
a6ec74c1 2980 fromstr = NEXTFROM;
f337b084 2981 anv = SvNV(fromstr);
5cdb9e01 2982#ifdef __VOS__
f337b084 2983 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
2984 during conversion from double to float into infinity, so we
2985 do it by hand. This code should either be generalized for
2986 any OS that needs it, or removed if and when VOS implements
2987 posix-976 (suggestion to support mapping to infinity).
2988 Paul.Green@stratus.com 02-04-02. */
f337b084 2989 if (anv > FLT_MAX)
fc241834 2990 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 2991 else if (anv < -FLT_MAX)
fc241834 2992 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
2993 else afloat = (float) anv;
2994#else /* __VOS__ */
baf3cf9c 2995# if defined(VMS) && !defined(__IEEE_FP)
f337b084 2996 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2997 * on Alpha; fake it if we don't have them.
2998 */
f337b084 2999 if (anv > FLT_MAX)
fc241834 3000 afloat = FLT_MAX;
f337b084 3001 else if (anv < -FLT_MAX)
fc241834 3002 afloat = -FLT_MAX;
f337b084 3003 else afloat = (float)anv;
baf3cf9c 3004# else
f337b084 3005 afloat = (float)anv;
baf3cf9c 3006# endif
f337b084 3007#endif /* __VOS__ */
1109a392 3008 DO_BO_PACK_N(afloat, float);
f337b084 3009 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3010 }
3011 break;
3012 case 'd':
a6ec74c1 3013 while (len-- > 0) {
f337b084
TH
3014 double adouble;
3015 NV anv;
a6ec74c1 3016 fromstr = NEXTFROM;
f337b084 3017 anv = SvNV(fromstr);
5cdb9e01 3018#ifdef __VOS__
f337b084 3019 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3020 during conversion from long double to double into infinity,
3021 so we do it by hand. This code should either be generalized
3022 for any OS that needs it, or removed if and when VOS
3023 implements posix-976 (suggestion to support mapping to
3024 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3025 if (anv > DBL_MAX)
fc241834 3026 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3027 else if (anv < -DBL_MAX)
fc241834 3028 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3029 else adouble = (double) anv;
3030#else /* __VOS__ */
baf3cf9c 3031# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3032 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3033 * on Alpha; fake it if we don't have them.
3034 */
f337b084 3035 if (anv > DBL_MAX)
fc241834 3036 adouble = DBL_MAX;
f337b084 3037 else if (anv < -DBL_MAX)
fc241834 3038 adouble = -DBL_MAX;
f337b084 3039 else adouble = (double)anv;
baf3cf9c 3040# else
f337b084 3041 adouble = (double)anv;
baf3cf9c 3042# endif
f337b084 3043#endif /* __VOS__ */
1109a392 3044 DO_BO_PACK_N(adouble, double);
f337b084 3045 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3046 }
3047 break;
fc241834
RGS
3048 case 'F': {
3049 NV anv;
1109a392 3050 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3051 while (len-- > 0) {
3052 fromstr = NEXTFROM;
3053 anv = SvNV(fromstr);
1109a392 3054 DO_BO_PACK_N(anv, NV);
fc241834 3055 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3056 }
3057 break;
fc241834 3058 }
92d41999 3059#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3060 case 'D': {
3061 long double aldouble;
1109a392
MHM
3062 /* long doubles can have unused bits, which may be nonzero */
3063 Zero(&aldouble, 1, long double);
92d41999
JH
3064 while (len-- > 0) {
3065 fromstr = NEXTFROM;
3066 aldouble = (long double)SvNV(fromstr);
1109a392 3067 DO_BO_PACK_N(aldouble, long double);
fc241834 3068 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3069 }
3070 break;
fc241834 3071 }
92d41999 3072#endif
7212898e 3073#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3074 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3075#endif
a6ec74c1
JH
3076 case 'n':
3077 while (len-- > 0) {
f337b084 3078 I16 ai16;
a6ec74c1 3079 fromstr = NEXTFROM;
ef108786 3080 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3081#ifdef HAS_HTONS
ef108786 3082 ai16 = PerlSock_htons(ai16);
a6ec74c1 3083#endif
f337b084 3084 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3085 }
3086 break;
7212898e 3087#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3088 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3089#endif
a6ec74c1
JH
3090 case 'v':
3091 while (len-- > 0) {
f337b084 3092 I16 ai16;
a6ec74c1 3093 fromstr = NEXTFROM;
ef108786 3094 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3095#ifdef HAS_HTOVS
ef108786 3096 ai16 = htovs(ai16);
a6ec74c1 3097#endif
f337b084 3098 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3099 }
3100 break;
49704364 3101 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3102#if SHORTSIZE != SIZE16
fc241834 3103 while (len-- > 0) {
f337b084 3104 unsigned short aushort;
fc241834
RGS
3105 fromstr = NEXTFROM;
3106 aushort = SvUV(fromstr);
3107 DO_BO_PACK(aushort, s);
f337b084 3108 PUSH_VAR(utf8, cur, aushort);
fc241834 3109 }
49704364
WL
3110 break;
3111#else
3112 /* Fall through! */
a6ec74c1 3113#endif
49704364 3114 case 'S':
fc241834 3115 while (len-- > 0) {
f337b084 3116 U16 au16;
fc241834
RGS
3117 fromstr = NEXTFROM;
3118 au16 = (U16)SvUV(fromstr);
3119 DO_BO_PACK(au16, 16);
f337b084 3120 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3121 }
3122 break;
49704364 3123 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3124#if SHORTSIZE != SIZE16
fc241834 3125 while (len-- > 0) {
f337b084 3126 short ashort;
fc241834
RGS
3127 fromstr = NEXTFROM;
3128 ashort = SvIV(fromstr);
3129 DO_BO_PACK(ashort, s);
f337b084 3130 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3131 }
49704364
WL
3132 break;
3133#else
3134 /* Fall through! */
a6ec74c1 3135#endif
49704364
WL
3136 case 's':
3137 while (len-- > 0) {
f337b084 3138 I16 ai16;
49704364 3139 fromstr = NEXTFROM;
ef108786
MHM
3140 ai16 = (I16)SvIV(fromstr);
3141 DO_BO_PACK(ai16, 16);
f337b084 3142 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3143 }
3144 break;
3145 case 'I':
49704364 3146 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3147 while (len-- > 0) {
f337b084 3148 unsigned int auint;
a6ec74c1
JH
3149 fromstr = NEXTFROM;
3150 auint = SvUV(fromstr);
1109a392 3151 DO_BO_PACK(auint, i);
f337b084 3152 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3153 }
3154 break;
92d41999
JH
3155 case 'j':
3156 while (len-- > 0) {
f337b084 3157 IV aiv;
92d41999
JH
3158 fromstr = NEXTFROM;
3159 aiv = SvIV(fromstr);
1109a392
MHM
3160#if IVSIZE == INTSIZE
3161 DO_BO_PACK(aiv, i);
3162#elif IVSIZE == LONGSIZE
3163 DO_BO_PACK(aiv, l);
3164#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3165 DO_BO_PACK(aiv, 64);
f337b084
TH
3166#else
3167 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3168#endif
f337b084 3169 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3170 }
3171 break;
3172 case 'J':
3173 while (len-- > 0) {
f337b084 3174 UV auv;
92d41999
JH
3175 fromstr = NEXTFROM;
3176 auv = SvUV(fromstr);
1109a392
MHM
3177#if UVSIZE == INTSIZE
3178 DO_BO_PACK(auv, i);
3179#elif UVSIZE == LONGSIZE
3180 DO_BO_PACK(auv, l);
3181#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3182 DO_BO_PACK(auv, 64);
f337b084
TH
3183#else
3184 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3185#endif
f337b084 3186 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3187 }
3188 break;
a6ec74c1
JH
3189 case 'w':
3190 while (len-- > 0) {
f337b084 3191 NV anv;
a6ec74c1 3192 fromstr = NEXTFROM;
15e9f109 3193 anv = SvNV(fromstr);
a6ec74c1 3194
f337b084
TH
3195 if (anv < 0) {
3196 *cur = '\0';
3197 SvCUR(cat) = cur - start;
49704364 3198 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3199 }
a6ec74c1 3200
196b62db
NC
3201 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3202 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3203 any negative IVs will have already been got by the croak()
3204 above. IOK is untrue for fractions, so we test them
3205 against UV_MAX_P1. */
f337b084
TH
3206 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3207 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3208 char *in = buf + sizeof(buf);
196b62db 3209 UV auv = SvUV(fromstr);
a6ec74c1
JH
3210
3211 do {
eb160463 3212 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3213 auv >>= 7;
3214 } while (auv);
3215 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3216 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3217 in, (buf + sizeof(buf)) - in);
3218 } else if (SvPOKp(fromstr))
3219 goto w_string;
a6ec74c1 3220 else if (SvNOKp(fromstr)) {
0258719b
NC
3221 /* 10**NV_MAX_10_EXP is the largest power of 10
3222 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3223 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3224 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3225 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3226 Some C compilers are strict about integral constant
3227 expressions so we conservatively divide by a slightly
3228 smaller integer instead of multiplying by the exact
3229 floating-point value.
0258719b
NC
3230 */
3231#ifdef NV_MAX_10_EXP
f337b084 3232 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3233 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3234#else
f337b084 3235 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3236 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3237#endif
a6ec74c1
JH
3238 char *in = buf + sizeof(buf);
3239
15e9f109 3240 anv = Perl_floor(anv);
a6ec74c1 3241 do {
15e9f109 3242 NV next = Perl_floor(anv / 128);
a6ec74c1 3243 if (in <= buf) /* this cannot happen ;-) */
49704364 3244 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3245 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3246 anv = next;
3247 } while (anv > 0);
a6ec74c1 3248 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3249 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3250 in, (buf + sizeof(buf)) - in);
3251 } else {
735b914b
JH
3252 char *from, *result, *in;
3253 SV *norm;
3254 STRLEN len;
3255 bool done;
3256
f337b084 3257 w_string:
735b914b
JH
3258 /* Copy string and check for compliance */
3259 from = SvPV(fromstr, len);
3260 if ((norm = is_an_int(from, len)) == NULL)
49704364 3261 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
3262
3263 New('w', result, len, char);
3264 in = result + len;
3265 done = FALSE;
f337b084 3266 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3267 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3268 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3269 in, (result + len) - in);
735b914b
JH
3270 Safefree(result);
3271 SvREFCNT_dec(norm); /* free norm */
fc241834 3272 }
a6ec74c1
JH
3273 }
3274 break;
3275 case 'i':
49704364 3276 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3277 while (len-- > 0) {
f337b084 3278 int aint;
a6ec74c1
JH
3279 fromstr = NEXTFROM;
3280 aint = SvIV(fromstr);
1109a392 3281 DO_BO_PACK(aint, i);
f337b084 3282 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3283 }
3284 break;
7212898e 3285#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3286 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3287#endif
a6ec74c1
JH
3288 case 'N':
3289 while (len-- > 0) {
f337b084 3290 U32 au32;
a6ec74c1 3291 fromstr = NEXTFROM;
ef108786 3292 au32 = SvUV(fromstr);
a6ec74c1 3293#ifdef HAS_HTONL
ef108786 3294 au32 = PerlSock_htonl(au32);
a6ec74c1 3295#endif
f337b084 3296 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3297 }
3298 break;
7212898e 3299#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3300 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3301#endif
a6ec74c1
JH
3302 case 'V':
3303 while (len-- > 0) {
f337b084 3304 U32 au32;
a6ec74c1 3305 fromstr = NEXTFROM;
ef108786 3306 au32 = SvUV(fromstr);
a6ec74c1 3307#ifdef HAS_HTOVL
ef108786 3308 au32 = htovl(au32);
a6ec74c1 3309#endif
f337b084 3310 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3311 }
3312 break;
49704364 3313 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3314#if LONGSIZE != SIZE32
fc241834 3315 while (len-- > 0) {
f337b084 3316 unsigned long aulong;
fc241834
RGS
3317 fromstr = NEXTFROM;
3318 aulong = SvUV(fromstr);
3319 DO_BO_PACK(aulong, l);
f337b084 3320 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3321 }
49704364
WL
3322 break;
3323#else
3324 /* Fall though! */
a6ec74c1 3325#endif
49704364 3326 case 'L':
fc241834 3327 while (len-- > 0) {
f337b084 3328 U32 au32;
fc241834
RGS
3329 fromstr = NEXTFROM;
3330 au32 = SvUV(fromstr);
3331 DO_BO_PACK(au32, 32);
f337b084 3332 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3333 }
3334 break;
49704364 3335 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3336#if LONGSIZE != SIZE32
fc241834 3337 while (len-- > 0) {
f337b084 3338 long along;
fc241834
RGS
3339 fromstr = NEXTFROM;
3340 along = SvIV(fromstr);
3341 DO_BO_PACK(along, l);
f337b084 3342 PUSH_VAR(utf8, cur, along);
a6ec74c1 3343 }
49704364
WL
3344 break;
3345#else
3346 /* Fall though! */
a6ec74c1 3347#endif
49704364
WL
3348 case 'l':
3349 while (len-- > 0) {
f337b084 3350 I32 ai32;
49704364 3351 fromstr = NEXTFROM;
ef108786
MHM
3352 ai32 = SvIV(fromstr);
3353 DO_BO_PACK(ai32, 32);
f337b084 3354 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3355 }
3356 break;
3357#ifdef HAS_QUAD
3358 case 'Q':
3359 while (len-- > 0) {
f337b084 3360 Uquad_t auquad;
a6ec74c1 3361 fromstr = NEXTFROM;
f337b084 3362 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3363 DO_BO_PACK(auquad, 64);
f337b084 3364 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3365 }
3366 break;
3367 case 'q':
3368 while (len-- > 0) {
f337b084 3369 Quad_t aquad;
a6ec74c1
JH
3370 fromstr = NEXTFROM;
3371 aquad = (Quad_t)SvIV(fromstr);
1109a392 3372 DO_BO_PACK(aquad, 64);
f337b084 3373 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3374 }
3375 break;
f337b084 3376#endif /* HAS_QUAD */
a6ec74c1
JH
3377 case 'P':
3378 len = 1; /* assume SV is correct length */
f337b084 3379 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3380 /* Fall through! */
a6ec74c1
JH
3381 case 'p':
3382 while (len-- > 0) {
f337b084
TH
3383 char *aptr;
3384
a6ec74c1 3385 fromstr = NEXTFROM;
28a4f200
TH
3386 SvGETMAGIC(fromstr);
3387 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3388 else {
3389 STRLEN n_a;
3390 /* XXX better yet, could spirit away the string to
3391 * a safe spot and hang on to it until the result
3392 * of pack() (and all copies of the result) are
3393 * gone.
3394 */
f337b084
TH
3395 if (ckWARN(WARN_PACK) &&
3396 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3397 !SvREADONLY(fromstr)))) {
9014280d 3398 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3399 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3400 }
3401 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3402 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3403 else
28a4f200 3404 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3405 }
1109a392 3406 DO_BO_PACK_P(aptr);
f337b084 3407 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3408 }
3409 break;
fc241834
RGS
3410 case 'u': {
3411 char *aptr, *aend;
3412 bool from_utf8;
f337b084 3413
a6ec74c1 3414 fromstr = NEXTFROM;
fc241834
RGS
3415 if (len <= 2) len = 45;
3416 else len = len / 3 * 3;
3417 if (len >= 64) {
3418 Perl_warner(aTHX_ packWARN(WARN_PACK),
3419 "Field too wide in 'u' format in pack");
3420 len = 63;
3421 }
a6ec74c1 3422 aptr = SvPV(fromstr, fromlen);
fc241834
RGS
3423 from_utf8 = DO_UTF8(fromstr);
3424 if (from_utf8) {
3425 aend = aptr + fromlen;
3426 fromlen = sv_len_utf8(fromstr);
3427 } else aend = NULL; /* Unused, but keep compilers happy */
3428 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3429 while (fromlen > 0) {
fc241834 3430 U8 *end;
a6ec74c1 3431 I32 todo;
fc241834 3432 U8 hunk[1+63/3*4+1];
a6ec74c1 3433
eb160463 3434 if ((I32)fromlen > len)
a6ec74c1
JH
3435 todo = len;
3436 else
3437 todo = fromlen;
fc241834
RGS
3438 if (from_utf8) {
3439 char buffer[64];
3440 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3441 'u' | TYPE_IS_PACK)) {
3442 *cur = '\0';
3443 SvCUR(cat) = cur - start;
3444 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3445 }
3446 end = doencodes(hunk, buffer, todo);
3447 } else {
3448 end = doencodes(hunk, aptr, todo);
3449 aptr += todo;
3450 }
3451 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3452 fromlen -= todo;
3453 }
a6ec74c1
JH
3454 break;
3455 }
f337b084
TH
3456 }
3457 *cur = '\0';
3458 SvCUR(cat) = cur - start;
3459 no_change:
49704364 3460 *symptr = lookahead;
a6ec74c1 3461 }
49704364 3462 return beglist;
18529408
IZ
3463}
3464#undef NEXTFROM
3465
3466
3467PP(pp_pack)
3468{
3469 dSP; dMARK; dORIGMARK; dTARGET;
3470 register SV *cat = TARG;
3471 STRLEN fromlen;
3472 register char *pat = SvPVx(*++MARK, fromlen);
3473 register char *patend = pat + fromlen;
3474
3475 MARK++;
3476 sv_setpvn(cat, "", 0);
f337b084 3477 SvUTF8_off(cat);
18529408 3478
7accc089 3479 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3480
a6ec74c1
JH
3481 SvSETMAGIC(cat);
3482 SP = ORIGMARK;
3483 PUSHs(cat);
3484 RETURN;
3485}
a6ec74c1 3486
73cb7263
NC
3487/*
3488 * Local variables:
3489 * c-indentation-style: bsd
3490 * c-basic-offset: 4
3491 * indent-tabs-mode: t
3492 * End:
3493 *
edf815fd 3494 * vim: shiftwidth=4:
73cb7263 3495*/