This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Change my email, put real name of Pixel
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e 34#if PERL_VERSION >= 9
f337b084
TH
35# define PERL_PACK_CAN_BYTEORDER
36# define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
39#ifndef CHAR_BIT
40# define CHAR_BIT 8
7212898e 41#endif
3473cf63
RGS
42/* Maximum number of bytes to which a byte can grow due to upgrade */
43#define UTF8_EXPAND 2
7212898e 44
a6ec74c1 45/*
a6ec74c1
JH
46 * Offset for integer pack/unpack.
47 *
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
50 */
51
52/*
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
58 */
59/*
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
62 */
63#define SIZE16 2
64#define SIZE32 4
65
66/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
67 --jhi Feb 1999 */
68
1109a392
MHM
69#if U16SIZE > SIZE16 || U32SIZE > SIZE32
70# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
71# define OFF16(p) ((char*)(p))
72# define OFF32(p) ((char*)(p))
a6ec74c1 73# else
1109a392 74# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
75# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
77# else
08ca2aa3 78 ++++ bad cray byte order
a6ec74c1
JH
79# endif
80# endif
a6ec74c1 81#else
08ca2aa3
TH
82# define OFF16(p) ((char *) (p))
83# define OFF32(p) ((char *) (p))
a6ec74c1
JH
84#endif
85
f337b084
TH
86/* Only to be used inside a loop (see the break) */
87#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
88 if (utf8) { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
90 } else { \
91 Copy(s, OFF16(p), SIZE16, char); \
92 (s) += SIZE16; \
93 } \
94} STMT_END
95
96/* Only to be used inside a loop (see the break) */
97#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
98 if (utf8) { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
100 } else { \
101 Copy(s, OFF32(p), SIZE32, char); \
102 (s) += SIZE32; \
103 } \
104} STMT_END
105
106#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
108
109/* Only to be used inside a loop (see the break) */
f337b084 110#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
111STMT_START { \
112 if (utf8) { \
f337b084
TH
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
115 } else { \
116 Copy(s, (char *) &var, sizeof(var), char); \
117 s += sizeof(var); \
118 } \
08ca2aa3
TH
119} STMT_END
120
f337b084
TH
121#define PUSH_VAR(utf8, aptr, var) \
122 PUSH_BYTES(utf8, aptr, (char *) &(var), sizeof(var))
123
49704364
WL
124/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125#define MAX_SUB_TEMPLATE_LEVEL 100
126
66c611c5 127/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
128#define FLAG_WAS_UTF8 0x40
129#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 130#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 131#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
132#define FLAG_SLASH 0x04
133#define FLAG_COMMA 0x02
134#define FLAG_PACK 0x01
135
a6ec74c1
JH
136STATIC SV *
137S_mul128(pTHX_ SV *sv, U8 m)
138{
139 STRLEN len;
140 char *s = SvPV(sv, len);
141 char *t;
142 U32 i = 0;
143
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
146
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
149 sv = tmpNew;
150 s = SvPV(sv, len);
151 }
152 t = s + len - 1;
153 while (!*t) /* trailing '\0'? */
154 t--;
155 while (t > s) {
156 i = ((*t - '0') << 7) + m;
eb160463
GS
157 *(t--) = '0' + (char)(i % 10);
158 m = (char)(i / 10);
a6ec74c1
JH
159 }
160 return (sv);
161}
162
163/* Explosives and implosives. */
164
165#if 'I' == 73 && 'J' == 74
166/* On an ASCII/ISO kind of system */
167#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
168#else
169/*
170 Some other sort of character set - use memchr() so we don't match
171 the null byte.
172 */
173#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
174#endif
175
66c611c5 176/* type modifiers */
62f95557 177#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
178#define TYPE_IS_BIG_ENDIAN 0x200
179#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 180#define TYPE_IS_PACK 0x800
1109a392 181#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 182#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
183#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
184
7212898e
NC
185#ifdef PERL_PACK_CAN_SHRIEKSIGN
186#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
187#else
188#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
189#endif
190
191#ifndef PERL_PACK_CAN_BYTEORDER
192/* Put "can't" first because it is shorter */
193# define TYPE_ENDIANNESS(t) 0
194# define TYPE_NO_ENDIANNESS(t) (t)
195
196# define ENDIANNESS_ALLOWED_TYPES ""
197
198# define DO_BO_UNPACK(var, type)
199# define DO_BO_PACK(var, type)
200# define DO_BO_UNPACK_PTR(var, type, pre_cast)
201# define DO_BO_PACK_PTR(var, type, pre_cast)
202# define DO_BO_UNPACK_N(var, type)
203# define DO_BO_PACK_N(var, type)
204# define DO_BO_UNPACK_P(var)
205# define DO_BO_PACK_P(var)
66c611c5 206
f337b084 207#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
208
209# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
211
212# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
213
214# define DO_BO_UNPACK(var, type) \
1109a392 215 STMT_START { \
66c611c5 216 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
219 default: break; \
220 } \
221 } STMT_END
222
7212898e 223# define DO_BO_PACK(var, type) \
1109a392 224 STMT_START { \
66c611c5 225 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
228 default: break; \
229 } \
230 } STMT_END
231
7212898e 232# define DO_BO_UNPACK_PTR(var, type, pre_cast) \
1109a392 233 STMT_START { \
66c611c5 234 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (void *) my_betoh ## type ((pre_cast) var); \
237 break; \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (void *) my_letoh ## type ((pre_cast) var); \
240 break; \
241 default: \
242 break; \
243 } \
244 } STMT_END
245
7212898e 246# define DO_BO_PACK_PTR(var, type, pre_cast) \
1109a392 247 STMT_START { \
66c611c5 248 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (void *) my_htobe ## type ((pre_cast) var); \
251 break; \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (void *) my_htole ## type ((pre_cast) var); \
254 break; \
255 default: \
256 break; \
257 } \
258 } STMT_END
259
7212898e 260# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
266 break; \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
270 break; \
271 default: \
272 break; \
273 } \
274 } STMT_END
275
7212898e
NC
276# if PTRSIZE == INTSIZE
277# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
278# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
279# elif PTRSIZE == LONGSIZE
280# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
281# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
282# else
283# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
284# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
285# endif
1109a392 286
7212898e 287# if defined(my_htolen) && defined(my_letohn) && \
1109a392 288 defined(my_htoben) && defined(my_betohn)
7212898e 289# define DO_BO_UNPACK_N(var, type) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
292 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
293 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
294 default: break; \
295 } \
296 } STMT_END
297
7212898e 298# define DO_BO_PACK_N(var, type) \
1109a392 299 STMT_START { \
66c611c5 300 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
301 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
302 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
303 default: break; \
304 } \
305 } STMT_END
7212898e
NC
306# else
307# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
308# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
309# endif
310
f337b084 311#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 312
78d46eaa 313#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 314#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
315#define PACK_SIZE_MASK 0x3F
316
317
318struct packsize_t {
58c0bd94 319 const unsigned char *array;
78d46eaa
NC
320 int first;
321 int size;
322};
323
324#define PACK_SIZE_NORMAL 0
325#define PACK_SIZE_SHRIEKING 1
326
327/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
328 in). You're unlikely ever to need to regenerate them. */
329#if 'J'-'I' == 1
330/* ASCII */
331unsigned char size_normal[53] = {
332 /* C */ sizeof(unsigned char),
80a13697 333#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
78d46eaa 334 /* D */ LONG_DOUBLESIZE,
80a13697
NC
335#else
336 0,
337#endif
78d46eaa
NC
338 0,
339 /* F */ NVSIZE,
340 0, 0,
341 /* I */ sizeof(unsigned int),
342 /* J */ UVSIZE,
343 0,
344 /* L */ SIZE32,
345 0,
346 /* N */ SIZE32,
347 0, 0,
80a13697 348#if defined(HAS_QUAD)
78d46eaa 349 /* Q */ sizeof(Uquad_t),
80a13697
NC
350#else
351 0,
352#endif
78d46eaa
NC
353 0,
354 /* S */ SIZE16,
355 0,
f337b084 356 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
78d46eaa 357 /* V */ SIZE32,
f337b084 358 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
08ca2aa3 359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78d46eaa
NC
360 /* c */ sizeof(char),
361 /* d */ sizeof(double),
362 0,
363 /* f */ sizeof(float),
364 0, 0,
365 /* i */ sizeof(int),
366 /* j */ IVSIZE,
367 0,
368 /* l */ SIZE32,
369 0,
370 /* n */ SIZE16,
371 0,
a72c271b 372 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 373#if defined(HAS_QUAD)
78d46eaa 374 /* q */ sizeof(Quad_t),
80a13697
NC
375#else
376 0,
377#endif
78d46eaa
NC
378 0,
379 /* s */ SIZE16,
380 0, 0,
381 /* v */ SIZE16,
f337b084 382 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
78d46eaa
NC
383};
384unsigned char size_shrieking[46] = {
385 /* I */ sizeof(unsigned int),
386 0, 0,
387 /* L */ sizeof(unsigned long),
388 0,
7212898e 389#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 390 /* N */ SIZE32,
7212898e
NC
391#else
392 0,
393#endif
78d46eaa
NC
394 0, 0, 0, 0,
395 /* S */ sizeof(unsigned short),
396 0, 0,
7212898e 397#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 398 /* V */ SIZE32,
7212898e
NC
399#else
400 0,
401#endif
78d46eaa
NC
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 /* i */ sizeof(int),
404 0, 0,
405 /* l */ sizeof(long),
406 0,
7212898e 407#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 408 /* n */ SIZE16,
7212898e
NC
409#else
410 0,
411#endif
78d46eaa
NC
412 0, 0, 0, 0,
413 /* s */ sizeof(short),
414 0, 0,
7212898e 415#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 416 /* v */ SIZE16
7212898e
NC
417#else
418 0
419#endif
78d46eaa
NC
420};
421struct packsize_t packsize[2] = {
422 {size_normal, 67, 53},
423 {size_shrieking, 73, 46}
424};
425#else
426/* EBCDIC (or bust) */
08ca2aa3 427unsigned char size_normal[100] = {
78d46eaa
NC
428 /* c */ sizeof(char),
429 /* d */ sizeof(double),
430 0,
431 /* f */ sizeof(float),
432 0, 0,
433 /* i */ sizeof(int),
434 0, 0, 0, 0, 0, 0, 0,
435 /* j */ IVSIZE,
436 0,
437 /* l */ SIZE32,
438 0,
439 /* n */ SIZE16,
440 0,
a72c271b 441 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 442#if defined(HAS_QUAD)
78d46eaa 443 /* q */ sizeof(Quad_t),
80a13697
NC
444#else
445 0,
446#endif
78d46eaa
NC
447 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 /* s */ SIZE16,
449 0, 0,
450 /* v */ SIZE16,
f337b084 451 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
78d46eaa
NC
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0,
454 /* C */ sizeof(unsigned char),
80a13697 455#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
78d46eaa 456 /* D */ LONG_DOUBLESIZE,
80a13697
NC
457#else
458 0,
459#endif
78d46eaa
NC
460 0,
461 /* F */ NVSIZE,
462 0, 0,
463 /* I */ sizeof(unsigned int),
464 0, 0, 0, 0, 0, 0, 0,
465 /* J */ UVSIZE,
466 0,
467 /* L */ SIZE32,
468 0,
469 /* N */ SIZE32,
470 0, 0,
80a13697 471#if defined(HAS_QUAD)
78d46eaa 472 /* Q */ sizeof(Uquad_t),
80a13697
NC
473#else
474 0,
475#endif
78d46eaa
NC
476 0, 0, 0, 0, 0, 0, 0, 0, 0,
477 /* S */ SIZE16,
478 0,
f337b084 479 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
7212898e 480 /* V */ SIZE32,
f337b084 481 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
78d46eaa
NC
482};
483unsigned char size_shrieking[93] = {
484 /* i */ sizeof(int),
485 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 /* l */ sizeof(long),
487 0,
7212898e 488#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 489 /* n */ SIZE16,
7212898e
NC
490#else
491 0,
492#endif
78d46eaa
NC
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 /* s */ sizeof(short),
495 0, 0,
7212898e 496#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 497 /* v */ SIZE16,
7212898e
NC
498#else
499 0,
500#endif
78d46eaa
NC
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 /* I */ sizeof(unsigned int),
504 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 /* L */ sizeof(unsigned long),
506 0,
7212898e 507#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 508 /* N */ SIZE32,
7212898e
NC
509#else
510 0,
511#endif
78d46eaa
NC
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 /* S */ sizeof(unsigned short),
514 0, 0,
7212898e 515#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 516 /* V */ SIZE32
7212898e
NC
517#else
518 0
519#endif
78d46eaa
NC
520};
521struct packsize_t packsize[2] = {
08ca2aa3 522 {size_normal, 131, 100},
78d46eaa
NC
523 {size_shrieking, 137, 93}
524};
525#endif
526
08ca2aa3 527STATIC U8
f337b084 528uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
08ca2aa3
TH
529{
530 UV val;
531 STRLEN retlen;
f337b084
TH
532 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
533 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
534 /* We try to process malformed UTF-8 as much as possible (preferrably with
535 warnings), but these two mean we make no progress in the string and
536 might enter an infinite loop */
537 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
538 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
539 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 540 if (val >= 0x100) {
f337b084 541 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
542 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
543 "Character in '%c' format wrapped in unpack",
f337b084 544 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
545 val &= 0xff;
546 }
547 *s += retlen;
548 return val;
549}
550
f337b084
TH
551#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
552 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
553 *(U8 *)(s)++)
554
555STATIC bool
f337b084 556uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
557{
558 UV val;
559 STRLEN retlen;
560 char *from = *s;
561 int bad = 0;
562 U32 flags = ckWARN(WARN_UTF8) ?
563 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
564 for (;buf_len > 0; buf_len--) {
565 if (from >= end) return FALSE;
f337b084 566 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
567 if (retlen == (STRLEN) -1 || retlen == 0) {
568 from += UTF8SKIP(from);
569 bad |= 1;
570 } else from += retlen;
571 if (val >= 0x100) {
572 bad |= 2;
573 val &= 0xff;
574 }
575 *(U8 *)buf++ = val;
576 }
577 /* We have enough characters for the buffer. Did we have problems ? */
578 if (bad) {
579 if (bad & 1) {
580 /* Rewalk the string fragment while warning */
581 char *ptr;
582 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
583 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
584 if (ptr >= end) break;
f337b084 585 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
586 }
587 if (from > end) from = end;
588 }
589 if ((bad & 2) && ckWARN(WARN_UNPACK))
f337b084
TH
590 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
591 WARN_PACK : WARN_UNPACK),
592 "Character(s) in '%c' format wrapped in %s",
593 (int) TYPE_NO_MODIFIERS(datumtype),
594 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
595 }
596 *s = from;
597 return TRUE;
598}
599
600STATIC bool
601next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
602{
603 UV val;
604 STRLEN retlen;
f337b084 605 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
606 if (val >= 0x100 || !ISUUCHAR(val) ||
607 retlen == (STRLEN) -1 || retlen == 0) {
608 *out = 0;
609 return FALSE;
610 }
611 *out = PL_uudmap[val] & 077;
f337b084 612 *s += retlen;
08ca2aa3
TH
613 return TRUE;
614}
78d46eaa 615
f337b084
TH
616STATIC void
617bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
618 U8 buffer[UTF8_MAXLEN];
619 U8 *end = start + len;
620 char *d = *dest;
621 while (start < end) {
622 int length =
623 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
624 switch(length) {
625 case 1:
626 *d++ = buffer[0];
627 break;
628 case 2:
629 *d++ = buffer[0];
630 *d++ = buffer[1];
631 break;
632 default:
633 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
634 *start, length);
635 }
636 start++;
637 }
638 *dest = d;
639}
640
641#define PUSH_BYTES(utf8, cur, buf, len) \
642STMT_START { \
643 if (utf8) bytes_to_uni(aTHX_ buf, len, &(cur)); \
644 else { \
645 Copy(buf, cur, len, char); \
646 (cur) += (len); \
647 } \
648} STMT_END
649
650#define GROWING(utf8, cat, start, cur, in_len) \
651STMT_START { \
652 STRLEN glen = (in_len); \
3473cf63 653 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
654 if ((cur) + glen >= (start) + SvLEN(cat)) { \
655 (start) = sv_exp_grow(aTHX_ cat, glen); \
656 (cur) = (start) + SvCUR(cat); \
657 } \
658} STMT_END
659
660#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
661STMT_START { \
662 STRLEN glen = (in_len); \
663 STRLEN gl = glen; \
3473cf63 664 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
665 if ((cur) + gl >= (start) + SvLEN(cat)) { \
666 *cur = '\0'; \
667 SvCUR(cat) = (cur) - (start); \
668 (start) = sv_exp_grow(aTHX_ cat, gl); \
669 (cur) = (start) + SvCUR(cat); \
670 } \
671 PUSH_BYTES(utf8, cur, buf, glen); \
672} STMT_END
673
674#define PUSH_BYTE(utf8, s, byte) \
675STMT_START { \
676 if (utf8) { \
677 U8 au8 = (byte); \
678 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
679 } else *(U8 *)(s)++ = (byte); \
680} STMT_END
681
682/* Only to be used inside a loop (see the break) */
683#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
684STMT_START { \
685 STRLEN retlen; \
686 if (str >= end) break; \
687 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
688 if (retlen == (STRLEN) -1 || retlen == 0) { \
689 *cur = '\0'; \
690 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
691 } \
692 str += retlen; \
693} STMT_END
694
206947d2 695/* Returns the sizeof() struct described by pat */
028d1f6d 696STATIC I32
f337b084 697S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 698{
f337b084 699 I32 total = 0;
206947d2 700
49704364 701 while (next_symbol(symptr)) {
f337b084
TH
702 I32 len;
703 int star, size;
704 int which = (symptr->code & TYPE_IS_SHRIEKING) ?
705 PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
706 int offset = TYPE_NO_MODIFIERS(symptr->code) - packsize[which].first;
707
708 switch (symptr->howlen) {
49704364
WL
709 case e_star:
710 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
711 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
712 break;
f337b084
TH
713 default:
714 /* e_no_len and e_number */
715 len = symptr->length;
716 break;
49704364
WL
717 }
718
80a13697
NC
719 if ((offset >= 0) && (offset < packsize[which].size))
720 size = packsize[which].array[offset] & PACK_SIZE_MASK;
721 else
206947d2 722 size = 0;
78d46eaa 723
80a13697
NC
724 if (!size) {
725 /* endianness doesn't influence the size of a type */
726 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
727 default:
728 Perl_croak(aTHX_ "Invalid type '%c' in %s",
729 (int)TYPE_NO_MODIFIERS(symptr->code),
730 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
731 case '@':
732 case '/':
733 case 'U': /* XXXX Is it correct? */
734 case 'w':
735 case 'u':
736 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
737 (int)symptr->code,
738 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
739 case '%':
740 size = 0;
741 break;
742 case '(':
743 {
744 tempsym_t savsym = *symptr;
745 symptr->patptr = savsym.grpbeg;
746 symptr->patend = savsym.grpend;
747 /* XXXX Theoretically, we need to measure many times at
748 different positions, since the subexpression may contain
749 alignment commands, but be not of aligned length.
750 Need to detect this and croak(). */
751 size = measure_struct(symptr);
752 *symptr = savsym;
753 break;
754 }
755 case 'X' | TYPE_IS_SHRIEKING:
756 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
757 */
758 if (!len) /* Avoid division by 0 */
759 len = 1;
760 len = total % len; /* Assumed: the start is aligned. */
761 /* FALL THROUGH */
762 case 'X':
763 size = -1;
764 if (total < len)
765 Perl_croak(aTHX_ "'X' outside of string in %s",
766 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
767 break;
768 case 'x' | TYPE_IS_SHRIEKING:
769 if (!len) /* Avoid division by 0 */
770 len = 1;
771 star = total % len; /* Assumed: the start is aligned. */
772 if (star) /* Other portable ways? */
773 len = len - star;
774 else
775 len = 0;
776 /* FALL THROUGH */
777 case 'x':
778 case 'A':
779 case 'Z':
780 case 'a':
80a13697
NC
781 size = 1;
782 break;
783 case 'B':
784 case 'b':
785 len = (len + 7)/8;
786 size = 1;
787 break;
788 case 'H':
789 case 'h':
790 len = (len + 1)/2;
791 size = 1;
792 break;
78d46eaa 793
80a13697
NC
794 case 'P':
795 len = 1;
796 size = sizeof(char*);
78d46eaa
NC
797 break;
798 }
206947d2
IZ
799 }
800 total += len * size;
801 }
802 return total;
803}
804
49704364
WL
805
806/* locate matching closing parenthesis or bracket
807 * returns char pointer to char after match, or NULL
808 */
809STATIC char *
810S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 811{
49704364
WL
812 while (patptr < patend) {
813 char c = *patptr++;
814
815 if (isSPACE(c))
816 continue;
817 else if (c == ender)
818 return patptr-1;
819 else if (c == '#') {
820 while (patptr < patend && *patptr != '\n')
821 patptr++;
822 continue;
823 } else if (c == '(')
824 patptr = group_end(patptr, patend, ')') + 1;
825 else if (c == '[')
826 patptr = group_end(patptr, patend, ']') + 1;
18529408 827 }
49704364
WL
828 Perl_croak(aTHX_ "No group ending character '%c' found in template",
829 ender);
830 return 0;
18529408
IZ
831}
832
49704364
WL
833
834/* Convert unsigned decimal number to binary.
835 * Expects a pointer to the first digit and address of length variable
836 * Advances char pointer to 1st non-digit char and returns number
837 */
18529408 838STATIC char *
49704364
WL
839S_get_num(pTHX_ register char *patptr, I32 *lenptr )
840{
841 I32 len = *patptr++ - '0';
842 while (isDIGIT(*patptr)) {
843 if (len >= 0x7FFFFFFF/10)
844 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
845 len = (len * 10) + (*patptr++ - '0');
846 }
847 *lenptr = len;
848 return patptr;
849}
850
851/* The marvellous template parsing routine: Using state stored in *symptr,
852 * locates next template code and count
853 */
854STATIC bool
f337b084 855S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 856{
f337b084
TH
857 char* patptr = symptr->patptr;
858 char* patend = symptr->patend;
859 const char *allowed = "";
49704364
WL
860
861 symptr->flags &= ~FLAG_SLASH;
862
863 while (patptr < patend) {
864 if (isSPACE(*patptr))
865 patptr++;
866 else if (*patptr == '#') {
867 patptr++;
868 while (patptr < patend && *patptr != '\n')
869 patptr++;
870 if (patptr < patend)
871 patptr++;
872 } else {
873 /* We should have found a template code */
874 I32 code = *patptr++ & 0xFF;
66c611c5 875 U32 inherited_modifiers = 0;
49704364
WL
876
877 if (code == ','){ /* grandfather in commas but with a warning */
878 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
879 symptr->flags |= FLAG_COMMA;
880 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
881 "Invalid type ',' in %s",
882 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
883 }
884 continue;
885 }
886
887 /* for '(', skip to ')' */
888 if (code == '(') {
889 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
890 Perl_croak(aTHX_ "()-group starts with a count in %s",
891 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
892 symptr->grpbeg = patptr;
893 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
894 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
895 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
896 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
897 }
898
66c611c5
MHM
899 /* look for group modifiers to inherit */
900 if (TYPE_ENDIANNESS(symptr->flags)) {
901 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
902 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
903 }
904
1109a392
MHM
905 /* look for modifiers */
906 while (patptr < patend) {
1109a392
MHM
907 I32 modifier = 0;
908 switch (*patptr) {
909 case '!':
910 modifier = TYPE_IS_SHRIEKING;
7212898e 911 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 912 break;
7212898e 913#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
914 case '>':
915 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 916 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
917 break;
918 case '<':
919 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 920 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 921 break;
f337b084 922#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392
MHM
923 default:
924 break;
925 }
66c611c5 926
1109a392
MHM
927 if (modifier == 0)
928 break;
66c611c5 929
1109a392
MHM
930 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
931 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
932 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
933
934 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392
MHM
935 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
936 (int) TYPE_NO_MODIFIERS(code),
937 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
938 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
939 TYPE_ENDIANNESS_MASK)
940 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
941 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
942
1109a392
MHM
943 if (ckWARN(WARN_UNPACK)) {
944 if (code & modifier)
945 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
946 "Duplicate modifier '%c' after '%c' in %s",
947 *patptr, (int) TYPE_NO_MODIFIERS(code),
948 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
949 }
66c611c5 950
1109a392
MHM
951 code |= modifier;
952 patptr++;
49704364
WL
953 }
954
66c611c5
MHM
955 /* inherit modifiers */
956 code |= inherited_modifiers;
957
49704364
WL
958 /* look for count and/or / */
959 if (patptr < patend) {
960 if (isDIGIT(*patptr)) {
961 patptr = get_num( patptr, &symptr->length );
962 symptr->howlen = e_number;
963
964 } else if (*patptr == '*') {
965 patptr++;
966 symptr->howlen = e_star;
967
968 } else if (*patptr == '[') {
969 char* lenptr = ++patptr;
970 symptr->howlen = e_number;
971 patptr = group_end( patptr, patend, ']' ) + 1;
972 /* what kind of [] is it? */
973 if (isDIGIT(*lenptr)) {
974 lenptr = get_num( lenptr, &symptr->length );
975 if( *lenptr != ']' )
976 Perl_croak(aTHX_ "Malformed integer in [] in %s",
977 symptr->flags & FLAG_PACK ? "pack" : "unpack");
978 } else {
979 tempsym_t savsym = *symptr;
980 symptr->patend = patptr-1;
981 symptr->patptr = lenptr;
982 savsym.length = measure_struct(symptr);
983 *symptr = savsym;
984 }
985 } else {
986 symptr->howlen = e_no_len;
987 symptr->length = 1;
988 }
989
990 /* try to find / */
991 while (patptr < patend) {
992 if (isSPACE(*patptr))
993 patptr++;
994 else if (*patptr == '#') {
995 patptr++;
996 while (patptr < patend && *patptr != '\n')
997 patptr++;
998 if (patptr < patend)
999 patptr++;
1000 } else {
66c611c5 1001 if (*patptr == '/') {
49704364
WL
1002 symptr->flags |= FLAG_SLASH;
1003 patptr++;
66c611c5
MHM
1004 if (patptr < patend &&
1005 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364
WL
1006 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1007 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1008 }
1009 break;
1010 }
18529408 1011 }
49704364
WL
1012 } else {
1013 /* at end - no count, no / */
1014 symptr->howlen = e_no_len;
1015 symptr->length = 1;
1016 }
1017
1018 symptr->code = code;
1019 symptr->patptr = patptr;
1020 return TRUE;
18529408 1021 }
49704364
WL
1022 }
1023 symptr->patptr = patptr;
1024 return FALSE;
18529408
IZ
1025}
1026
18529408 1027/*
08ca2aa3
TH
1028 There is no way to cleanly handle the case where we should process the
1029 string per byte in its upgraded form while it's really in downgraded form
1030 (e.g. estimates like strend-s as an upper bound for the number of
1031 characters left wouldn't work). So if we foresee the need of this
1032 (pattern starts with U or contains U0), we want to work on the encoded
1033 version of the string. Users are advised to upgrade their pack string
1034 themselves if they need to do a lot of unpacks like this on it
1035*/
1036STATIC bool
1037need_utf8(const char *pat, const char *patend)
1038{
1039 bool first = TRUE;
1040 while (pat < patend) {
1041 if (pat[0] == '#') {
1042 pat++;
1043 pat = memchr(pat, '\n', patend-pat);
1044 if (!pat) return FALSE;
1045 } else if (pat[0] == 'U') {
1046 if (first || pat[1] == '0') return TRUE;
1047 } else first = FALSE;
1048 pat++;
1049 }
1050 return FALSE;
1051}
1052
1053STATIC char
1054first_symbol(const char *pat, const char *patend) {
1055 while (pat < patend) {
1056 if (pat[0] != '#') return pat[0];
1057 pat++;
1058 pat = memchr(pat, '\n', patend-pat);
1059 if (!pat) return 0;
1060 pat++;
1061 }
1062 return 0;
1063}
1064
1065/*
18529408
IZ
1066=for apidoc unpack_str
1067
7accc089
JH
1068The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1069and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1070
1071=cut */
1072
1073I32
f337b084 1074Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1075{
49704364 1076 tempsym_t sym = { 0 };
08ca2aa3 1077
f337b084 1078 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1079 else if (need_utf8(pat, patend)) {
1080 /* We probably should try to avoid this in case a scalar context call
1081 wouldn't get to the "U0" */
1082 STRLEN len = strend - s;
f337b084 1083 s = (char *) bytes_to_utf8(s, &len);
08ca2aa3
TH
1084 SAVEFREEPV(s);
1085 strend = s + len;
f337b084 1086 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1087 }
1088
f337b084
TH
1089 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1090 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1091
49704364
WL
1092 sym.patptr = pat;
1093 sym.patend = patend;
1094 sym.flags = flags;
1095
1096 return unpack_rec(&sym, s, s, strend, NULL );
1097}
1098
7accc089
JH
1099/*
1100=for apidoc unpackstring
1101
608d3aed
WL
1102The engine implementing unpack() Perl function. C<unpackstring> puts the
1103extracted list items on the stack and returns the number of elements.
1104Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1105
1106=cut */
1107
1108I32
f337b084 1109Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
7accc089
JH
1110{
1111 tempsym_t sym = { 0 };
08ca2aa3 1112
f337b084 1113 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1114 else if (need_utf8(pat, patend)) {
1115 /* We probably should try to avoid this in case a scalar context call
1116 wouldn't get to the "U0" */
1117 STRLEN len = strend - s;
f337b084 1118 s = (char *) bytes_to_utf8(s, &len);
08ca2aa3
TH
1119 SAVEFREEPV(s);
1120 strend = s + len;
f337b084 1121 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1122 }
1123
f337b084
TH
1124 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1125 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1126
7accc089
JH
1127 sym.patptr = pat;
1128 sym.patend = patend;
1129 sym.flags = flags;
1130
1131 return unpack_rec(&sym, s, s, strend, NULL );
1132}
1133
49704364
WL
1134STATIC
1135I32
08ca2aa3 1136S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
49704364 1137{
a6ec74c1 1138 dSP;
18529408
IZ
1139 SV *sv;
1140 I32 start_sp_offset = SP - PL_stack_base;
49704364 1141 howlen_t howlen;
a6ec74c1 1142
a6ec74c1 1143 I32 checksum = 0;
92d41999 1144 UV cuv = 0;
a6ec74c1 1145 NV cdouble = 0.0;
f337b084 1146 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364
WL
1147 char* strrelbeg = s;
1148 bool beyond = FALSE;
21c16052 1149 bool explicit_length;
49704364 1150 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1151 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
49704364 1152
49704364 1153 while (next_symbol(symptr)) {
f337b084
TH
1154 I32 len, ai32;
1155 I32 datumtype = symptr->code;
206947d2 1156 /* do first one only unless in list context
08ca2aa3 1157 / is implemented by unpacking the count, then popping it from the
206947d2 1158 stack, so must check that we're not in the middle of a / */
49704364 1159 if ( unpack_only_one
206947d2 1160 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1161 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1162 break;
49704364 1163
f337b084 1164 switch (howlen = symptr->howlen) {
49704364
WL
1165 case e_star:
1166 len = strend - strbeg; /* long enough */
1167 break;
f337b084
TH
1168 default:
1169 /* e_no_len and e_number */
1170 len = symptr->length;
1171 break;
49704364 1172 }
18529408 1173
21c16052 1174 explicit_length = TRUE;
a6ec74c1 1175 redo_switch:
49704364 1176 beyond = s >= strend;
78d46eaa 1177 {
f337b084
TH
1178 struct packsize_t *pack_props =
1179 &packsize[(symptr->code & TYPE_IS_SHRIEKING) ?
1180 PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL];
a72c271b 1181 const int rawtype = TYPE_NO_MODIFIERS(datumtype);
f337b084 1182 int offset = rawtype - pack_props->first;
78d46eaa 1183
f337b084 1184 if (offset >= 0 && offset < pack_props->size) {
78d46eaa 1185 /* Data about this template letter */
f337b084 1186 unsigned char data = pack_props->array[offset];
78d46eaa
NC
1187
1188 if (data) {
1189 /* data nonzero means we can process this letter. */
1190 long size = data & PACK_SIZE_MASK;
1191 long howmany = (strend - s) / size;
1192 if (len > howmany)
1193 len = howmany;
1194
1195 if (!checksum || (data & PACK_SIZE_CANNOT_CSUM)) {
08ca2aa3 1196 if (len && unpack_only_one) len = 1;
78d46eaa
NC
1197 EXTEND(SP, len);
1198 EXTEND_MORTAL(len);
1199 }
1200 }
1201 }
1202 }
1109a392 1203 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1204 default:
1109a392 1205 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1206
a6ec74c1 1207 case '%':
49704364 1208 if (howlen == e_no_len)
18529408 1209 len = 16; /* len is not specified */
a6ec74c1 1210 checksum = len;
92d41999 1211 cuv = 0;
a6ec74c1 1212 cdouble = 0;
18529408 1213 continue;
a6ec74c1 1214 break;
18529408
IZ
1215 case '(':
1216 {
49704364 1217 tempsym_t savsym = *symptr;
66c611c5
MHM
1218 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1219 symptr->flags |= group_modifiers;
49704364
WL
1220 symptr->patend = savsym.grpend;
1221 symptr->level++;
18529408
IZ
1222 PUTBACK;
1223 while (len--) {
49704364 1224 symptr->patptr = savsym.grpbeg;
f337b084
TH
1225 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1226 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1227 unpack_rec(symptr, s, strbeg, strend, &s);
1228 if (s == strend && savsym.howlen == e_star)
49704364 1229 break; /* No way to continue */
18529408
IZ
1230 }
1231 SPAGAIN;
66c611c5 1232 symptr->flags &= ~group_modifiers;
49704364
WL
1233 savsym.flags = symptr->flags;
1234 *symptr = savsym;
18529408
IZ
1235 break;
1236 }
a6ec74c1 1237 case '@':
08ca2aa3
TH
1238 if (utf8) {
1239 s = strrelbeg;
1240 while (len > 0) {
1241 if (s >= strend)
1242 Perl_croak(aTHX_ "'@' outside of string in unpack");
1243 s += UTF8SKIP(s);
1244 len--;
1245 }
1246 if (s > strend)
1247 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1248 } else {
49704364
WL
1249 if (len > strend - strrelbeg)
1250 Perl_croak(aTHX_ "'@' outside of string in unpack");
1251 s = strrelbeg + len;
08ca2aa3 1252 }
a6ec74c1 1253 break;
62f95557
IZ
1254 case 'X' | TYPE_IS_SHRIEKING:
1255 if (!len) /* Avoid division by 0 */
1256 len = 1;
08ca2aa3
TH
1257 if (utf8) {
1258 char *hop, *last;
f337b084
TH
1259 I32 l = len;
1260 hop = last = strbeg;
1261 while (hop < s) {
1262 hop += UTF8SKIP(hop);
1263 if (--l == 0) {
08ca2aa3 1264 last = hop;
f337b084
TH
1265 l = len;
1266 }
08ca2aa3 1267 }
f337b084
TH
1268 if (last > s)
1269 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1270 s = last;
1271 break;
f337b084
TH
1272 }
1273 len = (s - strbeg) % len;
62f95557 1274 /* FALL THROUGH */
a6ec74c1 1275 case 'X':
08ca2aa3
TH
1276 if (utf8) {
1277 while (len > 0) {
1278 if (s <= strbeg)
1279 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1280 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1281 if (s <= strbeg)
1282 Perl_croak(aTHX_ "'X' outside of string in unpack");
1283 }
1284 len--;
1285 }
1286 } else {
a6ec74c1 1287 if (len > s - strbeg)
49704364 1288 Perl_croak(aTHX_ "'X' outside of string in unpack" );
a6ec74c1 1289 s -= len;
08ca2aa3 1290 }
a6ec74c1 1291 break;
62f95557
IZ
1292 case 'x' | TYPE_IS_SHRIEKING:
1293 if (!len) /* Avoid division by 0 */
1294 len = 1;
f337b084
TH
1295 if (utf8) ai32 = utf8_length(strbeg, s) % len;
1296 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1297 if (ai32 == 0) break;
1298 len -= ai32;
62f95557 1299 /* FALL THROUGH */
a6ec74c1 1300 case 'x':
08ca2aa3
TH
1301 if (utf8) {
1302 while (len>0) {
1303 if (s >= strend)
1304 Perl_croak(aTHX_ "'x' outside of string in unpack");
1305 s += UTF8SKIP(s);
1306 len--;
1307 }
1308 } else {
a6ec74c1 1309 if (len > strend - s)
49704364 1310 Perl_croak(aTHX_ "'x' outside of string in unpack");
a6ec74c1 1311 s += len;
f337b084 1312 }
a6ec74c1
JH
1313 break;
1314 case '/':
49704364
WL
1315 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1316 break;
a6ec74c1
JH
1317 case 'A':
1318 case 'Z':
1319 case 'a':
08ca2aa3
TH
1320 if (checksum) {
1321 /* Preliminary length estimate is assumed done in 'W' */
1322 if (len > strend - s) len = strend - s;
1323 goto W_checksum;
1324 }
1325 if (utf8) {
1326 I32 l;
1327 char *hop;
1328 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1329 if (hop >= strend) {
1330 if (hop > strend)
1331 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1332 break;
a6ec74c1 1333 }
a6ec74c1 1334 }
08ca2aa3
TH
1335 if (hop > strend)
1336 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1337 len = hop - s;
1338 } else if (len > strend - s)
1339 len = strend - s;
1340
1341 if (datumtype == 'Z') {
1342 /* 'Z' strips stuff after first null */
f337b084
TH
1343 char *ptr, *end;
1344 end = s + len;
1345 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1346 sv = newSVpvn(s, ptr-s);
1347 if (howlen == e_star) /* exact for 'Z*' */
1348 len = ptr-s + (ptr != strend ? 1 : 0);
1349 } else if (datumtype == 'A') {
1350 /* 'A' strips both nulls and spaces */
1351 char *ptr;
1352 for (ptr = s+len-1; ptr >= s; ptr--)
1353 if (*ptr != 0 && !isSPACE(*ptr)) break;
1354 ptr++;
1355 sv = newSVpvn(s, ptr-s);
1356 } else sv = newSVpvn(s, len);
1357
1358 if (utf8) {
1359 SvUTF8_on(sv);
1360 /* Undo any upgrade done due to need_utf8() */
f337b084 1361 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1362 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1363 }
1364 XPUSHs(sv_2mortal(sv));
08ca2aa3 1365 s += len;
a6ec74c1
JH
1366 break;
1367 case 'B':
08ca2aa3
TH
1368 case 'b': {
1369 char *str;
49704364 1370 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1371 len = (strend - s) * 8;
1372 if (checksum) {
1373 if (!PL_bitcount) {
08ca2aa3 1374 int bits;
a6ec74c1
JH
1375 Newz(601, PL_bitcount, 256, char);
1376 for (bits = 1; bits < 256; bits++) {
1377 if (bits & 1) PL_bitcount[bits]++;
1378 if (bits & 2) PL_bitcount[bits]++;
1379 if (bits & 4) PL_bitcount[bits]++;
1380 if (bits & 8) PL_bitcount[bits]++;
1381 if (bits & 16) PL_bitcount[bits]++;
1382 if (bits & 32) PL_bitcount[bits]++;
1383 if (bits & 64) PL_bitcount[bits]++;
1384 if (bits & 128) PL_bitcount[bits]++;
1385 }
1386 }
f337b084 1387 if (utf8)
08ca2aa3 1388 while (len >= 8 && s < strend) {
f337b084 1389 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1390 len -= 8;
1391 }
f337b084 1392 else
a6ec74c1 1393 while (len >= 8) {
08ca2aa3 1394 cuv += PL_bitcount[*(U8 *)s++];
a6ec74c1
JH
1395 len -= 8;
1396 }
08ca2aa3
TH
1397 if (len && s < strend) {
1398 U8 bits;
f337b084
TH
1399 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1400 if (datumtype == 'b')
a6ec74c1 1401 while (len-- > 0) {
92d41999 1402 if (bits & 1) cuv++;
a6ec74c1
JH
1403 bits >>= 1;
1404 }
f337b084 1405 else
a6ec74c1 1406 while (len-- > 0) {
08ca2aa3 1407 if (bits & 0x80) cuv++;
a6ec74c1
JH
1408 bits <<= 1;
1409 }
1410 }
a6ec74c1
JH
1411 break;
1412 }
08ca2aa3
TH
1413
1414 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1415 SvPOK_on(sv);
1416 str = SvPVX(sv);
1417 if (datumtype == 'b') {
f337b084 1418 U8 bits = 0;
08ca2aa3
TH
1419 ai32 = len;
1420 for (len = 0; len < ai32; len++) {
1421 if (len & 7) bits >>= 1;
1422 else if (utf8) {
1423 if (s >= strend) break;
f337b084 1424 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1425 } else bits = *(U8 *) s++;
1426 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1427 }
08ca2aa3 1428 } else {
f337b084 1429 U8 bits = 0;
08ca2aa3
TH
1430 ai32 = len;
1431 for (len = 0; len < ai32; len++) {
1432 if (len & 7) bits <<= 1;
1433 else if (utf8) {
1434 if (s >= strend) break;
f337b084 1435 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1436 } else bits = *(U8 *) s++;
1437 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1438 }
1439 }
1440 *str = '\0';
08ca2aa3
TH
1441 SvCUR_set(sv, str - SvPVX(sv));
1442 XPUSHs(sv);
a6ec74c1 1443 break;
08ca2aa3 1444 }
a6ec74c1 1445 case 'H':
08ca2aa3
TH
1446 case 'h': {
1447 char *str;
1448 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1449 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1450 len = (strend - s) * 2;
08ca2aa3 1451 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1452 SvPOK_on(sv);
1453 str = SvPVX(sv);
1454 if (datumtype == 'h') {
f337b084 1455 U8 bits = 0;
08ca2aa3
TH
1456 ai32 = len;
1457 for (len = 0; len < ai32; len++) {
1458 if (len & 1) bits >>= 4;
1459 else if (utf8) {
1460 if (s >= strend) break;
f337b084 1461 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1462 } else bits = * (U8 *) s++;
a6ec74c1
JH
1463 *str++ = PL_hexdigit[bits & 15];
1464 }
08ca2aa3 1465 } else {
f337b084 1466 U8 bits = 0;
08ca2aa3
TH
1467 ai32 = len;
1468 for (len = 0; len < ai32; len++) {
1469 if (len & 1) bits <<= 4;
1470 else if (utf8) {
1471 if (s >= strend) break;
f337b084 1472 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1473 } else bits = *(U8 *) s++;
a6ec74c1
JH
1474 *str++ = PL_hexdigit[(bits >> 4) & 15];
1475 }
1476 }
1477 *str = '\0';
08ca2aa3
TH
1478 SvCUR_set(sv, str - SvPVX(sv));
1479 XPUSHs(sv);
a6ec74c1 1480 break;
08ca2aa3 1481 }
a6ec74c1 1482 case 'c':
73cb7263 1483 while (len-- > 0) {
f337b084 1484 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1485 if (aint >= 128) /* fake up signed chars */
1486 aint -= 256;
08ca2aa3 1487 if (!checksum)
ac7f3b1b 1488 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1489 else if (checksum > bits_in_uv)
1490 cdouble += (NV)aint;
1491 else
1492 cuv += aint;
a6ec74c1
JH
1493 }
1494 break;
1495 case 'C':
08ca2aa3
TH
1496 case 'W':
1497 W_checksum:
35bcd338 1498 if (len == 0) {
08ca2aa3
TH
1499 if (explicit_length && datumtype == 'C')
1500 /* Switch to "character" mode */
f337b084 1501 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1502 break;
1503 }
08ca2aa3 1504 if (datumtype == 'C' ?
f337b084
TH
1505 (symptr->flags & FLAG_DO_UTF8) &&
1506 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3
TH
1507 while (len-- > 0 && s < strend) {
1508 UV val;
1509 STRLEN retlen;
f337b084
TH
1510 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1511 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1512 if (retlen == (STRLEN) -1 || retlen == 0)
1513 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1514 s += retlen;
1515 if (!checksum)
1516 PUSHs(sv_2mortal(newSVuv((UV) val)));
1517 else if (checksum > bits_in_uv)
1518 cdouble += (NV) val;
d6d3e8bd 1519 else
08ca2aa3 1520 cuv += val;
a6ec74c1 1521 }
08ca2aa3 1522 } else if (!checksum)
a6ec74c1 1523 while (len-- > 0) {
08ca2aa3
TH
1524 U8 ch = *(U8 *) s++;
1525 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1526 }
08ca2aa3
TH
1527 else if (checksum > bits_in_uv)
1528 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1529 else
1530 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1531 break;
1532 case 'U':
35bcd338 1533 if (len == 0) {
08ca2aa3
TH
1534 if (explicit_length) {
1535 /* Switch to "bytes in UTF-8" mode */
f337b084 1536 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1537 else
1538 /* Should be impossible due to the need_utf8() test */
1539 Perl_croak(aTHX_ "U0 mode on a byte string");
1540 }
35bcd338
JH
1541 break;
1542 }
08ca2aa3 1543 if (len > strend - s) len = strend - s;
73cb7263 1544 if (!checksum) {
08ca2aa3
TH
1545 if (len && unpack_only_one) len = 1;
1546 EXTEND(SP, len);
1547 EXTEND_MORTAL(len);
a6ec74c1 1548 }
08ca2aa3
TH
1549 while (len-- > 0 && s < strend) {
1550 STRLEN retlen;
1551 UV auv;
1552 if (utf8) {
1553 U8 result[UTF8_MAXLEN];
1554 char *ptr;
1555 STRLEN len;
1556 ptr = s;
1557 /* Bug: warns about bad utf8 even if we are short on bytes
1558 and will break out of the loop */
f337b084 1559 if (!uni_to_bytes(aTHX_ &ptr, strend, result, 1, 'U'))
08ca2aa3
TH
1560 break;
1561 len = UTF8SKIP(result);
f337b084
TH
1562 if (!uni_to_bytes(aTHX_ &ptr, strend,
1563 &result[1], len-1, 'U')) break;
08ca2aa3
TH
1564 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1565 s = ptr;
1566 } else {
1567 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1568 if (retlen == (STRLEN) -1 || retlen == 0)
1569 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1570 s += retlen;
1571 }
1572 if (!checksum)
1573 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1574 else if (checksum > bits_in_uv)
08ca2aa3 1575 cdouble += (NV) auv;
73cb7263 1576 else
08ca2aa3 1577 cuv += auv;
a6ec74c1
JH
1578 }
1579 break;
49704364
WL
1580 case 's' | TYPE_IS_SHRIEKING:
1581#if SHORTSIZE != SIZE16
73cb7263 1582 while (len-- > 0) {
08ca2aa3 1583 short ashort;
f337b084
TH
1584 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1585 DO_BO_UNPACK(ashort, s);
08ca2aa3 1586 if (!checksum)
ac7f3b1b 1587 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1588 else if (checksum > bits_in_uv)
1589 cdouble += (NV)ashort;
1590 else
1591 cuv += ashort;
49704364
WL
1592 }
1593 break;
1594#else
1595 /* Fallthrough! */
a6ec74c1 1596#endif
49704364 1597 case 's':
73cb7263 1598 while (len-- > 0) {
08ca2aa3
TH
1599 I16 ai16;
1600
1601#if U16SIZE > SIZE16
1602 ai16 = 0;
1603#endif
f337b084 1604 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1605 DO_BO_UNPACK(ai16, 16);
1109a392 1606#if U16SIZE > SIZE16
73cb7263
NC
1607 if (ai16 > 32767)
1608 ai16 -= 65536;
a6ec74c1 1609#endif
08ca2aa3 1610 if (!checksum)
ac7f3b1b 1611 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1612 else if (checksum > bits_in_uv)
1613 cdouble += (NV)ai16;
1614 else
1615 cuv += ai16;
a6ec74c1
JH
1616 }
1617 break;
49704364
WL
1618 case 'S' | TYPE_IS_SHRIEKING:
1619#if SHORTSIZE != SIZE16
73cb7263 1620 while (len-- > 0) {
08ca2aa3 1621 unsigned short aushort;
f337b084
TH
1622 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1623 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1624 if (!checksum)
1625 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1626 else if (checksum > bits_in_uv)
1627 cdouble += (NV)aushort;
1628 else
1629 cuv += aushort;
49704364
WL
1630 }
1631 break;
1632#else
1633 /* Fallhrough! */
1634#endif
a6ec74c1
JH
1635 case 'v':
1636 case 'n':
1637 case 'S':
73cb7263 1638 while (len-- > 0) {
08ca2aa3
TH
1639 U16 au16;
1640#if U16SIZE > SIZE16
1641 au16 = 0;
1642#endif
f337b084 1643 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1644 DO_BO_UNPACK(au16, 16);
a6ec74c1 1645#ifdef HAS_NTOHS
73cb7263
NC
1646 if (datumtype == 'n')
1647 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1648#endif
1649#ifdef HAS_VTOHS
73cb7263
NC
1650 if (datumtype == 'v')
1651 au16 = vtohs(au16);
a6ec74c1 1652#endif
08ca2aa3
TH
1653 if (!checksum)
1654 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1655 else if (checksum > bits_in_uv)
f337b084 1656 cdouble += (NV) au16;
73cb7263
NC
1657 else
1658 cuv += au16;
a6ec74c1
JH
1659 }
1660 break;
7212898e 1661#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1662 case 'v' | TYPE_IS_SHRIEKING:
1663 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1664 while (len-- > 0) {
08ca2aa3
TH
1665 I16 ai16;
1666# if U16SIZE > SIZE16
1667 ai16 = 0;
1668# endif
f337b084 1669 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1670# ifdef HAS_NTOHS
73cb7263 1671 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1672 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1673# endif /* HAS_NTOHS */
1674# ifdef HAS_VTOHS
73cb7263 1675 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1676 ai16 = (I16) vtohs((U16) ai16);
1677# endif /* HAS_VTOHS */
1678 if (!checksum)
ac7f3b1b 1679 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1680 else if (checksum > bits_in_uv)
08ca2aa3 1681 cdouble += (NV) ai16;
73cb7263
NC
1682 else
1683 cuv += ai16;
068bd2e7
MHM
1684 }
1685 break;
08ca2aa3 1686#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1687 case 'i':
49704364 1688 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1689 while (len-- > 0) {
08ca2aa3 1690 int aint;
f337b084
TH
1691 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1692 DO_BO_UNPACK(aint, i);
08ca2aa3 1693 if (!checksum)
ac7f3b1b 1694 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1695 else if (checksum > bits_in_uv)
1696 cdouble += (NV)aint;
1697 else
1698 cuv += aint;
a6ec74c1
JH
1699 }
1700 break;
1701 case 'I':
49704364 1702 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1703 while (len-- > 0) {
08ca2aa3 1704 unsigned int auint;
f337b084
TH
1705 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1706 DO_BO_UNPACK(auint, i);
08ca2aa3 1707 if (!checksum)
ac7f3b1b 1708 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1709 else if (checksum > bits_in_uv)
1710 cdouble += (NV)auint;
1711 else
1712 cuv += auint;
a6ec74c1
JH
1713 }
1714 break;
92d41999 1715 case 'j':
73cb7263 1716 while (len-- > 0) {
08ca2aa3 1717 IV aiv;
f337b084 1718 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1719#if IVSIZE == INTSIZE
f337b084 1720 DO_BO_UNPACK(aiv, i);
1109a392 1721#elif IVSIZE == LONGSIZE
f337b084 1722 DO_BO_UNPACK(aiv, l);
1109a392 1723#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1724 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1725#else
1726 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1727#endif
08ca2aa3 1728 if (!checksum)
ac7f3b1b 1729 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1730 else if (checksum > bits_in_uv)
1731 cdouble += (NV)aiv;
1732 else
1733 cuv += aiv;
92d41999
JH
1734 }
1735 break;
1736 case 'J':
73cb7263 1737 while (len-- > 0) {
08ca2aa3 1738 UV auv;
f337b084 1739 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1740#if IVSIZE == INTSIZE
f337b084 1741 DO_BO_UNPACK(auv, i);
08ca2aa3 1742#elif IVSIZE == LONGSIZE
f337b084 1743 DO_BO_UNPACK(auv, l);
08ca2aa3 1744#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1745 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1746#else
1747 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1748#endif
08ca2aa3 1749 if (!checksum)
ac7f3b1b 1750 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1751 else if (checksum > bits_in_uv)
1752 cdouble += (NV)auv;
1753 else
1754 cuv += auv;
92d41999
JH
1755 }
1756 break;
49704364
WL
1757 case 'l' | TYPE_IS_SHRIEKING:
1758#if LONGSIZE != SIZE32
73cb7263 1759 while (len-- > 0) {
08ca2aa3 1760 long along;
f337b084
TH
1761 SHIFT_VAR(utf8, s, strend, along, datumtype);
1762 DO_BO_UNPACK(along, l);
08ca2aa3 1763 if (!checksum)
ac7f3b1b 1764 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1765 else if (checksum > bits_in_uv)
1766 cdouble += (NV)along;
1767 else
1768 cuv += along;
49704364
WL
1769 }
1770 break;
1771#else
1772 /* Fallthrough! */
a6ec74c1 1773#endif
49704364 1774 case 'l':
73cb7263 1775 while (len-- > 0) {
08ca2aa3
TH
1776 I32 ai32;
1777#if U32SIZE > SIZE32
1778 ai32 = 0;
1779#endif
f337b084 1780 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1781 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1782#if U32SIZE > SIZE32
08ca2aa3 1783 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1784#endif
08ca2aa3 1785 if (!checksum)
ac7f3b1b 1786 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1787 else if (checksum > bits_in_uv)
1788 cdouble += (NV)ai32;
1789 else
1790 cuv += ai32;
a6ec74c1
JH
1791 }
1792 break;
49704364
WL
1793 case 'L' | TYPE_IS_SHRIEKING:
1794#if LONGSIZE != SIZE32
73cb7263 1795 while (len-- > 0) {
08ca2aa3 1796 unsigned long aulong;
f337b084
TH
1797 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1798 DO_BO_UNPACK(aulong, l);
08ca2aa3 1799 if (!checksum)
ac7f3b1b 1800 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1801 else if (checksum > bits_in_uv)
1802 cdouble += (NV)aulong;
1803 else
1804 cuv += aulong;
49704364
WL
1805 }
1806 break;
1807#else
1808 /* Fall through! */
1809#endif
a6ec74c1
JH
1810 case 'V':
1811 case 'N':
1812 case 'L':
73cb7263 1813 while (len-- > 0) {
08ca2aa3
TH
1814 U32 au32;
1815#if U32SIZE > SIZE32
1816 au32 = 0;
1817#endif
f337b084 1818 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1819 DO_BO_UNPACK(au32, 32);
a6ec74c1 1820#ifdef HAS_NTOHL
73cb7263
NC
1821 if (datumtype == 'N')
1822 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1823#endif
1824#ifdef HAS_VTOHL
73cb7263
NC
1825 if (datumtype == 'V')
1826 au32 = vtohl(au32);
a6ec74c1 1827#endif
08ca2aa3 1828 if (!checksum)
ac7f3b1b 1829 PUSHs(sv_2mortal(newSVuv((UV)au32)));
73cb7263
NC
1830 else if (checksum > bits_in_uv)
1831 cdouble += (NV)au32;
1832 else
1833 cuv += au32;
a6ec74c1
JH
1834 }
1835 break;
7212898e 1836#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1837 case 'V' | TYPE_IS_SHRIEKING:
1838 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1839 while (len-- > 0) {
08ca2aa3
TH
1840 I32 ai32;
1841# if U32SIZE > SIZE32
1842 ai32 = 0;
1843# endif
f337b084 1844 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1845# ifdef HAS_NTOHL
73cb7263
NC
1846 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1847 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1848# endif
1849# ifdef HAS_VTOHL
73cb7263
NC
1850 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1851 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1852# endif
1853 if (!checksum)
ac7f3b1b 1854 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1855 else if (checksum > bits_in_uv)
1856 cdouble += (NV)ai32;
1857 else
1858 cuv += ai32;
068bd2e7
MHM
1859 }
1860 break;
08ca2aa3 1861#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1862 case 'p':
a6ec74c1 1863 while (len-- > 0) {
08ca2aa3 1864 char *aptr;
f337b084 1865 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
5512a2f9 1866 DO_BO_UNPACK_P(aptr);
c4c5f44a
NC
1867 /* newSVpv generates undef if aptr is NULL */
1868 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1869 }
1870 break;
1871 case 'w':
a6ec74c1
JH
1872 {
1873 UV auv = 0;
1874 U32 bytes = 0;
1875
08ca2aa3
TH
1876 while (len > 0 && s < strend) {
1877 U8 ch;
f337b084 1878 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1879 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1880 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1881 if (ch < 0x80) {
a6ec74c1 1882 bytes = 0;
ac7f3b1b 1883 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1884 len--;
1885 auv = 0;
08ca2aa3 1886 continue;
a6ec74c1 1887 }
08ca2aa3 1888 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
1889 char *t;
1890 STRLEN n_a;
1891
1892 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1893 while (s < strend) {
f337b084 1894 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1895 sv = mul128(sv, (U8)(ch & 0x7f));
1896 if (!(ch & 0x80)) {
a6ec74c1
JH
1897 bytes = 0;
1898 break;
1899 }
1900 }
1901 t = SvPV(sv, n_a);
1902 while (*t == '0')
1903 t++;
1904 sv_chop(sv, t);
1905 PUSHs(sv_2mortal(sv));
1906 len--;
1907 auv = 0;
1908 }
1909 }
1910 if ((s >= strend) && bytes)
49704364 1911 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1912 }
1913 break;
1914 case 'P':
49704364
WL
1915 if (symptr->howlen == e_star)
1916 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1917 EXTEND(SP, 1);
08ca2aa3
TH
1918 if (sizeof(char*) <= strend - s) {
1919 char *aptr;
f337b084 1920 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
08ca2aa3 1921 DO_BO_UNPACK_P(aptr);
c4c5f44a
NC
1922 /* newSVpvn generates undef if aptr is NULL */
1923 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 1924 }
a6ec74c1
JH
1925 break;
1926#ifdef HAS_QUAD
1927 case 'q':
73cb7263 1928 while (len-- > 0) {
08ca2aa3 1929 Quad_t aquad;
f337b084
TH
1930 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1931 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
1932 if (!checksum)
1933 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 1934 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
1935 else if (checksum > bits_in_uv)
1936 cdouble += (NV)aquad;
1937 else
1938 cuv += aquad;
1939 }
a6ec74c1
JH
1940 break;
1941 case 'Q':
73cb7263 1942 while (len-- > 0) {
08ca2aa3 1943 Uquad_t auquad;
f337b084
TH
1944 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1945 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
1946 if (!checksum)
1947 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1948 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
1949 else if (checksum > bits_in_uv)
1950 cdouble += (NV)auquad;
1951 else
1952 cuv += auquad;
a6ec74c1
JH
1953 }
1954 break;
08ca2aa3 1955#endif /* HAS_QUAD */
a6ec74c1
JH
1956 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1957 case 'f':
73cb7263 1958 while (len-- > 0) {
08ca2aa3 1959 float afloat;
f337b084 1960 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1961 DO_BO_UNPACK_N(afloat, float);
1962 if (!checksum)
1963 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
1964 else
73cb7263
NC
1965 cdouble += afloat;
1966 }
a6ec74c1
JH
1967 break;
1968 case 'd':
73cb7263 1969 while (len-- > 0) {
08ca2aa3 1970 double adouble;
f337b084 1971 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
1972 DO_BO_UNPACK_N(adouble, double);
1973 if (!checksum)
1974 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
1975 else
73cb7263
NC
1976 cdouble += adouble;
1977 }
a6ec74c1 1978 break;
92d41999 1979 case 'F':
73cb7263 1980 while (len-- > 0) {
08ca2aa3 1981 NV anv;
f337b084 1982 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
1983 DO_BO_UNPACK_N(anv, NV);
1984 if (!checksum)
1985 PUSHs(sv_2mortal(newSVnv(anv)));
1986 else
73cb7263
NC
1987 cdouble += anv;
1988 }
92d41999
JH
1989 break;
1990#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1991 case 'D':
73cb7263 1992 while (len-- > 0) {
08ca2aa3 1993 long double aldouble;
f337b084 1994 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
1995 DO_BO_UNPACK_N(aldouble, long double);
1996 if (!checksum)
1997 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
1998 else
1999 cdouble += aldouble;
92d41999
JH
2000 }
2001 break;
2002#endif
a6ec74c1
JH
2003 case 'u':
2004 /* MKS:
2005 * Initialise the decode mapping. By using a table driven
2006 * algorithm, the code will be character-set independent
2007 * (and just as fast as doing character arithmetic)
2008 */
2009 if (PL_uudmap['M'] == 0) {
2010 int i;
2011
2012 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2013 PL_uudmap[(U8)PL_uuemap[i]] = i;
2014 /*
2015 * Because ' ' and '`' map to the same value,
2016 * we need to decode them both the same.
2017 */
2018 PL_uudmap[' '] = 0;
2019 }
08ca2aa3
TH
2020 {
2021 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2022 sv = sv_2mortal(NEWSV(42, l));
2023 if (l) SvPOK_on(sv);
2024 }
2025 if (utf8) {
2026 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2027 I32 a, b, c, d;
2028 char hunk[4];
2029
2030 hunk[3] = '\0';
2031 while (len > 0) {
2032 next_uni_uu(aTHX_ &s, strend, &a);
2033 next_uni_uu(aTHX_ &s, strend, &b);
2034 next_uni_uu(aTHX_ &s, strend, &c);
2035 next_uni_uu(aTHX_ &s, strend, &d);
2036 hunk[0] = (char)((a << 2) | (b >> 4));
2037 hunk[1] = (char)((b << 4) | (c >> 2));
2038 hunk[2] = (char)((c << 6) | d);
2039 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2040 len -= 3;
2041 }
2042 if (s < strend) {
2043 if (*s == '\n') s++;
2044 else {
2045 /* possible checksum byte */
2046 char *skip = s+UTF8SKIP(s);
2047 if (skip < strend && *skip == '\n') s = skip+1;
2048 }
2049 }
2050 }
2051 } else {
a6ec74c1
JH
2052 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2053 I32 a, b, c, d;
2054 char hunk[4];
2055
2056 hunk[3] = '\0';
2057 len = PL_uudmap[*(U8*)s++] & 077;
2058 while (len > 0) {
2059 if (s < strend && ISUUCHAR(*s))
2060 a = PL_uudmap[*(U8*)s++] & 077;
2061 else
2062 a = 0;
2063 if (s < strend && ISUUCHAR(*s))
2064 b = PL_uudmap[*(U8*)s++] & 077;
2065 else
2066 b = 0;
2067 if (s < strend && ISUUCHAR(*s))
2068 c = PL_uudmap[*(U8*)s++] & 077;
2069 else
2070 c = 0;
2071 if (s < strend && ISUUCHAR(*s))
2072 d = PL_uudmap[*(U8*)s++] & 077;
2073 else
2074 d = 0;
eb160463
GS
2075 hunk[0] = (char)((a << 2) | (b >> 4));
2076 hunk[1] = (char)((b << 4) | (c >> 2));
2077 hunk[2] = (char)((c << 6) | d);
a6ec74c1
JH
2078 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2079 len -= 3;
2080 }
2081 if (*s == '\n')
2082 s++;
92aa5668
JH
2083 else /* possible checksum byte */
2084 if (s + 1 < strend && s[1] == '\n')
2085 s += 2;
a6ec74c1 2086 }
08ca2aa3
TH
2087 }
2088 XPUSHs(sv);
a6ec74c1
JH
2089 break;
2090 }
49704364 2091
a6ec74c1 2092 if (checksum) {
1109a392 2093 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2094 (checksum > bits_in_uv &&
08ca2aa3
TH
2095 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2096 NV trouble, anv;
a6ec74c1 2097
08ca2aa3 2098 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2099 while (checksum >= 16) {
2100 checksum -= 16;
08ca2aa3 2101 anv *= 65536.0;
a6ec74c1 2102 }
a6ec74c1 2103 while (cdouble < 0.0)
08ca2aa3
TH
2104 cdouble += anv;
2105 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2106 sv = newSVnv(cdouble);
a6ec74c1
JH
2107 }
2108 else {
fa8ec7c1
NC
2109 if (checksum < bits_in_uv) {
2110 UV mask = ((UV)1 << checksum) - 1;
92d41999 2111 cuv &= mask;
a6ec74c1 2112 }
c4c5f44a 2113 sv = newSVuv(cuv);
a6ec74c1
JH
2114 }
2115 XPUSHs(sv_2mortal(sv));
2116 checksum = 0;
2117 }
49704364
WL
2118
2119 if (symptr->flags & FLAG_SLASH){
2120 if (SP - PL_stack_base - start_sp_offset <= 0)
2121 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2122 if( next_symbol(symptr) ){
2123 if( symptr->howlen == e_number )
2124 Perl_croak(aTHX_ "Count after length/code in unpack" );
2125 if( beyond ){
2126 /* ...end of char buffer then no decent length available */
2127 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2128 } else {
2129 /* take top of stack (hope it's numeric) */
2130 len = POPi;
2131 if( len < 0 )
2132 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2133 }
2134 } else {
2135 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2136 }
2137 datumtype = symptr->code;
21c16052 2138 explicit_length = FALSE;
49704364
WL
2139 goto redo_switch;
2140 }
a6ec74c1 2141 }
49704364 2142
18529408
IZ
2143 if (new_s)
2144 *new_s = s;
2145 PUTBACK;
2146 return SP - PL_stack_base - start_sp_offset;
2147}
2148
2149PP(pp_unpack)
2150{
2151 dSP;
bab9c0ac 2152 dPOPPOPssrl;
18529408
IZ
2153 I32 gimme = GIMME_V;
2154 STRLEN llen;
2155 STRLEN rlen;
08ca2aa3
TH
2156 char *pat = SvPV(left, llen);
2157 char *s = SvPV(right, rlen);
18529408 2158 char *strend = s + rlen;
08ca2aa3
TH
2159 char *patend = pat + llen;
2160 I32 cnt;
18529408
IZ
2161
2162 PUTBACK;
7accc089 2163 cnt = unpackstring(pat, patend, s, strend,
49704364 2164 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2165 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2166
18529408
IZ
2167 SPAGAIN;
2168 if ( !cnt && gimme == G_SCALAR )
2169 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2170 RETURN;
2171}
2172
f337b084
TH
2173STATIC U8 *
2174doencodes(U8 *h, char *s, I32 len)
a6ec74c1 2175{
f337b084 2176 *h++ = PL_uuemap[len];
a6ec74c1 2177 while (len > 2) {
f337b084
TH
2178 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2179 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2180 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2181 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2182 s += 3;
2183 len -= 3;
2184 }
2185 if (len > 0) {
2186 char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2187 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2188 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2189 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2190 *h++ = PL_uuemap[0];
a6ec74c1 2191 }
f337b084
TH
2192 *h++ = '\n';
2193 return h;
a6ec74c1
JH
2194}
2195
2196STATIC SV *
2197S_is_an_int(pTHX_ char *s, STRLEN l)
2198{
2199 STRLEN n_a;
2200 SV *result = newSVpvn(s, l);
2201 char *result_c = SvPV(result, n_a); /* convenience */
2202 char *out = result_c;
2203 bool skip = 1;
2204 bool ignore = 0;
2205
2206 while (*s) {
2207 switch (*s) {
2208 case ' ':
2209 break;
2210 case '+':
2211 if (!skip) {
2212 SvREFCNT_dec(result);
2213 return (NULL);
2214 }
2215 break;
2216 case '0':
2217 case '1':
2218 case '2':
2219 case '3':
2220 case '4':
2221 case '5':
2222 case '6':
2223 case '7':
2224 case '8':
2225 case '9':
2226 skip = 0;
2227 if (!ignore) {
2228 *(out++) = *s;
2229 }
2230 break;
2231 case '.':
2232 ignore = 1;
2233 break;
2234 default:
2235 SvREFCNT_dec(result);
2236 return (NULL);
2237 }
2238 s++;
2239 }
2240 *(out++) = '\0';
2241 SvCUR_set(result, out - result_c);
2242 return (result);
2243}
2244
2245/* pnum must be '\0' terminated */
2246STATIC int
2247S_div128(pTHX_ SV *pnum, bool *done)
2248{
2249 STRLEN len;
2250 char *s = SvPV(pnum, len);
2251 int m = 0;
2252 int r = 0;
2253 char *t = s;
2254
2255 *done = 1;
2256 while (*t) {
2257 int i;
2258
2259 i = m * 10 + (*t - '0');
2260 m = i & 0x7F;
2261 r = (i >> 7); /* r < 10 */
2262 if (r) {
2263 *done = 0;
2264 }
2265 *(t++) = '0' + r;
2266 }
2267 *(t++) = '\0';
2268 SvCUR_set(pnum, (STRLEN) (t - s));
2269 return (m);
2270}
2271
49704364 2272
a6ec74c1 2273
18529408
IZ
2274/*
2275=for apidoc pack_cat
2276
7accc089
JH
2277The engine implementing pack() Perl function. Note: parameters next_in_list and
2278flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2279
2280=cut */
2281
49704364 2282
18529408
IZ
2283void
2284Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2285{
49704364
WL
2286 tempsym_t sym = { 0 };
2287 sym.patptr = pat;
2288 sym.patend = patend;
7accc089
JH
2289 sym.flags = FLAG_PACK;
2290
2291 (void)pack_rec( cat, &sym, beglist, endlist );
2292}
2293
2294
2295/*
2296=for apidoc packlist
2297
2298The engine implementing pack() Perl function.
2299
2300=cut */
2301
2302
2303void
2304Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2305{
f337b084 2306 STRLEN no_len;
7accc089 2307 tempsym_t sym = { 0 };
f337b084 2308
7accc089
JH
2309 sym.patptr = pat;
2310 sym.patend = patend;
2311 sym.flags = FLAG_PACK;
49704364 2312
f337b084
TH
2313 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2314 Also make sure any UTF8 flag is loaded */
2315 SvPV_force(cat, no_len);
2316 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2317
49704364
WL
2318 (void)pack_rec( cat, &sym, beglist, endlist );
2319}
2320
f337b084
TH
2321/* like sv_utf8_upgrade, but also repoint the group start markers */
2322STATIC void
2323marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2324 STRLEN len;
2325 tempsym_t *group;
2326 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2327
2328 if (SvUTF8(sv)) return;
2329
2330 from_start = SvPVX(sv);
2331 from_end = from_start + SvCUR(sv);
2332 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2333 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2334 if (from_ptr == from_end) {
2335 /* Simple case: no character needs to be changed */
2336 SvUTF8_on(sv);
2337 return;
2338 }
2339
3473cf63 2340 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
f337b084
TH
2341 New('U', to_start, len, char);
2342 Copy(from_start, to_start, from_ptr-from_start, char);
2343 to_ptr = to_start + (from_ptr-from_start);
2344
2345 New('U', marks, sym_ptr->level+2, char *);
2346 for (group=sym_ptr; group; group = group->previous)
2347 marks[group->level] = from_start + group->strbeg;
2348 marks[sym_ptr->level+1] = from_end+1;
2349 for (m = marks; *m < from_ptr; m++)
2350 *m = to_start + (*m-from_start);
2351
2352 for (;from_ptr < from_end; from_ptr++) {
2353 while (*m == from_ptr) *m++ = to_ptr;
2354 to_ptr = uvchr_to_utf8(to_ptr, *(U8 *) from_ptr);
2355 }
2356 *to_ptr = 0;
2357
2358 while (*m == from_ptr) *m++ = to_ptr;
2359 if (m != marks + sym_ptr->level+1) {
2360 Safefree(marks);
2361 Safefree(to_start);
2362 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2363 }
2364 for (group=sym_ptr; group; group = group->previous)
2365 group->strbeg = marks[group->level] - to_start;
2366 Safefree(marks);
2367
2368 if (SvOOK(sv)) {
2369 if (SvIVX(sv)) {
2370 SvLEN(sv) += SvIVX(sv);
2371 from_start -= SvIVX(sv);
2372 SvIV_set(sv, 0);
2373 }
2374 SvFLAGS(sv) &= ~SVf_OOK;
2375 }
2376 if (SvLEN(sv) != 0)
2377 Safefree(from_start);
2378 SvPVX(sv) = to_start;
2379 SvCUR(sv) = to_ptr - to_start;
2380 SvLEN(sv) = len;
2381 SvUTF8_on(sv);
2382}
2383
2384/* Exponential string grower. Makes string extension effectively O(n)
2385 needed says how many extra bytes we need (not counting the final '\0')
2386 Only grows the string if there is an actual lack of space
2387*/
2388STATIC char *
2389sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2390 STRLEN cur = SvCUR(sv);
2391 STRLEN len = SvLEN(sv);
2392 STRLEN extend;
2393 if (len - cur > needed) return SvPVX(sv);
2394 extend = needed > len ? needed : len;
2395 return SvGROW(sv, len+extend+1);
2396}
49704364
WL
2397
2398STATIC
2399SV **
f337b084 2400S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2401{
49704364 2402 tempsym_t lookahead;
f337b084
TH
2403 I32 items = endlist - beglist;
2404 bool found = next_symbol(symptr);
2405 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2406
2407 if (symptr->level == 0 && found && symptr->code == 'U') {
2408 marked_upgrade(aTHX_ cat, symptr);
2409 symptr->flags |= FLAG_DO_UTF8;
2410 utf8 = 0;
49704364 2411 }
f337b084 2412 symptr->strbeg = SvCUR(cat);
49704364
WL
2413
2414 while (found) {
f337b084
TH
2415 SV *fromstr;
2416 STRLEN fromlen;
2417 I32 len;
a6ec74c1 2418 SV *lengthcode = Nullsv;
49704364 2419 I32 datumtype = symptr->code;
f337b084
TH
2420 howlen_t howlen = symptr->howlen;
2421 char *start = SvPVX(cat);
2422 char *cur = start + SvCUR(cat);
49704364 2423
f337b084
TH
2424#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2425
2426 switch (howlen) {
49704364 2427 case e_star:
f337b084
TH
2428 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2429 0 : items;
2430 break;
2431 default:
2432 /* e_no_len and e_number */
2433 len = symptr->length;
49704364
WL
2434 break;
2435 }
2436
f337b084
TH
2437 if (len) {
2438 struct packsize_t *pack_props =
2439 &packsize[(symptr->code & TYPE_IS_SHRIEKING) ?
2440 PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL];
2441 const int rawtype = TYPE_NO_MODIFIERS(datumtype);
2442 int offset = rawtype - pack_props->first;
2443
2444 if (offset >= 0 && offset < pack_props->size) {
2445 /* Data about this template letter */
2446 unsigned char data = pack_props->array[offset];
2447
2448 if (data && !(data & PACK_SIZE_UNPREDICTABLE)) {
2449 /* We can process this letter. */
2450 STRLEN size = data & PACK_SIZE_MASK;
2451 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2452 }
2453 }
2454
2455 }
2456
49704364
WL
2457 /* Look ahead for next symbol. Do we have code/code? */
2458 lookahead = *symptr;
2459 found = next_symbol(&lookahead);
2460 if ( symptr->flags & FLAG_SLASH ) {
f337b084 2461 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
49704364
WL
2462 if ( 0 == strchr( "aAZ", lookahead.code ) ||
2463 e_star != lookahead.howlen )
2464 Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
f337b084
TH
2465 lengthcode =
2466 sv_2mortal(newSViv((items > 0 ? DO_UTF8(*beglist) ? sv_len_utf8(*beglist) : sv_len(*beglist) : 0) + (lookahead.code == 'Z' ? 1 : 0)));
a6ec74c1 2467 }
49704364 2468
f337b084
TH
2469 /* Code inside the switch must take care to properly update
2470 cat (CUR length and '\0' termination) if it updated *cur and
2471 doesn't simply leave using break */
1109a392 2472 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2473 default:
f337b084
TH
2474 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2475 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2476 case '%':
49704364 2477 Perl_croak(aTHX_ "'%%' may not be used in pack");
a6ec74c1 2478 case '@':
f337b084
TH
2479 if (utf8) {
2480 char *s = start + symptr->strbeg;
2481 while (len > 0 && s < cur) {
2482 s += UTF8SKIP(s);
2483 len--;
2484 }
2485 if (s > cur)
2486 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2487 if (len > 0) {
2488 grow:
2489 GROWING(0, cat, start, cur, len);
2490 Zero(cur, len, char);
2491 cur += len;
2492 } else if (s < cur) cur = s;
2493 else goto no_change;
2494 } else {
2495 len -= cur - (start+symptr->strbeg);
2496 if (len > 0) goto grow;
a6ec74c1 2497 len = -len;
f337b084
TH
2498 if (len > 0) goto shrink;
2499 else goto no_change;
2500 }
a6ec74c1 2501 break;
f337b084 2502 case '(': {
49704364 2503 tempsym_t savsym = *symptr;
66c611c5
MHM
2504 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2505 symptr->flags |= group_modifiers;
49704364
WL
2506 symptr->patend = savsym.grpend;
2507 symptr->level++;
f337b084 2508 symptr->previous = &lookahead;
18529408 2509 while (len--) {
f337b084
TH
2510 U32 was_utf8;
2511 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2512 else symptr->flags &= ~FLAG_PARSE_UTF8;
2513 was_utf8 = SvUTF8(cat);
49704364 2514 symptr->patptr = savsym.grpbeg;
f337b084
TH
2515 beglist = pack_rec(cat, symptr, beglist, endlist);
2516 if (SvUTF8(cat) != was_utf8)
2517 /* This had better be an upgrade while in utf8==0 mode */
2518 utf8 = 1;
2519
49704364 2520 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2521 break; /* No way to continue */
2522 }
f337b084
TH
2523 lookahead.flags = symptr->flags & ~group_modifiers;
2524 goto no_change;
18529408 2525 }
62f95557
IZ
2526 case 'X' | TYPE_IS_SHRIEKING:
2527 if (!len) /* Avoid division by 0 */
2528 len = 1;
f337b084
TH
2529 if (utf8) {
2530 char *hop, *last;
2531 I32 l = len;
2532 hop = last = start;
2533 while (hop < cur) {
2534 hop += UTF8SKIP(hop);
2535 if (--l == 0) {
2536 last = hop;
2537 l = len;
2538 }
2539 }
2540 if (last > cur)
2541 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2542 cur = last;
2543 break;
2544 }
2545 len = (cur-start) % len;
62f95557 2546 /* FALL THROUGH */
a6ec74c1 2547 case 'X':
f337b084
TH
2548 if (utf8) {
2549 if (len < 1) goto no_change;
2550 while (len > 0) {
2551 if (cur <= start)
2552 Perl_croak(aTHX_ "'X' outside of string in pack");
2553 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2554 if (cur <= start)
2555 Perl_croak(aTHX_ "'X' outside of string in pack");
2556 }
2557 len--;
2558 }
2559 } else {
a6ec74c1 2560 shrink:
f337b084 2561 if (cur - start < len)
49704364 2562 Perl_croak(aTHX_ "'X' outside of string in pack");
f337b084
TH
2563 cur -= len;
2564 }
2565 if (cur < start+symptr->strbeg) {
2566 /* Make sure group starts don't point into the void */
2567 tempsym_t *group;
2568 STRLEN length = cur-start;
2569 for (group = symptr;
2570 group && length < group->strbeg;
2571 group = group->previous) group->strbeg = length;
2572 lookahead.strbeg = length;
2573 }
a6ec74c1 2574 break;
f337b084
TH
2575 case 'x' | TYPE_IS_SHRIEKING: {
2576 I32 ai32;
62f95557
IZ
2577 if (!len) /* Avoid division by 0 */
2578 len = 1;
f337b084
TH
2579 if (utf8) ai32 = utf8_length(start, cur) % len;
2580 else ai32 = (cur - start) % len;
2581 if (ai32 == 0) goto no_change;
2582 len -= ai32;
2583 }
62f95557 2584 /* FALL THROUGH */
a6ec74c1 2585 case 'x':
f337b084 2586 goto grow;
a6ec74c1
JH
2587 case 'A':
2588 case 'Z':
f337b084
TH
2589 case 'a': {
2590 char *aptr;
2591
a6ec74c1
JH
2592 fromstr = NEXTFROM;
2593 aptr = SvPV(fromstr, fromlen);
f337b084
TH
2594 if (DO_UTF8(fromstr)) {
2595 char *end, *s;
2596
2597 if (!utf8 && !SvUTF8(cat)) {
2598 marked_upgrade(aTHX_ cat, symptr);
2599 lookahead.flags |= FLAG_DO_UTF8;
2600 lookahead.strbeg = symptr->strbeg;
2601 utf8 = 1;
2602 start = SvPVX(cat);
2603 cur = start + SvCUR(cat);
2604 }
49704364 2605 if (howlen == e_star) {
f337b084
TH
2606 if (utf8) goto string_copy;
2607 len = fromlen+1;
2608 }
2609 s = aptr;
2610 end = aptr + fromlen;
2611 fromlen = datumtype == 'Z' ? len-1 : len;
2612 while ((I32) fromlen > 0 && s < end) {
2613 s += UTF8SKIP(s);
2614 fromlen--;
2615 }
2616 if (s > end)
2617 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2618 if (utf8) {
a6ec74c1 2619 len = fromlen;
f337b084
TH
2620 if (datumtype == 'Z') len++;
2621 fromlen = s-aptr;
2622 len += fromlen;
2623
2624 goto string_copy;
2625 }
2626 fromlen = len - fromlen;
2627 if (datumtype == 'Z') fromlen--;
2628 if (howlen == e_star) {
2629 len = fromlen;
2630 if (datumtype == 'Z') len++;
a6ec74c1 2631 }
f337b084
TH
2632 GROWING(0, cat, start, cur, len);
2633 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2634 datumtype | TYPE_IS_PACK))
2635 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2636 cur += fromlen;
a6ec74c1 2637 len -= fromlen;
f337b084
TH
2638 } else if (utf8) {
2639 if (howlen == e_star) {
2640 len = fromlen;
2641 if (datumtype == 'Z') len++;
a6ec74c1 2642 }
f337b084
TH
2643 if (len <= (I32) fromlen) {
2644 fromlen = len;
2645 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2646 }
3473cf63
RGS
2647 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2648 upgrade, so:
2649 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2650 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2651 len -= fromlen;
2652 while (fromlen > 0) {
2653 cur = uvchr_to_utf8(cur, * (U8 *) aptr);
2654 aptr++;
2655 fromlen--;
a6ec74c1 2656 }
f337b084
TH
2657 } else {
2658 string_copy:
2659 if (howlen == e_star) {
2660 len = fromlen;
2661 if (datumtype == 'Z') len++;
2662 }
2663 if (len <= (I32) fromlen) {
2664 fromlen = len;
2665 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2666 }
f337b084
TH
2667 GROWING(0, cat, start, cur, len);
2668 Copy(aptr, cur, fromlen, char);
2669 cur += fromlen;
2670 len -= fromlen;
a6ec74c1 2671 }
f337b084
TH
2672 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2673 cur += len;
a6ec74c1 2674 break;
f337b084 2675 }
a6ec74c1 2676 case 'B':
f337b084
TH
2677 case 'b': {
2678 char *str, *end;
2679 I32 l, field_len;
2680 U8 bits;
2681 bool utf8_source;
2682 U32 utf8_flags;
a6ec74c1
JH
2683
2684 fromstr = NEXTFROM;
a6ec74c1 2685 str = SvPV(fromstr, fromlen);
f337b084
TH
2686 end = str + fromlen;
2687 if (DO_UTF8(fromstr)) {
2688 utf8_source = TRUE;
2689 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2690 } else {
2691 utf8_source = FALSE;
2692 utf8_flags = 0; /* Unused, but keep compilers happy */
2693 }
2694 if (howlen == e_star) len = fromlen;
2695 field_len = (len+7)/8;
2696 GROWING(utf8, cat, start, cur, field_len);
2697 if (len > (I32)fromlen) len = fromlen;
2698 bits = 0;
2699 l = 0;
2700 if (datumtype == 'B')
2701 while (l++ < len) {
2702 if (utf8_source) {
2703 UV val;
2704 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2705 bits |= val & 1;
2706 } else bits |= *str++ & 1;
2707 if (l & 7) bits <<= 1;
a6ec74c1 2708 else {
f337b084
TH
2709 PUSH_BYTE(utf8, cur, bits);
2710 bits = 0;
a6ec74c1
JH
2711 }
2712 }
f337b084
TH
2713 else
2714 /* datumtype == 'b' */
2715 while (l++ < len) {
2716 if (utf8_source) {
2717 UV val;
2718 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2719 if (val & 1) bits |= 0x80;
2720 } else if (*str++ & 1)
2721 bits |= 0x80;
2722 if (l & 7) bits >>= 1;
a6ec74c1 2723 else {
f337b084
TH
2724 PUSH_BYTE(utf8, cur, bits);
2725 bits = 0;
a6ec74c1
JH
2726 }
2727 }
f337b084
TH
2728 l--;
2729 if (l & 7) {
a6ec74c1 2730 if (datumtype == 'B')
f337b084 2731 bits <<= 7 - (l & 7);
a6ec74c1 2732 else
f337b084
TH
2733 bits >>= 7 - (l & 7);
2734 PUSH_BYTE(utf8, cur, bits);
2735 l += 7;
a6ec74c1 2736 }
f337b084
TH
2737 /* Determine how many chars are left in the requested field */
2738 l /= 8;
2739 if (howlen == e_star) field_len = 0;
2740 else field_len -= l;
2741 Zero(cur, field_len, char);
2742 cur += field_len;
a6ec74c1 2743 break;
f337b084 2744 }
a6ec74c1 2745 case 'H':
f337b084
TH
2746 case 'h': {
2747 char *str, *end;
2748 I32 l, field_len;
2749 U8 bits;
2750 bool utf8_source;
2751 U32 utf8_flags;
a6ec74c1
JH
2752
2753 fromstr = NEXTFROM;
a6ec74c1 2754 str = SvPV(fromstr, fromlen);
f337b084
TH
2755 end = str + fromlen;
2756 if (DO_UTF8(fromstr)) {
2757 utf8_source = TRUE;
2758 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2759 } else {
2760 utf8_source = FALSE;
2761 utf8_flags = 0; /* Unused, but keep compilers happy */
2762 }
2763 if (howlen == e_star) len = fromlen;
2764 field_len = (len+1)/2;
2765 GROWING(utf8, cat, start, cur, field_len);
2766 if (!utf8 && len > (I32)fromlen) len = fromlen;
2767 bits = 0;
2768 l = 0;
2769 if (datumtype == 'H')
2770 while (l++ < len) {
2771 if (utf8_source) {
2772 UV val;
2773 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2774 if (val < 256 && isALPHA(val))
2775 bits |= (val + 9) & 0xf;
a6ec74c1 2776 else
f337b084
TH
2777 bits |= val & 0xf;
2778 } else if (isALPHA(*str))
2779 bits |= (*str++ + 9) & 0xf;
2780 else
2781 bits |= *str++ & 0xf;
2782 if (l & 1) bits <<= 4;
a6ec74c1 2783 else {
f337b084
TH
2784 PUSH_BYTE(utf8, cur, bits);
2785 bits = 0;
a6ec74c1
JH
2786 }
2787 }
f337b084
TH
2788 else
2789 while (l++ < len) {
2790 if (utf8_source) {
2791 UV val;
2792 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2793 if (val < 256 && isALPHA(val))
2794 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2795 else
f337b084
TH
2796 bits |= (val & 0xf) << 4;
2797 } else if (isALPHA(*str))
2798 bits |= ((*str++ + 9) & 0xf) << 4;
2799 else
2800 bits |= (*str++ & 0xf) << 4;
2801 if (l & 1) bits >>= 4;
2802 else {
2803 PUSH_BYTE(utf8, cur, bits);
2804 bits = 0;
a6ec74c1
JH
2805 }
2806 }
f337b084
TH
2807 l--;
2808 if (l & 1) {
2809 PUSH_BYTE(utf8, cur, bits);
2810 l++;
2811 }
2812 /* Determine how many chars are left in the requested field */
2813 l /= 2;
2814 if (howlen == e_star) field_len = 0;
2815 else field_len -= l;
2816 Zero(cur, field_len, char);
2817 cur += field_len;
2818 break;
a6ec74c1 2819 }
f337b084
TH
2820 case 'c':
2821 while (len-- > 0) {
2822 IV aiv;
2823 fromstr = NEXTFROM;
2824 aiv = SvIV(fromstr);
2825 if ((-128 > aiv || aiv > 127) &&
2826 ckWARN(WARN_PACK))
2827 Perl_warner(aTHX_ packWARN(WARN_PACK),
2828 "Character in 'c' format wrapped in pack");
2829 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2830 }
2831 break;
2832 case 'C':
f337b084
TH
2833 if (len == 0) {
2834 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2835 break;
2836 }
2837 GROWING(0, cat, start, cur, len);
a6ec74c1 2838 while (len-- > 0) {
f337b084 2839 IV aiv;
a6ec74c1 2840 fromstr = NEXTFROM;
f337b084
TH
2841 aiv = SvIV(fromstr);
2842 if ((0 > aiv || aiv > 0xff) &&
a6ec74c1 2843 ckWARN(WARN_PACK))
9014280d 2844 Perl_warner(aTHX_ packWARN(WARN_PACK),
49704364 2845 "Character in 'C' format wrapped in pack");
f337b084
TH
2846 *cur++ = aiv & 0xff;
2847 }
a6ec74c1 2848 break;
f337b084
TH
2849 case 'W': {
2850 char *end;
2851 U8 in_bytes = IN_BYTES;
2852
2853 end = start+SvLEN(cat)-1;
2854 if (utf8) end -= UTF8_MAXLEN-1;
2855 while (len-- > 0) {
2856 UV auv;
2857 fromstr = NEXTFROM;
2858 auv = SvUV(fromstr);
2859 if (in_bytes) auv = auv % 0x100;
2860 if (utf8) {
2861 W_utf8:
2862 if (cur > end) {
2863 *cur = '\0';
2864 SvCUR(cat) = cur - start;
2865
2866 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2867 end = start+SvLEN(cat)-UTF8_MAXLEN;
2868 }
2869 cur = uvuni_to_utf8_flags(cur, NATIVE_TO_UNI(auv),
2870 ckWARN(WARN_UTF8) ?
2871 0 : UNICODE_ALLOW_ANY);
2872 } else {
2873 if (auv >= 0x100) {
2874 if (!SvUTF8(cat)) {
2875 *cur = '\0';
2876 SvCUR(cat) = cur - start;
2877 marked_upgrade(aTHX_ cat, symptr);
2878 lookahead.flags |= FLAG_DO_UTF8;
2879 lookahead.strbeg = symptr->strbeg;
2880 utf8 = 1;
2881 start = SvPVX(cat);
2882 cur = start + SvCUR(cat);
2883 end = start+SvLEN(cat)-UTF8_MAXLEN;
2884 goto W_utf8;
2885 }
2886 if (ckWARN(WARN_PACK))
9014280d 2887 Perl_warner(aTHX_ packWARN(WARN_PACK),
f337b084
TH
2888 "Character in 'W' format wrapped in pack");
2889 auv &= 0xff;
2890 }
2891 if (cur >= end) {
2892 *cur = '\0';
2893 SvCUR(cat) = cur - start;
2894 GROWING(0, cat, start, cur, len+1);
2895 end = start+SvLEN(cat)-1;
2896 }
2897 *(U8 *) cur++ = auv;
a6ec74c1
JH
2898 }
2899 }
2900 break;
f337b084
TH
2901 }
2902 case 'U': {
2903 char *end;
2904
2905 if (len == 0) {
2906 if (!(symptr->flags & FLAG_DO_UTF8)) {
2907 marked_upgrade(aTHX_ cat, symptr);
2908 lookahead.flags |= FLAG_DO_UTF8;
2909 lookahead.strbeg = symptr->strbeg;
2910 }
2911 utf8 = 0;
2912 goto no_change;
2913 }
2914
2915 end = start+SvLEN(cat);
2916 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2917 while (len-- > 0) {
f337b084 2918 UV auv;
a6ec74c1 2919 fromstr = NEXTFROM;
f337b084
TH
2920 auv = SvUV(fromstr);
2921 if (utf8) {
c0ffc3dc
RGS
2922 char buffer[UTF8_MAXLEN], *endb;
2923 endb = uvuni_to_utf8_flags(buffer, auv,
f337b084
TH
2924 ckWARN(WARN_UTF8) ?
2925 0 : UNICODE_ALLOW_ANY);
3473cf63 2926 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
f337b084
TH
2927 *cur = '\0';
2928 SvCUR(cat) = cur - start;
3473cf63
RGS
2929 GROWING(0, cat, start, cur,
2930 len+(endb-buffer)*UTF8_EXPAND);
2931 end = start+SvLEN(cat);
f337b084 2932 }
c0ffc3dc 2933 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
f337b084
TH
2934 } else {
2935 if (cur >= end) {
2936 *cur = '\0';
2937 SvCUR(cat) = cur - start;
2938 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2939 end = start+SvLEN(cat)-UTF8_MAXLEN;
2940 }
2941 cur = uvuni_to_utf8_flags(cur, auv,
52ea3e69 2942 ckWARN(WARN_UTF8) ?
f337b084
TH
2943 0 : UNICODE_ALLOW_ANY);
2944 }
a6ec74c1 2945 }
a6ec74c1 2946 break;
f337b084 2947 }
a6ec74c1
JH
2948 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2949 case 'f':
a6ec74c1 2950 while (len-- > 0) {
f337b084
TH
2951 float afloat;
2952 NV anv;
a6ec74c1 2953 fromstr = NEXTFROM;
f337b084 2954 anv = SvNV(fromstr);
5cdb9e01 2955#ifdef __VOS__
f337b084 2956 /* VOS does not automatically map a floating-point overflow
5cdb9e01
PG
2957 during conversion from double to float into infinity, so we
2958 do it by hand. This code should either be generalized for
2959 any OS that needs it, or removed if and when VOS implements
2960 posix-976 (suggestion to support mapping to infinity).
2961 Paul.Green@stratus.com 02-04-02. */
f337b084 2962 if (anv > FLT_MAX)
5cdb9e01 2963 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 2964 else if (anv < -FLT_MAX)
5cdb9e01 2965 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
2966 else afloat = (float) anv;
2967#else /* __VOS__ */
baf3cf9c 2968# if defined(VMS) && !defined(__IEEE_FP)
f337b084 2969 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
baf3cf9c
CB
2970 * on Alpha; fake it if we don't have them.
2971 */
f337b084 2972 if (anv > FLT_MAX)
baf3cf9c 2973 afloat = FLT_MAX;
f337b084 2974 else if (anv < -FLT_MAX)
baf3cf9c 2975 afloat = -FLT_MAX;
f337b084 2976 else afloat = (float)anv;
baf3cf9c 2977# else
f337b084 2978 afloat = (float)anv;
baf3cf9c 2979# endif
f337b084 2980#endif /* __VOS__ */
1109a392 2981 DO_BO_PACK_N(afloat, float);
f337b084 2982 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2983 }
2984 break;
2985 case 'd':
a6ec74c1 2986 while (len-- > 0) {
f337b084
TH
2987 double adouble;
2988 NV anv;
a6ec74c1 2989 fromstr = NEXTFROM;
f337b084 2990 anv = SvNV(fromstr);
5cdb9e01 2991#ifdef __VOS__
f337b084 2992 /* VOS does not automatically map a floating-point overflow
5cdb9e01
PG
2993 during conversion from long double to double into infinity,
2994 so we do it by hand. This code should either be generalized
2995 for any OS that needs it, or removed if and when VOS
2996 implements posix-976 (suggestion to support mapping to
2997 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 2998 if (anv > DBL_MAX)
5cdb9e01 2999 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3000 else if (anv < -DBL_MAX)
5cdb9e01 3001 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3002 else adouble = (double) anv;
3003#else /* __VOS__ */
baf3cf9c 3004# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3005 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
baf3cf9c
CB
3006 * on Alpha; fake it if we don't have them.
3007 */
f337b084 3008 if (anv > DBL_MAX)
baf3cf9c 3009 adouble = DBL_MAX;
f337b084 3010 else if (anv < -DBL_MAX)
baf3cf9c 3011 adouble = -DBL_MAX;
f337b084 3012 else adouble = (double)anv;
baf3cf9c 3013# else
f337b084 3014 adouble = (double)anv;
baf3cf9c 3015# endif
f337b084 3016#endif /* __VOS__ */
1109a392 3017 DO_BO_PACK_N(adouble, double);
f337b084 3018 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3019 }
3020 break;
f337b084
TH
3021 case 'F': {
3022 NV anv;
1109a392 3023 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3024 while (len-- > 0) {
3025 fromstr = NEXTFROM;
3026 anv = SvNV(fromstr);
1109a392 3027 DO_BO_PACK_N(anv, NV);
f337b084 3028 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3029 }
3030 break;
f337b084 3031 }
92d41999 3032#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
f337b084
TH
3033 case 'D': {
3034 long double aldouble;
1109a392
MHM
3035 /* long doubles can have unused bits, which may be nonzero */
3036 Zero(&aldouble, 1, long double);
92d41999
JH
3037 while (len-- > 0) {
3038 fromstr = NEXTFROM;
3039 aldouble = (long double)SvNV(fromstr);
1109a392 3040 DO_BO_PACK_N(aldouble, long double);
f337b084 3041 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3042 }
3043 break;
f337b084 3044 }
92d41999 3045#endif
7212898e 3046#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3047 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3048#endif
a6ec74c1
JH
3049 case 'n':
3050 while (len-- > 0) {
f337b084 3051 I16 ai16;
a6ec74c1 3052 fromstr = NEXTFROM;
ef108786 3053 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3054#ifdef HAS_HTONS
ef108786 3055 ai16 = PerlSock_htons(ai16);
a6ec74c1 3056#endif
f337b084 3057 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3058 }
3059 break;
7212898e 3060#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3061 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3062#endif
a6ec74c1
JH
3063 case 'v':
3064 while (len-- > 0) {
f337b084 3065 I16 ai16;
a6ec74c1 3066 fromstr = NEXTFROM;
ef108786 3067 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3068#ifdef HAS_HTOVS
ef108786 3069 ai16 = htovs(ai16);
a6ec74c1 3070#endif
f337b084 3071 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3072 }
3073 break;
49704364 3074 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3075#if SHORTSIZE != SIZE16
a6ec74c1 3076 while (len-- > 0) {
f337b084 3077 unsigned short aushort;
a6ec74c1
JH
3078 fromstr = NEXTFROM;
3079 aushort = SvUV(fromstr);
1109a392 3080 DO_BO_PACK(aushort, s);
f337b084 3081 PUSH_VAR(utf8, cur, aushort);
49704364
WL
3082 }
3083 break;
3084#else
3085 /* Fall through! */
a6ec74c1 3086#endif
49704364 3087 case 'S':
a6ec74c1 3088 while (len-- > 0) {
f337b084 3089 U16 au16;
a6ec74c1 3090 fromstr = NEXTFROM;
ef108786
MHM
3091 au16 = (U16)SvUV(fromstr);
3092 DO_BO_PACK(au16, 16);
f337b084 3093 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3094 }
3095 break;
49704364 3096 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3097#if SHORTSIZE != SIZE16
a6ec74c1 3098 while (len-- > 0) {
f337b084 3099 short ashort;
a6ec74c1
JH
3100 fromstr = NEXTFROM;
3101 ashort = SvIV(fromstr);
1109a392 3102 DO_BO_PACK(ashort, s);
f337b084 3103 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3104 }
49704364
WL
3105 break;
3106#else
3107 /* Fall through! */
a6ec74c1 3108#endif
49704364
WL
3109 case 's':
3110 while (len-- > 0) {
f337b084 3111 I16 ai16;
49704364 3112 fromstr = NEXTFROM;
ef108786
MHM
3113 ai16 = (I16)SvIV(fromstr);
3114 DO_BO_PACK(ai16, 16);
f337b084 3115 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3116 }
3117 break;
3118 case 'I':
49704364 3119 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3120 while (len-- > 0) {
f337b084 3121 unsigned int auint;
a6ec74c1
JH
3122 fromstr = NEXTFROM;
3123 auint = SvUV(fromstr);
1109a392 3124 DO_BO_PACK(auint, i);
f337b084 3125 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3126 }
3127 break;
92d41999
JH
3128 case 'j':
3129 while (len-- > 0) {
f337b084 3130 IV aiv;
92d41999
JH
3131 fromstr = NEXTFROM;
3132 aiv = SvIV(fromstr);
1109a392
MHM
3133#if IVSIZE == INTSIZE
3134 DO_BO_PACK(aiv, i);
3135#elif IVSIZE == LONGSIZE
3136 DO_BO_PACK(aiv, l);
3137#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3138 DO_BO_PACK(aiv, 64);
f337b084
TH
3139#else
3140 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3141#endif
f337b084 3142 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3143 }
3144 break;
3145 case 'J':
3146 while (len-- > 0) {
f337b084 3147 UV auv;
92d41999
JH
3148 fromstr = NEXTFROM;
3149 auv = SvUV(fromstr);
1109a392
MHM
3150#if UVSIZE == INTSIZE
3151 DO_BO_PACK(auv, i);
3152#elif UVSIZE == LONGSIZE
3153 DO_BO_PACK(auv, l);
3154#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3155 DO_BO_PACK(auv, 64);
f337b084
TH
3156#else
3157 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3158#endif
f337b084 3159 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3160 }
3161 break;
a6ec74c1
JH
3162 case 'w':
3163 while (len-- > 0) {
f337b084 3164 NV anv;
a6ec74c1 3165 fromstr = NEXTFROM;
15e9f109 3166 anv = SvNV(fromstr);
a6ec74c1 3167
f337b084
TH
3168 if (anv < 0) {
3169 *cur = '\0';
3170 SvCUR(cat) = cur - start;
49704364 3171 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3172 }
a6ec74c1 3173
196b62db
NC
3174 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3175 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3176 any negative IVs will have already been got by the croak()
3177 above. IOK is untrue for fractions, so we test them
3178 against UV_MAX_P1. */
f337b084
TH
3179 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3180 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3181 char *in = buf + sizeof(buf);
196b62db 3182 UV auv = SvUV(fromstr);
a6ec74c1
JH
3183
3184 do {
eb160463 3185 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3186 auv >>= 7;
3187 } while (auv);
3188 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3189 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3190 in, (buf + sizeof(buf)) - in);
3191 } else if (SvPOKp(fromstr))
3192 goto w_string;
a6ec74c1 3193 else if (SvNOKp(fromstr)) {
0258719b
NC
3194 /* 10**NV_MAX_10_EXP is the largest power of 10
3195 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3196 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3197 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3198 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3199 Some C compilers are strict about integral constant
3200 expressions so we conservatively divide by a slightly
3201 smaller integer instead of multiplying by the exact
3202 floating-point value.
0258719b
NC
3203 */
3204#ifdef NV_MAX_10_EXP
f337b084 3205 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3206 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3207#else
f337b084 3208 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3209 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3210#endif
a6ec74c1
JH
3211 char *in = buf + sizeof(buf);
3212
15e9f109 3213 anv = Perl_floor(anv);
a6ec74c1 3214 do {
15e9f109 3215 NV next = Perl_floor(anv / 128);
a6ec74c1 3216 if (in <= buf) /* this cannot happen ;-) */
49704364 3217 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3218 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3219 anv = next;
3220 } while (anv > 0);
a6ec74c1 3221 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3222 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3223 in, (buf + sizeof(buf)) - in);
3224 } else {
735b914b
JH
3225 char *from, *result, *in;
3226 SV *norm;
3227 STRLEN len;
3228 bool done;
3229
f337b084 3230 w_string:
735b914b
JH
3231 /* Copy string and check for compliance */
3232 from = SvPV(fromstr, len);
3233 if ((norm = is_an_int(from, len)) == NULL)
49704364 3234 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
3235
3236 New('w', result, len, char);
3237 in = result + len;
3238 done = FALSE;
f337b084 3239 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3240 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3241 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3242 in, (result + len) - in);
735b914b
JH
3243 Safefree(result);
3244 SvREFCNT_dec(norm); /* free norm */
3245 }
a6ec74c1
JH
3246 }
3247 break;
3248 case 'i':
49704364 3249 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3250 while (len-- > 0) {
f337b084 3251 int aint;
a6ec74c1
JH
3252 fromstr = NEXTFROM;
3253 aint = SvIV(fromstr);
1109a392 3254 DO_BO_PACK(aint, i);
f337b084 3255 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3256 }
3257 break;
7212898e 3258#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3259 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3260#endif
a6ec74c1
JH
3261 case 'N':
3262 while (len-- > 0) {
f337b084 3263 U32 au32;
a6ec74c1 3264 fromstr = NEXTFROM;
ef108786 3265 au32 = SvUV(fromstr);
a6ec74c1 3266#ifdef HAS_HTONL
ef108786 3267 au32 = PerlSock_htonl(au32);
a6ec74c1 3268#endif
f337b084 3269 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3270 }
3271 break;
7212898e 3272#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3273 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3274#endif
a6ec74c1
JH
3275 case 'V':
3276 while (len-- > 0) {
f337b084 3277 U32 au32;
a6ec74c1 3278 fromstr = NEXTFROM;
ef108786 3279 au32 = SvUV(fromstr);
a6ec74c1 3280#ifdef HAS_HTOVL
ef108786 3281 au32 = htovl(au32);
a6ec74c1 3282#endif
f337b084 3283 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3284 }
3285 break;
49704364 3286 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3287#if LONGSIZE != SIZE32
a6ec74c1 3288 while (len-- > 0) {
f337b084 3289 unsigned long aulong;
a6ec74c1
JH
3290 fromstr = NEXTFROM;
3291 aulong = SvUV(fromstr);
1109a392 3292 DO_BO_PACK(aulong, l);
f337b084 3293 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3294 }
49704364
WL
3295 break;
3296#else
3297 /* Fall though! */
a6ec74c1 3298#endif
49704364 3299 case 'L':
a6ec74c1 3300 while (len-- > 0) {
f337b084 3301 U32 au32;
a6ec74c1 3302 fromstr = NEXTFROM;
ef108786
MHM
3303 au32 = SvUV(fromstr);
3304 DO_BO_PACK(au32, 32);
f337b084 3305 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3306 }
3307 break;
49704364 3308 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3309#if LONGSIZE != SIZE32
a6ec74c1 3310 while (len-- > 0) {
f337b084 3311 long along;
a6ec74c1
JH
3312 fromstr = NEXTFROM;
3313 along = SvIV(fromstr);
1109a392 3314 DO_BO_PACK(along, l);
f337b084 3315 PUSH_VAR(utf8, cur, along);
a6ec74c1 3316 }
49704364
WL
3317 break;
3318#else
3319 /* Fall though! */
a6ec74c1 3320#endif
49704364
WL
3321 case 'l':
3322 while (len-- > 0) {
f337b084 3323 I32 ai32;
49704364 3324 fromstr = NEXTFROM;
ef108786
MHM
3325 ai32 = SvIV(fromstr);
3326 DO_BO_PACK(ai32, 32);
f337b084 3327 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3328 }
3329 break;
3330#ifdef HAS_QUAD
3331 case 'Q':
3332 while (len-- > 0) {
f337b084 3333 Uquad_t auquad;
a6ec74c1 3334 fromstr = NEXTFROM;
f337b084 3335 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3336 DO_BO_PACK(auquad, 64);
f337b084 3337 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3338 }
3339 break;
3340 case 'q':
3341 while (len-- > 0) {
f337b084 3342 Quad_t aquad;
a6ec74c1
JH
3343 fromstr = NEXTFROM;
3344 aquad = (Quad_t)SvIV(fromstr);
1109a392 3345 DO_BO_PACK(aquad, 64);
f337b084 3346 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3347 }
3348 break;
f337b084 3349#endif /* HAS_QUAD */
a6ec74c1
JH
3350 case 'P':
3351 len = 1; /* assume SV is correct length */
f337b084 3352 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3353 /* Fall through! */
a6ec74c1
JH
3354 case 'p':
3355 while (len-- > 0) {
f337b084
TH
3356 char *aptr;
3357
a6ec74c1 3358 fromstr = NEXTFROM;
28a4f200
TH
3359 SvGETMAGIC(fromstr);
3360 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3361 else {
3362 STRLEN n_a;
3363 /* XXX better yet, could spirit away the string to
3364 * a safe spot and hang on to it until the result
3365 * of pack() (and all copies of the result) are
3366 * gone.
3367 */
f337b084
TH
3368 if (ckWARN(WARN_PACK) &&
3369 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3370 !SvREADONLY(fromstr)))) {
9014280d 3371 Perl_warner(aTHX_ packWARN(WARN_PACK),
a6ec74c1
JH
3372 "Attempt to pack pointer to temporary value");
3373 }
3374 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3375 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3376 else
28a4f200 3377 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3378 }
1109a392 3379 DO_BO_PACK_P(aptr);
f337b084 3380 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3381 }
3382 break;
f337b084
TH
3383 case 'u': {
3384 char *aptr, *aend;
3385 bool from_utf8;
3386
a6ec74c1 3387 fromstr = NEXTFROM;
f337b084
TH
3388 if (len <= 2) len = 45;
3389 else len = len / 3 * 3;
3390 if (len >= 64) {
3391 Perl_warner(aTHX_ packWARN(WARN_PACK),
3392 "Field too wide in 'u' format in pack");
3393 len = 63;
3394 }
a6ec74c1 3395 aptr = SvPV(fromstr, fromlen);
f337b084
TH
3396 from_utf8 = DO_UTF8(fromstr);
3397 if (from_utf8) {
3398 aend = aptr + fromlen;
3399 fromlen = sv_len_utf8(fromstr);
3400 } else aend = NULL; /* Unused, but keep compilers happy */
3401 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3402 while (fromlen > 0) {
f337b084 3403 U8 *end;
a6ec74c1 3404 I32 todo;
f337b084 3405 U8 hunk[1+63/3*4+1];
a6ec74c1 3406
eb160463 3407 if ((I32)fromlen > len)
a6ec74c1
JH
3408 todo = len;
3409 else
3410 todo = fromlen;
f337b084
TH
3411 if (from_utf8) {
3412 char buffer[64];
3413 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3414 'u' | TYPE_IS_PACK)) {
3415 *cur = '\0';
3416 SvCUR(cat) = cur - start;
3417 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3418 }
3419 end = doencodes(hunk, buffer, todo);
3420 } else {
3421 end = doencodes(hunk, aptr, todo);
a6ec74c1
JH
3422 aptr += todo;
3423 }
f337b084
TH
3424 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3425 fromlen -= todo;
3426 }
a6ec74c1
JH
3427 break;
3428 }
f337b084
TH
3429 }
3430 *cur = '\0';
3431 SvCUR(cat) = cur - start;
3432 no_change:
49704364 3433 *symptr = lookahead;
a6ec74c1 3434 }
49704364 3435 return beglist;
18529408
IZ
3436}
3437#undef NEXTFROM
3438
3439
3440PP(pp_pack)
3441{
3442 dSP; dMARK; dORIGMARK; dTARGET;
3443 register SV *cat = TARG;
3444 STRLEN fromlen;
3445 register char *pat = SvPVx(*++MARK, fromlen);
3446 register char *patend = pat + fromlen;
3447
3448 MARK++;
3449 sv_setpvn(cat, "", 0);
f337b084 3450 SvUTF8_off(cat);
18529408 3451
7accc089 3452 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3453
a6ec74c1
JH
3454 SvSETMAGIC(cat);
3455 SP = ORIGMARK;
3456 PUSHs(cat);
3457 RETURN;
3458}
a6ec74c1 3459
73cb7263
NC
3460/*
3461 * Local variables:
3462 * c-indentation-style: bsd
3463 * c-basic-offset: 4
3464 * indent-tabs-mode: t
3465 * End:
3466 *
edf815fd 3467 * vim: shiftwidth=4:
73cb7263 3468*/