This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #126325] don't read past the end of the source for pack [Hh]
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
9df874cd 154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
LW
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
e6006968
KW
219#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
a6ec74c1 221
66c611c5 222/* type modifiers */
62f95557 223#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
224#define TYPE_IS_BIG_ENDIAN 0x200
225#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 226#define TYPE_IS_PACK 0x800
1109a392 227#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 228#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
229#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
230
7212898e
NC
231# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
232# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
233
234# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
235
78d46eaa 236#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 237#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
238#define PACK_SIZE_MASK 0x3F
239
4ad176ef 240#include "packsizetables.inc"
78d46eaa 241
20aa3a38
NC
242static void
243S_reverse_copy(const char *src, char *dest, STRLEN len)
244{
245 dest += len;
246 while (len--)
247 *--dest = *src++;
248}
249
08ca2aa3 250STATIC U8
9df874cd 251utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 252{
08ca2aa3 253 STRLEN retlen;
0bcc34c2 254 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 255 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 256 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
257 warnings), but these two mean we make no progress in the string and
258 might enter an infinite loop */
259 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 262 if (val >= 0x100) {
a2a5de95
NC
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
266 val &= 0xff;
267 }
268 *s += retlen;
fe2774ed 269 return (U8)val;
08ca2aa3
TH
270}
271
f337b084 272#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
274 *(U8 *)(s)++)
275
276STATIC bool
9df874cd 277S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
278{
279 UV val;
280 STRLEN retlen;
f7fe979e 281 const char *from = *s;
08ca2aa3 282 int bad = 0;
f7fe979e 283 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
285 const bool needs_swap = NEEDS_SWAP(datumtype);
286
228e69a7 287 if (UNLIKELY(needs_swap))
20aa3a38
NC
288 buf += buf_len;
289
08ca2aa3
TH
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
f337b084 292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
293 if (retlen == (STRLEN) -1 || retlen == 0) {
294 from += UTF8SKIP(from);
295 bad |= 1;
296 } else from += retlen;
297 if (val >= 0x100) {
298 bad |= 2;
299 val &= 0xff;
300 }
228e69a7 301 if (UNLIKELY(needs_swap))
20aa3a38
NC
302 *(U8 *)--buf = (U8)val;
303 else
304 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
305 }
306 /* We have enough characters for the buffer. Did we have problems ? */
307 if (bad) {
308 if (bad & 1) {
309 /* Rewalk the string fragment while warning */
f7fe979e 310 const char *ptr;
9e27e96a 311 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
c80e42f3 314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
315 }
316 if (from > end) from = end;
317 }
a2a5de95
NC
318 if ((bad & 2))
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 320 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
324 }
325 *s = from;
326 return TRUE;
327}
328
64844641 329STATIC char *
d21d7215
KW
330S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 332
228e69a7 333 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
334 const U8 *p = start + len;
335 while (p-- > start) {
55d09dc8 336 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
337 }
338 } else {
339 const U8 * const end = start + len;
340 while (start < end) {
55d09dc8 341 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
342 start++;
343 }
f337b084 344 }
64844641 345 return dest;
f337b084
TH
346}
347
3a88beaa 348#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 349STMT_START { \
228e69a7 350 if (UNLIKELY(utf8)) \
d21d7215 351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 352 else { \
228e69a7 353 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
354 S_reverse_copy((char *)(buf), cur, len); \
355 else \
356 Copy(buf, cur, len, char); \
230e1fce
NC
357 (cur) += (len); \
358 } \
f337b084
TH
359} STMT_END
360
361#define GROWING(utf8, cat, start, cur, in_len) \
362STMT_START { \
363 STRLEN glen = (in_len); \
3473cf63 364 if (utf8) glen *= UTF8_EXPAND; \
f337b084 365 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 366 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
367 (cur) = (start) + SvCUR(cat); \
368 } \
369} STMT_END
370
371#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
372STMT_START { \
f7fe979e 373 const STRLEN glen = (in_len); \
f337b084 374 STRLEN gl = glen; \
3473cf63 375 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
376 if ((cur) + gl >= (start) + SvLEN(cat)) { \
377 *cur = '\0'; \
b162af07 378 SvCUR_set((cat), (cur) - (start)); \
0bd48802 379 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
380 (cur) = (start) + SvCUR(cat); \
381 } \
3a88beaa 382 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
383} STMT_END
384
385#define PUSH_BYTE(utf8, s, byte) \
386STMT_START { \
387 if (utf8) { \
f7fe979e 388 const U8 au8 = (byte); \
d21d7215 389 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
390 } else *(U8 *)(s)++ = (byte); \
391} STMT_END
392
393/* Only to be used inside a loop (see the break) */
394#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
395STMT_START { \
396 STRLEN retlen; \
397 if (str >= end) break; \
398 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
399 if (retlen == (STRLEN) -1 || retlen == 0) { \
400 *cur = '\0'; \
401 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
402 } \
403 str += retlen; \
404} STMT_END
405
f7fe979e
AL
406static const char *_action( const tempsym_t* symptr )
407{
10edeb5d 408 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
409}
410
206947d2 411/* Returns the sizeof() struct described by pat */
028d1f6d 412STATIC I32
f337b084 413S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 414{
f337b084 415 I32 total = 0;
206947d2 416
7918f24d
NC
417 PERL_ARGS_ASSERT_MEASURE_STRUCT;
418
49704364 419 while (next_symbol(symptr)) {
f337b084 420 I32 len;
f7fe979e 421 int size;
f337b084
TH
422
423 switch (symptr->howlen) {
fc241834 424 case e_star:
49704364 425 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 426 _action( symptr ) );
81d52ecd 427
f337b084
TH
428 default:
429 /* e_no_len and e_number */
430 len = symptr->length;
431 break;
49704364
LW
432 }
433
a7a3cfaa 434 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 435 if (!size) {
f7fe979e 436 int star;
80a13697
NC
437 /* endianness doesn't influence the size of a type */
438 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
439 default:
440 Perl_croak(aTHX_ "Invalid type '%c' in %s",
441 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 442 _action( symptr ) );
28be1210
TH
443 case '.' | TYPE_IS_SHRIEKING:
444 case '@' | TYPE_IS_SHRIEKING:
80a13697 445 case '@':
28be1210 446 case '.':
80a13697
NC
447 case '/':
448 case 'U': /* XXXX Is it correct? */
449 case 'w':
450 case 'u':
451 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 452 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 453 _action( symptr ) );
80a13697
NC
454 case '%':
455 size = 0;
456 break;
457 case '(':
fc241834
RGS
458 {
459 tempsym_t savsym = *symptr;
460 symptr->patptr = savsym.grpbeg;
461 symptr->patend = savsym.grpend;
462 /* XXXX Theoretically, we need to measure many times at
463 different positions, since the subexpression may contain
464 alignment commands, but be not of aligned length.
465 Need to detect this and croak(). */
466 size = measure_struct(symptr);
467 *symptr = savsym;
468 break;
469 }
80a13697
NC
470 case 'X' | TYPE_IS_SHRIEKING:
471 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
472 */
473 if (!len) /* Avoid division by 0 */
474 len = 1;
475 len = total % len; /* Assumed: the start is aligned. */
924ba076 476 /* FALLTHROUGH */
80a13697
NC
477 case 'X':
478 size = -1;
479 if (total < len)
f7fe979e 480 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
481 break;
482 case 'x' | TYPE_IS_SHRIEKING:
483 if (!len) /* Avoid division by 0 */
484 len = 1;
485 star = total % len; /* Assumed: the start is aligned. */
486 if (star) /* Other portable ways? */
487 len = len - star;
488 else
489 len = 0;
924ba076 490 /* FALLTHROUGH */
80a13697
NC
491 case 'x':
492 case 'A':
493 case 'Z':
494 case 'a':
80a13697
NC
495 size = 1;
496 break;
497 case 'B':
498 case 'b':
499 len = (len + 7)/8;
500 size = 1;
501 break;
502 case 'H':
503 case 'h':
504 len = (len + 1)/2;
505 size = 1;
506 break;
78d46eaa 507
80a13697
NC
508 case 'P':
509 len = 1;
510 size = sizeof(char*);
78d46eaa
NC
511 break;
512 }
206947d2
IZ
513 }
514 total += len * size;
515 }
516 return total;
517}
518
49704364
LW
519
520/* locate matching closing parenthesis or bracket
521 * returns char pointer to char after match, or NULL
522 */
f7fe979e 523STATIC const char *
5aaab254 524S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 525{
7918f24d
NC
526 PERL_ARGS_ASSERT_GROUP_END;
527
49704364 528 while (patptr < patend) {
f7fe979e 529 const char c = *patptr++;
49704364
LW
530
531 if (isSPACE(c))
532 continue;
533 else if (c == ender)
534 return patptr-1;
535 else if (c == '#') {
536 while (patptr < patend && *patptr != '\n')
537 patptr++;
538 continue;
539 } else if (c == '(')
540 patptr = group_end(patptr, patend, ')') + 1;
541 else if (c == '[')
542 patptr = group_end(patptr, patend, ']') + 1;
18529408 543 }
49704364
LW
544 Perl_croak(aTHX_ "No group ending character '%c' found in template",
545 ender);
a25b5927 546 NOT_REACHED; /* NOTREACHED */
18529408
IZ
547}
548
49704364
LW
549
550/* Convert unsigned decimal number to binary.
551 * Expects a pointer to the first digit and address of length variable
552 * Advances char pointer to 1st non-digit char and returns number
fc241834 553 */
f7fe979e 554STATIC const char *
5aaab254 555S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
556{
557 I32 len = *patptr++ - '0';
7918f24d
NC
558
559 PERL_ARGS_ASSERT_GET_NUM;
560
49704364
LW
561 while (isDIGIT(*patptr)) {
562 if (len >= 0x7FFFFFFF/10)
563 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
564 len = (len * 10) + (*patptr++ - '0');
565 }
566 *lenptr = len;
567 return patptr;
568}
569
570/* The marvellous template parsing routine: Using state stored in *symptr,
571 * locates next template code and count
572 */
573STATIC bool
f337b084 574S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 575{
f7fe979e 576 const char* patptr = symptr->patptr;
0bcc34c2 577 const char* const patend = symptr->patend;
49704364 578
7918f24d
NC
579 PERL_ARGS_ASSERT_NEXT_SYMBOL;
580
49704364
LW
581 symptr->flags &= ~FLAG_SLASH;
582
583 while (patptr < patend) {
584 if (isSPACE(*patptr))
585 patptr++;
586 else if (*patptr == '#') {
587 patptr++;
588 while (patptr < patend && *patptr != '\n')
589 patptr++;
590 if (patptr < patend)
591 patptr++;
592 } else {
fc241834 593 /* We should have found a template code */
49704364 594 I32 code = *patptr++ & 0xFF;
66c611c5 595 U32 inherited_modifiers = 0;
49704364
LW
596
597 if (code == ','){ /* grandfather in commas but with a warning */
598 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
599 symptr->flags |= FLAG_COMMA;
600 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 601 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
602 }
603 continue;
604 }
fc241834 605
49704364 606 /* for '(', skip to ')' */
fc241834 607 if (code == '(') {
49704364
LW
608 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
609 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 610 _action( symptr ) );
49704364
LW
611 symptr->grpbeg = patptr;
612 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
613 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
614 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 615 _action( symptr ) );
49704364
LW
616 }
617
66c611c5
MHM
618 /* look for group modifiers to inherit */
619 if (TYPE_ENDIANNESS(symptr->flags)) {
620 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
621 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
622 }
623
1109a392
MHM
624 /* look for modifiers */
625 while (patptr < patend) {
b7787f18
AL
626 const char *allowed;
627 I32 modifier;
1109a392
MHM
628 switch (*patptr) {
629 case '!':
630 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 631 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
632 break;
633 case '>':
634 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 635 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
636 break;
637 case '<':
638 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 639 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
640 break;
641 default:
b7787f18
AL
642 allowed = "";
643 modifier = 0;
1109a392
MHM
644 break;
645 }
66c611c5 646
1109a392
MHM
647 if (modifier == 0)
648 break;
66c611c5 649
1109a392
MHM
650 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
651 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 652 allowed, _action( symptr ) );
66c611c5
MHM
653
654 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 655 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 656 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
657 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
658 TYPE_ENDIANNESS_MASK)
659 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 660 *patptr, _action( symptr ) );
66c611c5 661
a2a5de95
NC
662 if ((code & modifier)) {
663 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
664 "Duplicate modifier '%c' after '%c' in %s",
665 *patptr, (int) TYPE_NO_MODIFIERS(code),
666 _action( symptr ) );
1109a392 667 }
66c611c5 668
1109a392
MHM
669 code |= modifier;
670 patptr++;
49704364
LW
671 }
672
66c611c5
MHM
673 /* inherit modifiers */
674 code |= inherited_modifiers;
675
fc241834 676 /* look for count and/or / */
49704364
LW
677 if (patptr < patend) {
678 if (isDIGIT(*patptr)) {
679 patptr = get_num( patptr, &symptr->length );
680 symptr->howlen = e_number;
681
682 } else if (*patptr == '*') {
683 patptr++;
684 symptr->howlen = e_star;
685
686 } else if (*patptr == '[') {
f7fe979e 687 const char* lenptr = ++patptr;
49704364
LW
688 symptr->howlen = e_number;
689 patptr = group_end( patptr, patend, ']' ) + 1;
690 /* what kind of [] is it? */
691 if (isDIGIT(*lenptr)) {
692 lenptr = get_num( lenptr, &symptr->length );
693 if( *lenptr != ']' )
694 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 695 _action( symptr ) );
49704364
LW
696 } else {
697 tempsym_t savsym = *symptr;
698 symptr->patend = patptr-1;
699 symptr->patptr = lenptr;
700 savsym.length = measure_struct(symptr);
701 *symptr = savsym;
702 }
703 } else {
704 symptr->howlen = e_no_len;
705 symptr->length = 1;
706 }
707
708 /* try to find / */
709 while (patptr < patend) {
710 if (isSPACE(*patptr))
711 patptr++;
712 else if (*patptr == '#') {
713 patptr++;
714 while (patptr < patend && *patptr != '\n')
715 patptr++;
716 if (patptr < patend)
717 patptr++;
718 } else {
66c611c5 719 if (*patptr == '/') {
49704364
LW
720 symptr->flags |= FLAG_SLASH;
721 patptr++;
66c611c5
MHM
722 if (patptr < patend &&
723 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 724 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 725 _action( symptr ) );
49704364
LW
726 }
727 break;
728 }
18529408 729 }
49704364
LW
730 } else {
731 /* at end - no count, no / */
732 symptr->howlen = e_no_len;
733 symptr->length = 1;
734 }
735
736 symptr->code = code;
fc241834 737 symptr->patptr = patptr;
49704364 738 return TRUE;
18529408 739 }
49704364 740 }
fc241834 741 symptr->patptr = patptr;
49704364 742 return FALSE;
18529408
IZ
743}
744
18529408 745/*
fc241834 746 There is no way to cleanly handle the case where we should process the
08ca2aa3 747 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
748 (e.g. estimates like strend-s as an upper bound for the number of
749 characters left wouldn't work). So if we foresee the need of this
750 (pattern starts with U or contains U0), we want to work on the encoded
751 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
752 themselves if they need to do a lot of unpacks like this on it
753*/
fc241834 754STATIC bool
08ca2aa3
TH
755need_utf8(const char *pat, const char *patend)
756{
757 bool first = TRUE;
7918f24d
NC
758
759 PERL_ARGS_ASSERT_NEED_UTF8;
760
08ca2aa3
TH
761 while (pat < patend) {
762 if (pat[0] == '#') {
763 pat++;
f7fe979e 764 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
765 if (!pat) return FALSE;
766 } else if (pat[0] == 'U') {
767 if (first || pat[1] == '0') return TRUE;
768 } else first = FALSE;
769 pat++;
770 }
771 return FALSE;
772}
773
774STATIC char
775first_symbol(const char *pat, const char *patend) {
7918f24d
NC
776 PERL_ARGS_ASSERT_FIRST_SYMBOL;
777
08ca2aa3
TH
778 while (pat < patend) {
779 if (pat[0] != '#') return pat[0];
780 pat++;
f7fe979e 781 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
782 if (!pat) return 0;
783 pat++;
784 }
785 return 0;
786}
787
788/*
dcccc8ff
KW
789
790=head1 Pack and Unpack
791
7accc089
JH
792=for apidoc unpackstring
793
796b6530 794The engine implementing the C<unpack()> Perl function.
21ebfc7a 795
796b6530
KW
796Using the template C<pat..patend>, this function unpacks the string
797C<s..strend> into a number of mortal SVs, which it pushes onto the perl
798argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 799C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
800pushed elements.
801
796b6530
KW
802The C<strend> and C<patend> pointers should point to the byte following the
803last character of each string.
21ebfc7a
DM
804
805Although this function returns its values on the perl argument stack, it
806doesn't take any parameters from that stack (and thus in particular
796b6530 807there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 808example).
7accc089
JH
809
810=cut */
811
812I32
f7fe979e 813Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 814{
f7fe979e 815 tempsym_t sym;
08ca2aa3 816
7918f24d
NC
817 PERL_ARGS_ASSERT_UNPACKSTRING;
818
f337b084 819 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
820 else if (need_utf8(pat, patend)) {
821 /* We probably should try to avoid this in case a scalar context call
822 wouldn't get to the "U0" */
823 STRLEN len = strend - s;
230e1fce 824 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
825 SAVEFREEPV(s);
826 strend = s + len;
f337b084 827 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
828 }
829
f337b084
TH
830 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
831 flags |= FLAG_PARSE_UTF8;
08ca2aa3 832
f7fe979e 833 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
834
835 return unpack_rec(&sym, s, s, strend, NULL );
836}
837
4136a0f7 838STATIC I32
f7fe979e 839S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 840{
20b7effb 841 dSP;
3297d27d 842 SV *sv = NULL;
f7fe979e 843 const I32 start_sp_offset = SP - PL_stack_base;
49704364 844 howlen_t howlen;
a6ec74c1 845 I32 checksum = 0;
92d41999 846 UV cuv = 0;
a6ec74c1 847 NV cdouble = 0.0;
f337b084 848 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 849 bool beyond = FALSE;
21c16052 850 bool explicit_length;
9e27e96a 851 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 852 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
853
854 PERL_ARGS_ASSERT_UNPACK_REC;
855
28be1210 856 symptr->strbeg = s - strbeg;
49704364 857
49704364 858 while (next_symbol(symptr)) {
a7a3cfaa 859 packprops_t props;
9e27e96a 860 I32 len;
f337b084 861 I32 datumtype = symptr->code;
a1219b5e 862 bool needs_swap;
206947d2 863 /* do first one only unless in list context
08ca2aa3 864 / is implemented by unpacking the count, then popping it from the
206947d2 865 stack, so must check that we're not in the middle of a / */
49704364 866 if ( unpack_only_one
206947d2 867 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 868 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 869 break;
49704364 870
f337b084 871 switch (howlen = symptr->howlen) {
fc241834
RGS
872 case e_star:
873 len = strend - strbeg; /* long enough */
49704364 874 break;
f337b084
TH
875 default:
876 /* e_no_len and e_number */
877 len = symptr->length;
878 break;
49704364 879 }
18529408 880
21c16052 881 explicit_length = TRUE;
a6ec74c1 882 redo_switch:
49704364 883 beyond = s >= strend;
a7a3cfaa
TH
884
885 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
886 if (props) {
887 /* props nonzero means we can process this letter. */
9e27e96a
AL
888 const long size = props & PACK_SIZE_MASK;
889 const long howmany = (strend - s) / size;
a7a3cfaa
TH
890 if (len > howmany)
891 len = howmany;
892
893 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
894 if (len && unpack_only_one) len = 1;
895 EXTEND(SP, len);
896 EXTEND_MORTAL(len);
78d46eaa
NC
897 }
898 }
a7a3cfaa 899
a1219b5e
NC
900 needs_swap = NEEDS_SWAP(datumtype);
901
1109a392 902 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 903 default:
1109a392 904 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 905
a6ec74c1 906 case '%':
49704364 907 if (howlen == e_no_len)
18529408 908 len = 16; /* len is not specified */
a6ec74c1 909 checksum = len;
92d41999 910 cuv = 0;
a6ec74c1 911 cdouble = 0;
18529408 912 continue;
81d52ecd 913
18529408
IZ
914 case '(':
915 {
49704364 916 tempsym_t savsym = *symptr;
9e27e96a 917 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 918 symptr->flags |= group_modifiers;
49704364 919 symptr->patend = savsym.grpend;
28be1210 920 symptr->previous = &savsym;
49704364 921 symptr->level++;
18529408 922 PUTBACK;
c6f750d1 923 if (len && unpack_only_one) len = 1;
18529408 924 while (len--) {
49704364 925 symptr->patptr = savsym.grpbeg;
f337b084
TH
926 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
927 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
928 unpack_rec(symptr, s, strbeg, strend, &s);
929 if (s == strend && savsym.howlen == e_star)
49704364 930 break; /* No way to continue */
18529408
IZ
931 }
932 SPAGAIN;
28be1210 933 savsym.flags = symptr->flags & ~group_modifiers;
49704364 934 *symptr = savsym;
18529408
IZ
935 break;
936 }
28be1210 937 case '.' | TYPE_IS_SHRIEKING:
28be1210 938 case '.': {
9e27e96a 939 const char *from;
28be1210 940 SV *sv;
9e27e96a 941 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
942 if (howlen == e_star) from = strbeg;
943 else if (len <= 0) from = s;
944 else {
945 tempsym_t *group = symptr;
946
947 while (--len && group) group = group->previous;
948 from = group ? strbeg + group->strbeg : strbeg;
949 }
950 sv = from <= s ?
00646304
CB
951 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
952 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 953 mXPUSHs(sv);
28be1210
TH
954 break;
955 }
28be1210 956 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 957 case '@':
28be1210 958 s = strbeg + symptr->strbeg;
28be1210 959 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 960 {
08ca2aa3
TH
961 while (len > 0) {
962 if (s >= strend)
963 Perl_croak(aTHX_ "'@' outside of string in unpack");
964 s += UTF8SKIP(s);
965 len--;
966 }
967 if (s > strend)
968 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
969 } else {
28be1210 970 if (strend-s < len)
fc241834 971 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 972 s += len;
08ca2aa3 973 }
a6ec74c1 974 break;
62f95557
IZ
975 case 'X' | TYPE_IS_SHRIEKING:
976 if (!len) /* Avoid division by 0 */
977 len = 1;
08ca2aa3 978 if (utf8) {
f7fe979e 979 const char *hop, *last;
f337b084
TH
980 I32 l = len;
981 hop = last = strbeg;
982 while (hop < s) {
983 hop += UTF8SKIP(hop);
984 if (--l == 0) {
08ca2aa3 985 last = hop;
f337b084
TH
986 l = len;
987 }
fc241834 988 }
f337b084
TH
989 if (last > s)
990 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
991 s = last;
992 break;
f337b084
TH
993 }
994 len = (s - strbeg) % len;
924ba076 995 /* FALLTHROUGH */
a6ec74c1 996 case 'X':
08ca2aa3
TH
997 if (utf8) {
998 while (len > 0) {
999 if (s <= strbeg)
1000 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1001 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1002 if (s <= strbeg)
1003 Perl_croak(aTHX_ "'X' outside of string in unpack");
1004 }
1005 len--;
1006 }
1007 } else {
fc241834
RGS
1008 if (len > s - strbeg)
1009 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1010 s -= len;
08ca2aa3 1011 }
a6ec74c1 1012 break;
9e27e96a
AL
1013 case 'x' | TYPE_IS_SHRIEKING: {
1014 I32 ai32;
62f95557
IZ
1015 if (!len) /* Avoid division by 0 */
1016 len = 1;
230e1fce
NC
1017 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1018 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1019 if (ai32 == 0) break;
1020 len -= ai32;
9e27e96a 1021 }
924ba076 1022 /* FALLTHROUGH */
a6ec74c1 1023 case 'x':
08ca2aa3
TH
1024 if (utf8) {
1025 while (len>0) {
1026 if (s >= strend)
1027 Perl_croak(aTHX_ "'x' outside of string in unpack");
1028 s += UTF8SKIP(s);
1029 len--;
1030 }
1031 } else {
fc241834
RGS
1032 if (len > strend - s)
1033 Perl_croak(aTHX_ "'x' outside of string in unpack");
1034 s += len;
f337b084 1035 }
a6ec74c1
JH
1036 break;
1037 case '/':
49704364 1038 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1039
a6ec74c1
JH
1040 case 'A':
1041 case 'Z':
1042 case 'a':
08ca2aa3
TH
1043 if (checksum) {
1044 /* Preliminary length estimate is assumed done in 'W' */
1045 if (len > strend - s) len = strend - s;
1046 goto W_checksum;
1047 }
1048 if (utf8) {
1049 I32 l;
f7fe979e 1050 const char *hop;
08ca2aa3
TH
1051 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1052 if (hop >= strend) {
1053 if (hop > strend)
1054 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1055 break;
fc241834 1056 }
a6ec74c1 1057 }
08ca2aa3
TH
1058 if (hop > strend)
1059 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1060 len = hop - s;
1061 } else if (len > strend - s)
1062 len = strend - s;
1063
1064 if (datumtype == 'Z') {
1065 /* 'Z' strips stuff after first null */
f7fe979e 1066 const char *ptr, *end;
f337b084
TH
1067 end = s + len;
1068 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1069 sv = newSVpvn(s, ptr-s);
1070 if (howlen == e_star) /* exact for 'Z*' */
1071 len = ptr-s + (ptr != strend ? 1 : 0);
1072 } else if (datumtype == 'A') {
1073 /* 'A' strips both nulls and spaces */
f7fe979e 1074 const char *ptr;
18bdf90a
TH
1075 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1076 for (ptr = s+len-1; ptr >= s; ptr--)
1077 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1078 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1079 if (ptr >= s) ptr += UTF8SKIP(ptr);
1080 else ptr++;
28be1210 1081 if (ptr > s+len)
18bdf90a
TH
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 } else {
1084 for (ptr = s+len-1; ptr >= s; ptr--)
1085 if (*ptr != 0 && !isSPACE(*ptr)) break;
1086 ptr++;
1087 }
08ca2aa3
TH
1088 sv = newSVpvn(s, ptr-s);
1089 } else sv = newSVpvn(s, len);
1090
1091 if (utf8) {
1092 SvUTF8_on(sv);
1093 /* Undo any upgrade done due to need_utf8() */
f337b084 1094 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1095 sv_utf8_downgrade(sv, 0);
a6ec74c1 1096 }
6e449a3a 1097 mXPUSHs(sv);
08ca2aa3 1098 s += len;
a6ec74c1
JH
1099 break;
1100 case 'B':
08ca2aa3
TH
1101 case 'b': {
1102 char *str;
49704364 1103 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1104 len = (strend - s) * 8;
1105 if (checksum) {
f337b084 1106 if (utf8)
08ca2aa3 1107 while (len >= 8 && s < strend) {
9df874cd 1108 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1109 len -= 8;
1110 }
f337b084 1111 else
fc241834 1112 while (len >= 8) {
08ca2aa3 1113 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1114 len -= 8;
1115 }
08ca2aa3
TH
1116 if (len && s < strend) {
1117 U8 bits;
f337b084
TH
1118 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1119 if (datumtype == 'b')
a6ec74c1 1120 while (len-- > 0) {
92d41999 1121 if (bits & 1) cuv++;
a6ec74c1
JH
1122 bits >>= 1;
1123 }
f337b084 1124 else
a6ec74c1 1125 while (len-- > 0) {
08ca2aa3 1126 if (bits & 0x80) cuv++;
a6ec74c1
JH
1127 bits <<= 1;
1128 }
fc241834 1129 }
a6ec74c1
JH
1130 break;
1131 }
08ca2aa3 1132
561b68a9 1133 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1134 SvPOK_on(sv);
1135 str = SvPVX(sv);
1136 if (datumtype == 'b') {
f337b084 1137 U8 bits = 0;
f7fe979e 1138 const I32 ai32 = len;
08ca2aa3
TH
1139 for (len = 0; len < ai32; len++) {
1140 if (len & 7) bits >>= 1;
1141 else if (utf8) {
1142 if (s >= strend) break;
9df874cd 1143 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1144 } else bits = *(U8 *) s++;
1145 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1146 }
08ca2aa3 1147 } else {
f337b084 1148 U8 bits = 0;
f7fe979e 1149 const I32 ai32 = len;
08ca2aa3
TH
1150 for (len = 0; len < ai32; len++) {
1151 if (len & 7) bits <<= 1;
1152 else if (utf8) {
1153 if (s >= strend) break;
9df874cd 1154 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1155 } else bits = *(U8 *) s++;
1156 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1157 }
1158 }
1159 *str = '\0';
aa07b2f6 1160 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1161 XPUSHs(sv);
a6ec74c1 1162 break;
08ca2aa3 1163 }
a6ec74c1 1164 case 'H':
08ca2aa3 1165 case 'h': {
3297d27d 1166 char *str = NULL;
fc241834 1167 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1168 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1169 len = (strend - s) * 2;
858fe5e1
TC
1170 if (!checksum) {
1171 sv = sv_2mortal(newSV(len ? len : 1));
1172 SvPOK_on(sv);
1173 str = SvPVX(sv);
1174 }
a6ec74c1 1175 if (datumtype == 'h') {
f337b084 1176 U8 bits = 0;
9e27e96a 1177 I32 ai32 = len;
fc241834
RGS
1178 for (len = 0; len < ai32; len++) {
1179 if (len & 1) bits >>= 4;
1180 else if (utf8) {
1181 if (s >= strend) break;
9df874cd 1182 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1183 } else bits = * (U8 *) s++;
858fe5e1
TC
1184 if (!checksum)
1185 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1186 }
08ca2aa3 1187 } else {
f337b084 1188 U8 bits = 0;
f7fe979e 1189 const I32 ai32 = len;
08ca2aa3
TH
1190 for (len = 0; len < ai32; len++) {
1191 if (len & 1) bits <<= 4;
1192 else if (utf8) {
1193 if (s >= strend) break;
9df874cd 1194 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1195 } else bits = *(U8 *) s++;
858fe5e1
TC
1196 if (!checksum)
1197 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1198 }
1199 }
858fe5e1
TC
1200 if (!checksum) {
1201 *str = '\0';
1202 SvCUR_set(sv, str - SvPVX_const(sv));
1203 XPUSHs(sv);
1204 }
a6ec74c1 1205 break;
08ca2aa3 1206 }
1651fc44
ML
1207 case 'C':
1208 if (len == 0) {
1209 if (explicit_length)
1210 /* Switch to "character" mode */
1211 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1212 break;
1213 }
924ba076 1214 /* FALLTHROUGH */
a6ec74c1 1215 case 'c':
1651fc44
ML
1216 while (len-- > 0 && s < strend) {
1217 int aint;
1218 if (utf8)
1219 {
1220 STRLEN retlen;
1221 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1222 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1223 if (retlen == (STRLEN) -1 || retlen == 0)
1224 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1225 s += retlen;
1226 }
1227 else
1228 aint = *(U8 *)(s)++;
1229 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1230 aint -= 256;
08ca2aa3 1231 if (!checksum)
6e449a3a 1232 mPUSHi(aint);
73cb7263
NC
1233 else if (checksum > bits_in_uv)
1234 cdouble += (NV)aint;
1235 else
1236 cuv += aint;
a6ec74c1
JH
1237 }
1238 break;
08ca2aa3
TH
1239 case 'W':
1240 W_checksum:
1651fc44 1241 if (utf8) {
08ca2aa3 1242 while (len-- > 0 && s < strend) {
08ca2aa3 1243 STRLEN retlen;
f7fe979e 1244 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 s += retlen;
1249 if (!checksum)
6e449a3a 1250 mPUSHu(val);
08ca2aa3
TH
1251 else if (checksum > bits_in_uv)
1252 cdouble += (NV) val;
d6d3e8bd 1253 else
08ca2aa3 1254 cuv += val;
fc241834 1255 }
08ca2aa3 1256 } else if (!checksum)
a6ec74c1 1257 while (len-- > 0) {
f7fe979e 1258 const U8 ch = *(U8 *) s++;
6e449a3a 1259 mPUSHu(ch);
a6ec74c1 1260 }
08ca2aa3
TH
1261 else if (checksum > bits_in_uv)
1262 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1263 else
1264 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1265 break;
1266 case 'U':
35bcd338 1267 if (len == 0) {
c5333953 1268 if (explicit_length && howlen != e_star) {
08ca2aa3 1269 /* Switch to "bytes in UTF-8" mode */
f337b084 1270 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1271 else
1272 /* Should be impossible due to the need_utf8() test */
1273 Perl_croak(aTHX_ "U0 mode on a byte string");
1274 }
35bcd338
JH
1275 break;
1276 }
08ca2aa3 1277 if (len > strend - s) len = strend - s;
fc241834 1278 if (!checksum) {
08ca2aa3
TH
1279 if (len && unpack_only_one) len = 1;
1280 EXTEND(SP, len);
1281 EXTEND_MORTAL(len);
fc241834 1282 }
08ca2aa3
TH
1283 while (len-- > 0 && s < strend) {
1284 STRLEN retlen;
1285 UV auv;
1286 if (utf8) {
1287 U8 result[UTF8_MAXLEN];
f7fe979e 1288 const char *ptr = s;
08ca2aa3 1289 STRLEN len;
08ca2aa3
TH
1290 /* Bug: warns about bad utf8 even if we are short on bytes
1291 and will break out of the loop */
9df874cd 1292 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1293 'U'))
08ca2aa3
TH
1294 break;
1295 len = UTF8SKIP(result);
9df874cd 1296 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1297 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1298 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1299 len,
1300 &retlen,
1301 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1302 s = ptr;
1303 } else {
3ece276e
KW
1304 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1305 strend - s,
1306 &retlen,
1307 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1308 if (retlen == (STRLEN) -1 || retlen == 0)
1309 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1310 s += retlen;
1311 }
1312 if (!checksum)
6e449a3a 1313 mPUSHu(auv);
73cb7263 1314 else if (checksum > bits_in_uv)
08ca2aa3 1315 cdouble += (NV) auv;
73cb7263 1316 else
08ca2aa3 1317 cuv += auv;
a6ec74c1
JH
1318 }
1319 break;
49704364
LW
1320 case 's' | TYPE_IS_SHRIEKING:
1321#if SHORTSIZE != SIZE16
73cb7263 1322 while (len-- > 0) {
08ca2aa3 1323 short ashort;
aaec8192 1324 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1325 if (!checksum)
6e449a3a 1326 mPUSHi(ashort);
73cb7263
NC
1327 else if (checksum > bits_in_uv)
1328 cdouble += (NV)ashort;
1329 else
1330 cuv += ashort;
49704364
LW
1331 }
1332 break;
1333#else
924ba076 1334 /* FALLTHROUGH */
a6ec74c1 1335#endif
49704364 1336 case 's':
73cb7263 1337 while (len-- > 0) {
08ca2aa3
TH
1338 I16 ai16;
1339
1340#if U16SIZE > SIZE16
1341 ai16 = 0;
1342#endif
aaec8192 1343 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1344#if U16SIZE > SIZE16
73cb7263
NC
1345 if (ai16 > 32767)
1346 ai16 -= 65536;
a6ec74c1 1347#endif
08ca2aa3 1348 if (!checksum)
6e449a3a 1349 mPUSHi(ai16);
73cb7263
NC
1350 else if (checksum > bits_in_uv)
1351 cdouble += (NV)ai16;
1352 else
1353 cuv += ai16;
a6ec74c1
JH
1354 }
1355 break;
49704364
LW
1356 case 'S' | TYPE_IS_SHRIEKING:
1357#if SHORTSIZE != SIZE16
73cb7263 1358 while (len-- > 0) {
08ca2aa3 1359 unsigned short aushort;
aaec8192
NC
1360 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1361 needs_swap);
08ca2aa3 1362 if (!checksum)
6e449a3a 1363 mPUSHu(aushort);
73cb7263
NC
1364 else if (checksum > bits_in_uv)
1365 cdouble += (NV)aushort;
1366 else
1367 cuv += aushort;
49704364
LW
1368 }
1369 break;
1370#else
924ba076 1371 /* FALLTHROUGH */
49704364 1372#endif
a6ec74c1
JH
1373 case 'v':
1374 case 'n':
1375 case 'S':
73cb7263 1376 while (len-- > 0) {
08ca2aa3
TH
1377 U16 au16;
1378#if U16SIZE > SIZE16
1379 au16 = 0;
1380#endif
aaec8192 1381 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1382 if (datumtype == 'n')
1383 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1384 if (datumtype == 'v')
1385 au16 = vtohs(au16);
08ca2aa3 1386 if (!checksum)
6e449a3a 1387 mPUSHu(au16);
73cb7263 1388 else if (checksum > bits_in_uv)
f337b084 1389 cdouble += (NV) au16;
73cb7263
NC
1390 else
1391 cuv += au16;
a6ec74c1
JH
1392 }
1393 break;
068bd2e7
MHM
1394 case 'v' | TYPE_IS_SHRIEKING:
1395 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1396 while (len-- > 0) {
08ca2aa3
TH
1397 I16 ai16;
1398# if U16SIZE > SIZE16
1399 ai16 = 0;
1400# endif
aaec8192 1401 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1402 /* There should never be any byte-swapping here. */
1403 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1404 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1405 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1406 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1407 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1408 if (!checksum)
6e449a3a 1409 mPUSHi(ai16);
73cb7263 1410 else if (checksum > bits_in_uv)
08ca2aa3 1411 cdouble += (NV) ai16;
73cb7263
NC
1412 else
1413 cuv += ai16;
068bd2e7
MHM
1414 }
1415 break;
a6ec74c1 1416 case 'i':
49704364 1417 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1418 while (len-- > 0) {
08ca2aa3 1419 int aint;
aaec8192 1420 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1421 if (!checksum)
6e449a3a 1422 mPUSHi(aint);
73cb7263
NC
1423 else if (checksum > bits_in_uv)
1424 cdouble += (NV)aint;
1425 else
1426 cuv += aint;
a6ec74c1
JH
1427 }
1428 break;
1429 case 'I':
49704364 1430 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1431 while (len-- > 0) {
08ca2aa3 1432 unsigned int auint;
aaec8192 1433 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1434 if (!checksum)
6e449a3a 1435 mPUSHu(auint);
73cb7263
NC
1436 else if (checksum > bits_in_uv)
1437 cdouble += (NV)auint;
1438 else
1439 cuv += auint;
a6ec74c1
JH
1440 }
1441 break;
92d41999 1442 case 'j':
73cb7263 1443 while (len-- > 0) {
08ca2aa3 1444 IV aiv;
aaec8192 1445 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1446 if (!checksum)
6e449a3a 1447 mPUSHi(aiv);
73cb7263
NC
1448 else if (checksum > bits_in_uv)
1449 cdouble += (NV)aiv;
1450 else
1451 cuv += aiv;
92d41999
JH
1452 }
1453 break;
1454 case 'J':
73cb7263 1455 while (len-- > 0) {
08ca2aa3 1456 UV auv;
aaec8192 1457 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1458 if (!checksum)
6e449a3a 1459 mPUSHu(auv);
73cb7263
NC
1460 else if (checksum > bits_in_uv)
1461 cdouble += (NV)auv;
1462 else
1463 cuv += auv;
92d41999
JH
1464 }
1465 break;
49704364
LW
1466 case 'l' | TYPE_IS_SHRIEKING:
1467#if LONGSIZE != SIZE32
73cb7263 1468 while (len-- > 0) {
08ca2aa3 1469 long along;
aaec8192 1470 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1471 if (!checksum)
6e449a3a 1472 mPUSHi(along);
73cb7263
NC
1473 else if (checksum > bits_in_uv)
1474 cdouble += (NV)along;
1475 else
1476 cuv += along;
49704364
LW
1477 }
1478 break;
1479#else
924ba076 1480 /* FALLTHROUGH */
a6ec74c1 1481#endif
49704364 1482 case 'l':
73cb7263 1483 while (len-- > 0) {
08ca2aa3
TH
1484 I32 ai32;
1485#if U32SIZE > SIZE32
1486 ai32 = 0;
1487#endif
aaec8192 1488 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1489#if U32SIZE > SIZE32
08ca2aa3 1490 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1491#endif
08ca2aa3 1492 if (!checksum)
6e449a3a 1493 mPUSHi(ai32);
73cb7263
NC
1494 else if (checksum > bits_in_uv)
1495 cdouble += (NV)ai32;
1496 else
1497 cuv += ai32;
a6ec74c1
JH
1498 }
1499 break;
49704364
LW
1500 case 'L' | TYPE_IS_SHRIEKING:
1501#if LONGSIZE != SIZE32
73cb7263 1502 while (len-- > 0) {
08ca2aa3 1503 unsigned long aulong;
aaec8192 1504 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1505 if (!checksum)
6e449a3a 1506 mPUSHu(aulong);
73cb7263
NC
1507 else if (checksum > bits_in_uv)
1508 cdouble += (NV)aulong;
1509 else
1510 cuv += aulong;
49704364
LW
1511 }
1512 break;
1513#else
924ba076 1514 /* FALLTHROUGH */
49704364 1515#endif
a6ec74c1
JH
1516 case 'V':
1517 case 'N':
1518 case 'L':
73cb7263 1519 while (len-- > 0) {
08ca2aa3
TH
1520 U32 au32;
1521#if U32SIZE > SIZE32
1522 au32 = 0;
1523#endif
aaec8192 1524 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1525 if (datumtype == 'N')
1526 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1527 if (datumtype == 'V')
1528 au32 = vtohl(au32);
08ca2aa3 1529 if (!checksum)
6e449a3a 1530 mPUSHu(au32);
fc241834
RGS
1531 else if (checksum > bits_in_uv)
1532 cdouble += (NV)au32;
1533 else
1534 cuv += au32;
a6ec74c1
JH
1535 }
1536 break;
068bd2e7
MHM
1537 case 'V' | TYPE_IS_SHRIEKING:
1538 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1539 while (len-- > 0) {
08ca2aa3 1540 I32 ai32;
f8e5a5db 1541#if U32SIZE > SIZE32
08ca2aa3 1542 ai32 = 0;
f8e5a5db 1543#endif
aaec8192 1544 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1545 /* There should never be any byte swapping here. */
1546 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1547 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1548 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1549 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1550 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1551 if (!checksum)
6e449a3a 1552 mPUSHi(ai32);
73cb7263
NC
1553 else if (checksum > bits_in_uv)
1554 cdouble += (NV)ai32;
1555 else
1556 cuv += ai32;
068bd2e7
MHM
1557 }
1558 break;
a6ec74c1 1559 case 'p':
a6ec74c1 1560 while (len-- > 0) {
f7fe979e 1561 const char *aptr;
aaec8192 1562 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1563 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1564 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1565 }
1566 break;
1567 case 'w':
a6ec74c1
JH
1568 {
1569 UV auv = 0;
1570 U32 bytes = 0;
fc241834 1571
08ca2aa3
TH
1572 while (len > 0 && s < strend) {
1573 U8 ch;
f337b084 1574 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1575 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1576 /* UTF8_IS_XXXXX not right here because this is a BER, not
1577 * UTF-8 format - using constant 0x80 */
08ca2aa3 1578 if (ch < 0x80) {
a6ec74c1 1579 bytes = 0;
6e449a3a 1580 mPUSHu(auv);
a6ec74c1
JH
1581 len--;
1582 auv = 0;
08ca2aa3 1583 continue;
a6ec74c1 1584 }
08ca2aa3 1585 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1586 const char *t;
a6ec74c1 1587
f5992bc4 1588 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1589 while (s < strend) {
f337b084 1590 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1591 sv = mul128(sv, (U8)(ch & 0x7f));
1592 if (!(ch & 0x80)) {
a6ec74c1
JH
1593 bytes = 0;
1594 break;
1595 }
1596 }
10516c54 1597 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1598 while (*t == '0')
1599 t++;
1600 sv_chop(sv, t);
6e449a3a 1601 mPUSHs(sv);
a6ec74c1
JH
1602 len--;
1603 auv = 0;
1604 }
1605 }
1606 if ((s >= strend) && bytes)
49704364 1607 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1608 }
1609 break;
1610 case 'P':
49704364
LW
1611 if (symptr->howlen == e_star)
1612 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1613 EXTEND(SP, 1);
2d3e0934 1614 if (s + sizeof(char*) <= strend) {
08ca2aa3 1615 char *aptr;
aaec8192 1616 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1617 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1618 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1619 }
a6ec74c1 1620 break;
c174bf3b 1621#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1622 case 'q':
73cb7263 1623 while (len-- > 0) {
08ca2aa3 1624 Quad_t aquad;
aaec8192 1625 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1626 if (!checksum)
c174bf3b 1627 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1628 else if (checksum > bits_in_uv)
1629 cdouble += (NV)aquad;
1630 else
1631 cuv += aquad;
1632 }
a6ec74c1
JH
1633 break;
1634 case 'Q':
73cb7263 1635 while (len-- > 0) {
08ca2aa3 1636 Uquad_t auquad;
aaec8192 1637 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1638 if (!checksum)
c174bf3b 1639 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1640 else if (checksum > bits_in_uv)
1641 cdouble += (NV)auquad;
1642 else
1643 cuv += auquad;
a6ec74c1
JH
1644 }
1645 break;
1640b983 1646#endif
a6ec74c1
JH
1647 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1648 case 'f':
73cb7263 1649 while (len-- > 0) {
08ca2aa3 1650 float afloat;
aaec8192 1651 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1652 if (!checksum)
6e449a3a 1653 mPUSHn(afloat);
08ca2aa3 1654 else
73cb7263 1655 cdouble += afloat;
fc241834 1656 }
a6ec74c1
JH
1657 break;
1658 case 'd':
73cb7263 1659 while (len-- > 0) {
08ca2aa3 1660 double adouble;
aaec8192 1661 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1662 if (!checksum)
6e449a3a 1663 mPUSHn(adouble);
08ca2aa3 1664 else
73cb7263 1665 cdouble += adouble;
fc241834 1666 }
a6ec74c1 1667 break;
92d41999 1668 case 'F':
73cb7263 1669 while (len-- > 0) {
275663fa 1670 NV_bytes anv;
aaec8192
NC
1671 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1672 datumtype, needs_swap);
08ca2aa3 1673 if (!checksum)
275663fa 1674 mPUSHn(anv.nv);
08ca2aa3 1675 else
275663fa 1676 cdouble += anv.nv;
fc241834 1677 }
92d41999
JH
1678 break;
1679#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1680 case 'D':
73cb7263 1681 while (len-- > 0) {
275663fa 1682 ld_bytes aldouble;
aaec8192
NC
1683 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1684 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1685 /* The most common long double format, the x86 80-bit
1686 * extended precision, has either 2 or 6 unused bytes,
1687 * which may contain garbage, which may contain
1688 * unintentional data. While we do zero the bytes of
1689 * the long double data in pack(), here in unpack() we
1690 * don't, because it's really hard to envision that
1691 * reading the long double off aldouble would be
e075ae47 1692 * affected by the unused bytes.
42262fd3
JH
1693 *
1694 * Note that trying to unpack 'long doubles' of 'long
1695 * doubles' packed in another system is in the general
1696 * case doomed without having more detail. */
08ca2aa3 1697 if (!checksum)
275663fa 1698 mPUSHn(aldouble.ld);
08ca2aa3 1699 else
275663fa 1700 cdouble += aldouble.ld;
92d41999
JH
1701 }
1702 break;
1703#endif
a6ec74c1 1704 case 'u':
858fe5e1 1705 if (!checksum) {
f7fe979e 1706 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1707 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1708 if (l) SvPOK_on(sv);
1709 }
08ca2aa3 1710
bee81d36
KW
1711 /* Note that all legal uuencoded strings are ASCII printables, so
1712 * have the same representation under UTF-8 vs not. This means we
1713 * can ignore UTF8ness on legal input. For illegal we stop at the
1714 * first failure, and don't report where/what that is, so again we
1715 * can ignore UTF8ness */
1716
9233b56b
KW
1717 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1718 I32 a, b, c, d;
1719 char hunk[3];
1720
1721 len = PL_uudmap[*(U8*)s++] & 077;
1722 while (len > 0) {
1723 if (s < strend && ISUUCHAR(*s))
1724 a = PL_uudmap[*(U8*)s++] & 077;
1725 else
1726 a = 0;
1727 if (s < strend && ISUUCHAR(*s))
1728 b = PL_uudmap[*(U8*)s++] & 077;
1729 else
1730 b = 0;
1731 if (s < strend && ISUUCHAR(*s))
1732 c = PL_uudmap[*(U8*)s++] & 077;
1733 else
1734 c = 0;
1735 if (s < strend && ISUUCHAR(*s))
1736 d = PL_uudmap[*(U8*)s++] & 077;
1737 else
1738 d = 0;
1739 hunk[0] = (char)((a << 2) | (b >> 4));
1740 hunk[1] = (char)((b << 4) | (c >> 2));
1741 hunk[2] = (char)((c << 6) | d);
1742 if (!checksum)
1743 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1744 len -= 3;
1745 }
1746 if (*s == '\n')
1747 s++;
1748 else /* possible checksum byte */
1749 if (s + 1 < strend && s[1] == '\n')
1750 s += 2;
1751 }
858fe5e1
TC
1752 if (!checksum)
1753 XPUSHs(sv);
a6ec74c1 1754 break;
99f862a1 1755 } /* End of switch */
49704364 1756
a6ec74c1 1757 if (checksum) {
1109a392 1758 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1759 (checksum > bits_in_uv &&
08ca2aa3
TH
1760 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1761 NV trouble, anv;
a6ec74c1 1762
08ca2aa3 1763 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1764 while (checksum >= 16) {
1765 checksum -= 16;
08ca2aa3 1766 anv *= 65536.0;
a6ec74c1 1767 }
a6ec74c1 1768 while (cdouble < 0.0)
08ca2aa3 1769 cdouble += anv;
09b94b1f
S
1770 cdouble = Perl_modf(cdouble / anv, &trouble);
1771#ifdef LONGDOUBLE_DOUBLEDOUBLE
1772 /* Workaround for powerpc doubledouble modfl bug:
1773 * close to 1.0L and -1.0L cdouble is 0, and trouble
1774 * is cdouble / anv. */
1775 if (trouble != Perl_ceil(trouble)) {
1776 cdouble = trouble;
1777 if (cdouble > 1.0L) cdouble -= 1.0L;
1778 if (cdouble < -1.0L) cdouble += 1.0L;
1779 }
1780#endif
1781 cdouble *= anv;
c4c5f44a 1782 sv = newSVnv(cdouble);
a6ec74c1
JH
1783 }
1784 else {
fa8ec7c1
NC
1785 if (checksum < bits_in_uv) {
1786 UV mask = ((UV)1 << checksum) - 1;
92d41999 1787 cuv &= mask;
a6ec74c1 1788 }
c4c5f44a 1789 sv = newSVuv(cuv);
a6ec74c1 1790 }
6e449a3a 1791 mXPUSHs(sv);
a6ec74c1
JH
1792 checksum = 0;
1793 }
fc241834 1794
49704364
LW
1795 if (symptr->flags & FLAG_SLASH){
1796 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1797 break;
49704364
LW
1798 if( next_symbol(symptr) ){
1799 if( symptr->howlen == e_number )
1800 Perl_croak(aTHX_ "Count after length/code in unpack" );
1801 if( beyond ){
1802 /* ...end of char buffer then no decent length available */
1803 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1804 } else {
1805 /* take top of stack (hope it's numeric) */
1806 len = POPi;
1807 if( len < 0 )
1808 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1809 }
1810 } else {
1811 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1812 }
1813 datumtype = symptr->code;
21c16052 1814 explicit_length = FALSE;
49704364
LW
1815 goto redo_switch;
1816 }
a6ec74c1 1817 }
49704364 1818
18529408
IZ
1819 if (new_s)
1820 *new_s = s;
1821 PUTBACK;
1822 return SP - PL_stack_base - start_sp_offset;
1823}
1824
1825PP(pp_unpack)
1826{
1827 dSP;
bab9c0ac 1828 dPOPPOPssrl;
18529408
IZ
1829 I32 gimme = GIMME_V;
1830 STRLEN llen;
1831 STRLEN rlen;
5c144d81
NC
1832 const char *pat = SvPV_const(left, llen);
1833 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1834 const char *strend = s + rlen;
1835 const char *patend = pat + llen;
08ca2aa3 1836 I32 cnt;
18529408
IZ
1837
1838 PUTBACK;
7accc089 1839 cnt = unpackstring(pat, patend, s, strend,
49704364 1840 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1841 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1842
18529408
IZ
1843 SPAGAIN;
1844 if ( !cnt && gimme == G_SCALAR )
1845 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1846 RETURN;
1847}
1848
f337b084 1849STATIC U8 *
e68aed92 1850doencodes(U8 *h, const U8 *s, I32 len)
a6ec74c1 1851{
f337b084 1852 *h++ = PL_uuemap[len];
a6ec74c1 1853 while (len > 2) {
f337b084
TH
1854 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1855 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1856 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1857 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1858 s += 3;
1859 len -= 3;
1860 }
1861 if (len > 0) {
e68aed92 1862 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1863 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1864 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1865 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1866 *h++ = PL_uuemap[0];
a6ec74c1 1867 }
f337b084
TH
1868 *h++ = '\n';
1869 return h;
a6ec74c1
JH
1870}
1871
1872STATIC SV *
f7fe979e 1873S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1874{
8b6e33c7
AL
1875 SV *result = newSVpvn(s, l);
1876 char *const result_c = SvPV_nolen(result); /* convenience */
1877 char *out = result_c;
1878 bool skip = 1;
1879 bool ignore = 0;
a6ec74c1 1880
7918f24d
NC
1881 PERL_ARGS_ASSERT_IS_AN_INT;
1882
a6ec74c1
JH
1883 while (*s) {
1884 switch (*s) {
1885 case ' ':
1886 break;
1887 case '+':
1888 if (!skip) {
1889 SvREFCNT_dec(result);
1890 return (NULL);
1891 }
1892 break;
1893 case '0':
1894 case '1':
1895 case '2':
1896 case '3':
1897 case '4':
1898 case '5':
1899 case '6':
1900 case '7':
1901 case '8':
1902 case '9':
1903 skip = 0;
1904 if (!ignore) {
1905 *(out++) = *s;
1906 }
1907 break;
1908 case '.':
1909 ignore = 1;
1910 break;
1911 default:
1912 SvREFCNT_dec(result);
1913 return (NULL);
1914 }
1915 s++;
1916 }
1917 *(out++) = '\0';
1918 SvCUR_set(result, out - result_c);
1919 return (result);
1920}
1921
1922/* pnum must be '\0' terminated */
1923STATIC int
1924S_div128(pTHX_ SV *pnum, bool *done)
1925{
8b6e33c7
AL
1926 STRLEN len;
1927 char * const s = SvPV(pnum, len);
1928 char *t = s;
1929 int m = 0;
1930
7918f24d
NC
1931 PERL_ARGS_ASSERT_DIV128;
1932
8b6e33c7
AL
1933 *done = 1;
1934 while (*t) {
1935 const int i = m * 10 + (*t - '0');
1936 const int r = (i >> 7); /* r < 10 */
1937 m = i & 0x7F;
1938 if (r) {
1939 *done = 0;
1940 }
1941 *(t++) = '0' + r;
a6ec74c1 1942 }
8b6e33c7
AL
1943 *(t++) = '\0';
1944 SvCUR_set(pnum, (STRLEN) (t - s));
1945 return (m);
a6ec74c1
JH
1946}
1947
18529408 1948/*
7accc089
JH
1949=for apidoc packlist
1950
796b6530 1951The engine implementing C<pack()> Perl function.
7accc089 1952
bfce84ec
AL
1953=cut
1954*/
7accc089
JH
1955
1956void
5aaab254 1957Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1958{
aadb217d
JH
1959 tempsym_t sym;
1960
7918f24d
NC
1961 PERL_ARGS_ASSERT_PACKLIST;
1962
f7fe979e 1963 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1964
f337b084
TH
1965 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1966 Also make sure any UTF8 flag is loaded */
56eb0262 1967 SvPV_force_nolen(cat);
bfce84ec
AL
1968 if (DO_UTF8(cat))
1969 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1970
49704364
LW
1971 (void)pack_rec( cat, &sym, beglist, endlist );
1972}
1973
f337b084
TH
1974/* like sv_utf8_upgrade, but also repoint the group start markers */
1975STATIC void
1976marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1977 STRLEN len;
1978 tempsym_t *group;
f7fe979e
AL
1979 const char *from_ptr, *from_start, *from_end, **marks, **m;
1980 char *to_start, *to_ptr;
f337b084
TH
1981
1982 if (SvUTF8(sv)) return;
1983
aa07b2f6 1984 from_start = SvPVX_const(sv);
f337b084
TH
1985 from_end = from_start + SvCUR(sv);
1986 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 1987 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
1988 if (from_ptr == from_end) {
1989 /* Simple case: no character needs to be changed */
1990 SvUTF8_on(sv);
1991 return;
1992 }
1993
3473cf63 1994 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 1995 Newx(to_start, len, char);
f337b084
TH
1996 Copy(from_start, to_start, from_ptr-from_start, char);
1997 to_ptr = to_start + (from_ptr-from_start);
1998
a02a5408 1999 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2000 for (group=sym_ptr; group; group = group->previous)
2001 marks[group->level] = from_start + group->strbeg;
2002 marks[sym_ptr->level+1] = from_end+1;
2003 for (m = marks; *m < from_ptr; m++)
2004 *m = to_start + (*m-from_start);
2005
2006 for (;from_ptr < from_end; from_ptr++) {
2007 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2008 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2009 }
2010 *to_ptr = 0;
2011
2012 while (*m == from_ptr) *m++ = to_ptr;
2013 if (m != marks + sym_ptr->level+1) {
2014 Safefree(marks);
2015 Safefree(to_start);
5637ef5b
NC
2016 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2017 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2018 }
2019 for (group=sym_ptr; group; group = group->previous)
2020 group->strbeg = marks[group->level] - to_start;
2021 Safefree(marks);
2022
2023 if (SvOOK(sv)) {
2024 if (SvIVX(sv)) {
b162af07 2025 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2026 from_start -= SvIVX(sv);
2027 SvIV_set(sv, 0);
2028 }
2029 SvFLAGS(sv) &= ~SVf_OOK;
2030 }
2031 if (SvLEN(sv) != 0)
2032 Safefree(from_start);
f880fe2f 2033 SvPV_set(sv, to_start);
b162af07
SP
2034 SvCUR_set(sv, to_ptr - to_start);
2035 SvLEN_set(sv, len);
f337b084
TH
2036 SvUTF8_on(sv);
2037}
2038
2039/* Exponential string grower. Makes string extension effectively O(n)
2040 needed says how many extra bytes we need (not counting the final '\0')
2041 Only grows the string if there is an actual lack of space
2042*/
2043STATIC char *
0bd48802 2044S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2045 const STRLEN cur = SvCUR(sv);
2046 const STRLEN len = SvLEN(sv);
f337b084 2047 STRLEN extend;
7918f24d
NC
2048
2049 PERL_ARGS_ASSERT_SV_EXP_GROW;
2050
f337b084
TH
2051 if (len - cur > needed) return SvPVX(sv);
2052 extend = needed > len ? needed : len;
2053 return SvGROW(sv, len+extend+1);
2054}
49704364 2055
93f6e112 2056static SV *
b197e565 2057S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2058{
2059 SvGETMAGIC(sv);
93f6e112
FC
2060 if (UNLIKELY(SvAMAGIC(sv)))
2061 sv = sv_2num(sv);
99f450cc 2062 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2063 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2064 const NV nv = SvNV_nomg(sv);
2065 if (c == 'w')
2066 Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
2067 else
2068 Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
2069 }
93f6e112 2070 return sv;
354b74ae
FC
2071}
2072
93f6e112
FC
2073#define SvIV_no_inf(sv,d) \
2074 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2075#define SvUV_no_inf(sv,d) \
2076 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2077
49704364
LW
2078STATIC
2079SV **
f337b084 2080S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2081{
49704364 2082 tempsym_t lookahead;
f337b084
TH
2083 I32 items = endlist - beglist;
2084 bool found = next_symbol(symptr);
2085 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2086 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2087 char* from;
f337b084 2088
7918f24d
NC
2089 PERL_ARGS_ASSERT_PACK_REC;
2090
f337b084
TH
2091 if (symptr->level == 0 && found && symptr->code == 'U') {
2092 marked_upgrade(aTHX_ cat, symptr);
2093 symptr->flags |= FLAG_DO_UTF8;
2094 utf8 = 0;
49704364 2095 }
f337b084 2096 symptr->strbeg = SvCUR(cat);
49704364
LW
2097
2098 while (found) {
f337b084
TH
2099 SV *fromstr;
2100 STRLEN fromlen;
2101 I32 len;
a0714e2c 2102 SV *lengthcode = NULL;
49704364 2103 I32 datumtype = symptr->code;
f337b084
TH
2104 howlen_t howlen = symptr->howlen;
2105 char *start = SvPVX(cat);
2106 char *cur = start + SvCUR(cat);
a1219b5e 2107 bool needs_swap;
49704364 2108
fc1bb3f2 2109#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2110#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2111
2112 switch (howlen) {
fc241834 2113 case e_star:
f337b084
TH
2114 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2115 0 : items;
2116 break;
2117 default:
2118 /* e_no_len and e_number */
2119 len = symptr->length;
49704364
LW
2120 break;
2121 }
2122
f337b084 2123 if (len) {
a7a3cfaa 2124 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2125
a7a3cfaa
TH
2126 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2127 /* We can process this letter. */
2128 STRLEN size = props & PACK_SIZE_MASK;
2129 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2130 }
f337b084
TH
2131 }
2132
49704364
LW
2133 /* Look ahead for next symbol. Do we have code/code? */
2134 lookahead = *symptr;
2135 found = next_symbol(&lookahead);
246f24af
TH
2136 if (symptr->flags & FLAG_SLASH) {
2137 IV count;
f337b084 2138 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2139 if (strchr("aAZ", lookahead.code)) {
2140 if (lookahead.howlen == e_number) count = lookahead.length;
2141 else {
ce399ba6 2142 if (items > 0) {
48a5da33 2143 count = sv_len_utf8(*beglist);
ce399ba6 2144 }
246f24af
TH
2145 else count = 0;
2146 if (lookahead.code == 'Z') count++;
2147 }
2148 } else {
2149 if (lookahead.howlen == e_number && lookahead.length < items)
2150 count = lookahead.length;
2151 else count = items;
2152 }
2153 lookahead.howlen = e_number;
2154 lookahead.length = count;
2155 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2156 }
49704364 2157
a1219b5e
NC
2158 needs_swap = NEEDS_SWAP(datumtype);
2159
fc241834
RGS
2160 /* Code inside the switch must take care to properly update
2161 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2162 doesn't simply leave using break */
0c7df902 2163 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2164 default:
f337b084
TH
2165 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2166 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2167 case '%':
49704364 2168 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2169
28be1210 2170 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2171 case '.':
2172 if (howlen == e_star) from = start;
2173 else if (len == 0) from = cur;
2174 else {
2175 tempsym_t *group = symptr;
2176
2177 while (--len && group) group = group->previous;
2178 from = group ? start + group->strbeg : start;
2179 }
2180 fromstr = NEXTFROM;
354b74ae 2181 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2182 goto resize;
28be1210 2183 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2184 case '@':
28be1210
TH
2185 from = start + symptr->strbeg;
2186 resize:
28be1210 2187 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2188 if (len >= 0) {
2189 while (len && from < cur) {
2190 from += UTF8SKIP(from);
2191 len--;
2192 }
2193 if (from > cur)
2194 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2195 if (len) {
2196 /* Here we know from == cur */
2197 grow:
2198 GROWING(0, cat, start, cur, len);
2199 Zero(cur, len, char);
2200 cur += len;
2201 } else if (from < cur) {
2202 len = cur - from;
2203 goto shrink;
2204 } else goto no_change;
2205 } else {
2206 cur = from;
2207 len = -len;
2208 goto utf8_shrink;
f337b084 2209 }
28be1210
TH
2210 else {
2211 len -= cur - from;
f337b084 2212 if (len > 0) goto grow;
28be1210 2213 if (len == 0) goto no_change;
fc241834 2214 len = -len;
28be1210 2215 goto shrink;
f337b084 2216 }
a6ec74c1 2217 break;
81d52ecd 2218
fc241834 2219 case '(': {
49704364 2220 tempsym_t savsym = *symptr;
66c611c5
MHM
2221 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2222 symptr->flags |= group_modifiers;
49704364
LW
2223 symptr->patend = savsym.grpend;
2224 symptr->level++;
f337b084 2225 symptr->previous = &lookahead;
18529408 2226 while (len--) {
f337b084
TH
2227 U32 was_utf8;
2228 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2229 else symptr->flags &= ~FLAG_PARSE_UTF8;
2230 was_utf8 = SvUTF8(cat);
49704364 2231 symptr->patptr = savsym.grpbeg;
f337b084
TH
2232 beglist = pack_rec(cat, symptr, beglist, endlist);
2233 if (SvUTF8(cat) != was_utf8)
2234 /* This had better be an upgrade while in utf8==0 mode */
2235 utf8 = 1;
2236
49704364 2237 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2238 break; /* No way to continue */
2239 }
ee790063 2240 items = endlist - beglist;
f337b084
TH
2241 lookahead.flags = symptr->flags & ~group_modifiers;
2242 goto no_change;
18529408 2243 }
62f95557
IZ
2244 case 'X' | TYPE_IS_SHRIEKING:
2245 if (!len) /* Avoid division by 0 */
2246 len = 1;
f337b084
TH
2247 if (utf8) {
2248 char *hop, *last;
2249 I32 l = len;
2250 hop = last = start;
2251 while (hop < cur) {
2252 hop += UTF8SKIP(hop);
2253 if (--l == 0) {
2254 last = hop;
2255 l = len;
2256 }
2257 }
2258 if (last > cur)
2259 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2260 cur = last;
2261 break;
2262 }
2263 len = (cur-start) % len;
924ba076 2264 /* FALLTHROUGH */
a6ec74c1 2265 case 'X':
f337b084
TH
2266 if (utf8) {
2267 if (len < 1) goto no_change;
28be1210 2268 utf8_shrink:
f337b084
TH
2269 while (len > 0) {
2270 if (cur <= start)
28be1210
TH
2271 Perl_croak(aTHX_ "'%c' outside of string in pack",
2272 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2273 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2274 if (cur <= start)
28be1210
TH
2275 Perl_croak(aTHX_ "'%c' outside of string in pack",
2276 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2277 }
2278 len--;
2279 }
2280 } else {
fc241834 2281 shrink:
f337b084 2282 if (cur - start < len)
28be1210
TH
2283 Perl_croak(aTHX_ "'%c' outside of string in pack",
2284 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2285 cur -= len;
2286 }
2287 if (cur < start+symptr->strbeg) {
2288 /* Make sure group starts don't point into the void */
2289 tempsym_t *group;
9e27e96a 2290 const STRLEN length = cur-start;
f337b084
TH
2291 for (group = symptr;
2292 group && length < group->strbeg;
2293 group = group->previous) group->strbeg = length;
2294 lookahead.strbeg = length;
2295 }
a6ec74c1 2296 break;
fc241834
RGS
2297 case 'x' | TYPE_IS_SHRIEKING: {
2298 I32 ai32;
62f95557
IZ
2299 if (!len) /* Avoid division by 0 */
2300 len = 1;
230e1fce 2301 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2302 else ai32 = (cur - start) % len;
2303 if (ai32 == 0) goto no_change;
2304 len -= ai32;
2305 }
924ba076 2306 /* FALLTHROUGH */
a6ec74c1 2307 case 'x':
f337b084 2308 goto grow;
a6ec74c1
JH
2309 case 'A':
2310 case 'Z':
f337b084 2311 case 'a': {
f7fe979e 2312 const char *aptr;
f337b084 2313
a6ec74c1 2314 fromstr = NEXTFROM;
e62f0680 2315 aptr = SvPV_const(fromstr, fromlen);
f337b084 2316 if (DO_UTF8(fromstr)) {
f7fe979e 2317 const char *end, *s;
f337b084
TH
2318
2319 if (!utf8 && !SvUTF8(cat)) {
2320 marked_upgrade(aTHX_ cat, symptr);
2321 lookahead.flags |= FLAG_DO_UTF8;
2322 lookahead.strbeg = symptr->strbeg;
2323 utf8 = 1;
2324 start = SvPVX(cat);
2325 cur = start + SvCUR(cat);
2326 }
fc241834 2327 if (howlen == e_star) {
f337b084
TH
2328 if (utf8) goto string_copy;
2329 len = fromlen+1;
2330 }
2331 s = aptr;
2332 end = aptr + fromlen;
2333 fromlen = datumtype == 'Z' ? len-1 : len;
2334 while ((I32) fromlen > 0 && s < end) {
2335 s += UTF8SKIP(s);
2336 fromlen--;
2337 }
2338 if (s > end)
2339 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2340 if (utf8) {
fc241834 2341 len = fromlen;
f337b084
TH
2342 if (datumtype == 'Z') len++;
2343 fromlen = s-aptr;
2344 len += fromlen;
fc241834 2345
f337b084 2346 goto string_copy;
fc241834 2347 }
f337b084
TH
2348 fromlen = len - fromlen;
2349 if (datumtype == 'Z') fromlen--;
2350 if (howlen == e_star) {
2351 len = fromlen;
2352 if (datumtype == 'Z') len++;
fc241834 2353 }
f337b084 2354 GROWING(0, cat, start, cur, len);
9df874cd 2355 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2356 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2357 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2358 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2359 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2360 cur += fromlen;
a6ec74c1 2361 len -= fromlen;
f337b084
TH
2362 } else if (utf8) {
2363 if (howlen == e_star) {
2364 len = fromlen;
2365 if (datumtype == 'Z') len++;
a6ec74c1 2366 }
f337b084
TH
2367 if (len <= (I32) fromlen) {
2368 fromlen = len;
2369 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2370 }
fc241834 2371 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2372 upgrade, so:
2373 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2374 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2375 len -= fromlen;
2376 while (fromlen > 0) {
230e1fce 2377 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2378 aptr++;
2379 fromlen--;
fc241834 2380 }
f337b084
TH
2381 } else {
2382 string_copy:
2383 if (howlen == e_star) {
2384 len = fromlen;
2385 if (datumtype == 'Z') len++;
2386 }
2387 if (len <= (I32) fromlen) {
2388 fromlen = len;
2389 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2390 }
f337b084
TH
2391 GROWING(0, cat, start, cur, len);
2392 Copy(aptr, cur, fromlen, char);
2393 cur += fromlen;
2394 len -= fromlen;
a6ec74c1 2395 }
f337b084
TH
2396 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2397 cur += len;
3c4fb04a 2398 SvTAINT(cat);
a6ec74c1 2399 break;
f337b084 2400 }
a6ec74c1 2401 case 'B':
f337b084 2402 case 'b': {
b83604b4 2403 const char *str, *end;
f337b084
TH
2404 I32 l, field_len;
2405 U8 bits;
2406 bool utf8_source;
2407 U32 utf8_flags;
a6ec74c1 2408
fc241834 2409 fromstr = NEXTFROM;
b83604b4 2410 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2411 end = str + fromlen;
2412 if (DO_UTF8(fromstr)) {
2413 utf8_source = TRUE;
041457d9 2414 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2415 } else {
2416 utf8_source = FALSE;
2417 utf8_flags = 0; /* Unused, but keep compilers happy */
2418 }
2419 if (howlen == e_star) len = fromlen;
2420 field_len = (len+7)/8;
2421 GROWING(utf8, cat, start, cur, field_len);
2422 if (len > (I32)fromlen) len = fromlen;
2423 bits = 0;
2424 l = 0;
2425 if (datumtype == 'B')
2426 while (l++ < len) {
2427 if (utf8_source) {
95b63a38 2428 UV val = 0;
f337b084
TH
2429 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2430 bits |= val & 1;
2431 } else bits |= *str++ & 1;
2432 if (l & 7) bits <<= 1;
fc241834 2433 else {
f337b084
TH
2434 PUSH_BYTE(utf8, cur, bits);
2435 bits = 0;
a6ec74c1
JH
2436 }
2437 }
f337b084
TH
2438 else
2439 /* datumtype == 'b' */
2440 while (l++ < len) {
2441 if (utf8_source) {
95b63a38 2442 UV val = 0;
f337b084
TH
2443 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2444 if (val & 1) bits |= 0x80;
2445 } else if (*str++ & 1)
2446 bits |= 0x80;
2447 if (l & 7) bits >>= 1;
fc241834 2448 else {
f337b084
TH
2449 PUSH_BYTE(utf8, cur, bits);
2450 bits = 0;
a6ec74c1
JH
2451 }
2452 }
f337b084
TH
2453 l--;
2454 if (l & 7) {
fc241834 2455 if (datumtype == 'B')
f337b084 2456 bits <<= 7 - (l & 7);
fc241834 2457 else
f337b084
TH
2458 bits >>= 7 - (l & 7);
2459 PUSH_BYTE(utf8, cur, bits);
2460 l += 7;
a6ec74c1 2461 }
f337b084
TH
2462 /* Determine how many chars are left in the requested field */
2463 l /= 8;
2464 if (howlen == e_star) field_len = 0;
2465 else field_len -= l;
2466 Zero(cur, field_len, char);
2467 cur += field_len;
a6ec74c1 2468 break;
f337b084 2469 }
a6ec74c1 2470 case 'H':
f337b084 2471 case 'h': {
10516c54 2472 const char *str, *end;
f337b084
TH
2473 I32 l, field_len;
2474 U8 bits;
2475 bool utf8_source;
2476 U32 utf8_flags;
a6ec74c1 2477
fc241834 2478 fromstr = NEXTFROM;
10516c54 2479 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2480 end = str + fromlen;
2481 if (DO_UTF8(fromstr)) {
2482 utf8_source = TRUE;
041457d9 2483 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2484 } else {
2485 utf8_source = FALSE;
2486 utf8_flags = 0; /* Unused, but keep compilers happy */
2487 }
2488 if (howlen == e_star) len = fromlen;
2489 field_len = (len+1)/2;
2490 GROWING(utf8, cat, start, cur, field_len);
0403a1ad 2491 if (!utf8_source && len > (I32)fromlen) len = fromlen;
f337b084
TH
2492 bits = 0;
2493 l = 0;
2494 if (datumtype == 'H')
2495 while (l++ < len) {
2496 if (utf8_source) {
95b63a38 2497 UV val = 0;
f337b084
TH
2498 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2499 if (val < 256 && isALPHA(val))
2500 bits |= (val + 9) & 0xf;
a6ec74c1 2501 else
f337b084
TH
2502 bits |= val & 0xf;
2503 } else if (isALPHA(*str))
2504 bits |= (*str++ + 9) & 0xf;
2505 else
2506 bits |= *str++ & 0xf;
2507 if (l & 1) bits <<= 4;
fc241834 2508 else {
f337b084
TH
2509 PUSH_BYTE(utf8, cur, bits);
2510 bits = 0;
a6ec74c1
JH
2511 }
2512 }
f337b084
TH
2513 else
2514 while (l++ < len) {
2515 if (utf8_source) {
95b63a38 2516 UV val = 0;
f337b084
TH
2517 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2518 if (val < 256 && isALPHA(val))
2519 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2520 else
f337b084
TH
2521 bits |= (val & 0xf) << 4;
2522 } else if (isALPHA(*str))
2523 bits |= ((*str++ + 9) & 0xf) << 4;
2524 else
2525 bits |= (*str++ & 0xf) << 4;
2526 if (l & 1) bits >>= 4;
fc241834 2527 else {
f337b084
TH
2528 PUSH_BYTE(utf8, cur, bits);
2529 bits = 0;
a6ec74c1 2530 }
fc241834 2531 }
f337b084
TH
2532 l--;
2533 if (l & 1) {
2534 PUSH_BYTE(utf8, cur, bits);
2535 l++;
2536 }
2537 /* Determine how many chars are left in the requested field */
2538 l /= 2;
2539 if (howlen == e_star) field_len = 0;
2540 else field_len -= l;
2541 Zero(cur, field_len, char);
2542 cur += field_len;
2543 break;
fc241834
RGS
2544 }
2545 case 'c':
f337b084
TH
2546 while (len-- > 0) {
2547 IV aiv;
2548 fromstr = NEXTFROM;
354b74ae 2549 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2550 if ((-128 > aiv || aiv > 127))
2551 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2552 "Character in 'c' format wrapped in pack");
585ec06d 2553 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2554 }
2555 break;
2556 case 'C':
f337b084
TH
2557 if (len == 0) {
2558 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2559 break;
2560 }
a6ec74c1 2561 while (len-- > 0) {
f337b084 2562 IV aiv;
a6ec74c1 2563 fromstr = NEXTFROM;
354b74ae 2564 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2565 if ((0 > aiv || aiv > 0xff))
2566 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2567 "Character in 'C' format wrapped in pack");
1651fc44 2568 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2569 }
fc241834
RGS
2570 break;
2571 case 'W': {
2572 char *end;
670f1322 2573 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2574
2575 end = start+SvLEN(cat)-1;
2576 if (utf8) end -= UTF8_MAXLEN-1;
2577 while (len-- > 0) {
2578 UV auv;
2579 fromstr = NEXTFROM;
354b74ae 2580 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2581 if (in_bytes) auv = auv % 0x100;
2582 if (utf8) {
2583 W_utf8:
2584 if (cur > end) {
2585 *cur = '\0';
b162af07 2586 SvCUR_set(cat, cur - start);
fc241834
RGS
2587
2588 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2589 end = start+SvLEN(cat)-UTF8_MAXLEN;
2590 }
c80e42f3
KW
2591 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2592 auv,
041457d9 2593 warn_utf8 ?
230e1fce 2594 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2595 } else {
2596 if (auv >= 0x100) {
2597 if (!SvUTF8(cat)) {
2598 *cur = '\0';
b162af07 2599 SvCUR_set(cat, cur - start);
fc241834
RGS
2600 marked_upgrade(aTHX_ cat, symptr);
2601 lookahead.flags |= FLAG_DO_UTF8;
2602 lookahead.strbeg = symptr->strbeg;
2603 utf8 = 1;
2604 start = SvPVX(cat);
2605 cur = start + SvCUR(cat);
2606 end = start+SvLEN(cat)-UTF8_MAXLEN;
2607 goto W_utf8;
2608 }
a2a5de95
NC
2609 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2610 "Character in 'W' format wrapped in pack");
fc241834
RGS
2611 auv &= 0xff;
2612 }
2613 if (cur >= end) {
2614 *cur = '\0';
b162af07 2615 SvCUR_set(cat, cur - start);
fc241834
RGS
2616 GROWING(0, cat, start, cur, len+1);
2617 end = start+SvLEN(cat)-1;
2618 }
fe2774ed 2619 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2620 }
2621 }
2622 break;
fc241834
RGS
2623 }
2624 case 'U': {
2625 char *end;
2626
2627 if (len == 0) {
2628 if (!(symptr->flags & FLAG_DO_UTF8)) {
2629 marked_upgrade(aTHX_ cat, symptr);
2630 lookahead.flags |= FLAG_DO_UTF8;
2631 lookahead.strbeg = symptr->strbeg;
2632 }
2633 utf8 = 0;
2634 goto no_change;
2635 }
2636
2637 end = start+SvLEN(cat);
2638 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2639 while (len-- > 0) {
fc241834 2640 UV auv;
a6ec74c1 2641 fromstr = NEXTFROM;
354b74ae 2642 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2643 if (utf8) {
230e1fce 2644 U8 buffer[UTF8_MAXLEN], *endb;
3ece276e 2645 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
041457d9 2646 warn_utf8 ?
fc241834
RGS
2647 0 : UNICODE_ALLOW_ANY);
2648 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2649 *cur = '\0';
b162af07 2650 SvCUR_set(cat, cur - start);
fc241834
RGS
2651 GROWING(0, cat, start, cur,
2652 len+(endb-buffer)*UTF8_EXPAND);
2653 end = start+SvLEN(cat);
2654 }
d21d7215 2655 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2656 } else {
2657 if (cur >= end) {
2658 *cur = '\0';
b162af07 2659 SvCUR_set(cat, cur - start);
fc241834
RGS
2660 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2661 end = start+SvLEN(cat)-UTF8_MAXLEN;
2662 }
3ece276e 2663 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
041457d9 2664 warn_utf8 ?
230e1fce 2665 0 : UNICODE_ALLOW_ANY);
fc241834 2666 }
a6ec74c1 2667 }
a6ec74c1 2668 break;
fc241834 2669 }
a6ec74c1
JH
2670 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2671 case 'f':
a6ec74c1 2672 while (len-- > 0) {
f337b084
TH
2673 float afloat;
2674 NV anv;
a6ec74c1 2675 fromstr = NEXTFROM;
f337b084 2676 anv = SvNV(fromstr);
85bba25f 2677# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2678 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2679 * on Alpha; fake it if we don't have them.
2680 */
f337b084 2681 if (anv > FLT_MAX)
fc241834 2682 afloat = FLT_MAX;
f337b084 2683 else if (anv < -FLT_MAX)
fc241834 2684 afloat = -FLT_MAX;
f337b084 2685 else afloat = (float)anv;
baf3cf9c 2686# else
d22fb6ca
DD
2687#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2688 if(Perl_isnan(anv))
2689 afloat = (float)NV_NAN;
2690 else
2691#endif
919894b7
DM
2692 /* a simple cast to float is undefined if outside
2693 * the range of values that can be represented */
2694 afloat = (float)(anv > FLT_MAX ? NV_INF :
2695 anv < -FLT_MAX ? -NV_INF : anv);
baf3cf9c 2696# endif
3a88beaa 2697 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2698 }
2699 break;
2700 case 'd':
a6ec74c1 2701 while (len-- > 0) {
f337b084
TH
2702 double adouble;
2703 NV anv;
a6ec74c1 2704 fromstr = NEXTFROM;
f337b084 2705 anv = SvNV(fromstr);
85bba25f 2706# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2707 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2708 * on Alpha; fake it if we don't have them.
2709 */
f337b084 2710 if (anv > DBL_MAX)
fc241834 2711 adouble = DBL_MAX;
f337b084 2712 else if (anv < -DBL_MAX)
fc241834 2713 adouble = -DBL_MAX;
f337b084 2714 else adouble = (double)anv;
baf3cf9c 2715# else
f337b084 2716 adouble = (double)anv;
baf3cf9c 2717# endif
3a88beaa 2718 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2719 }
2720 break;
fc241834 2721 case 'F': {
275663fa 2722 NV_bytes anv;
1109a392 2723 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2724 while (len-- > 0) {
2725 fromstr = NEXTFROM;
cd07c537
DM
2726#ifdef __GNUC__
2727 /* to work round a gcc/x86 bug; don't use SvNV */
2728 anv.nv = sv_2nv(fromstr);
070e2677
TC
2729# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2730 && LONG_DOUBLESIZE > 10
2731 /* GCC sometimes overwrites the padding in the
2732 assignment above */
2733 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2734# endif
cd07c537 2735#else
275663fa 2736 anv.nv = SvNV(fromstr);
cd07c537 2737#endif
3a88beaa 2738 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2739 }
2740 break;
fc241834 2741 }
92d41999 2742#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2743 case 'D': {
275663fa 2744 ld_bytes aldouble;
1109a392
MHM
2745 /* long doubles can have unused bits, which may be nonzero */
2746 Zero(&aldouble, 1, long double);
92d41999
JH
2747 while (len-- > 0) {
2748 fromstr = NEXTFROM;
cd07c537
DM
2749# ifdef __GNUC__
2750 /* to work round a gcc/x86 bug; don't use SvNV */
2751 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2752# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2753 /* GCC sometimes overwrites the padding in the
2754 assignment above */
2755 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2756# endif
cd07c537 2757# else
275663fa 2758 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2759# endif
3a88beaa
NC
2760 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2761 needs_swap);
92d41999
JH
2762 }
2763 break;
fc241834 2764 }
92d41999 2765#endif
068bd2e7 2766 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2767 case 'n':
2768 while (len-- > 0) {
f337b084 2769 I16 ai16;
a6ec74c1 2770 fromstr = NEXTFROM;
354b74ae 2771 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2772 ai16 = PerlSock_htons(ai16);
3a88beaa 2773 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2774 }
2775 break;
068bd2e7 2776 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2777 case 'v':
2778 while (len-- > 0) {
f337b084 2779 I16 ai16;
a6ec74c1 2780 fromstr = NEXTFROM;
354b74ae 2781 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2782 ai16 = htovs(ai16);
3a88beaa 2783 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2784 }
2785 break;
49704364 2786 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2787#if SHORTSIZE != SIZE16
fc241834 2788 while (len-- > 0) {
f337b084 2789 unsigned short aushort;
fc241834 2790 fromstr = NEXTFROM;
354b74ae 2791 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2792 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2793 }
49704364
LW
2794 break;
2795#else
924ba076 2796 /* FALLTHROUGH */
a6ec74c1 2797#endif
49704364 2798 case 'S':
fc241834 2799 while (len-- > 0) {
f337b084 2800 U16 au16;
fc241834 2801 fromstr = NEXTFROM;
354b74ae 2802 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2803 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2804 }
2805 break;
49704364 2806 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2807#if SHORTSIZE != SIZE16
fc241834 2808 while (len-- > 0) {
f337b084 2809 short ashort;
fc241834 2810 fromstr = NEXTFROM;
354b74ae 2811 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2812 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2813 }
49704364
LW
2814 break;
2815#else
924ba076 2816 /* FALLTHROUGH */
a6ec74c1 2817#endif
49704364
LW
2818 case 's':
2819 while (len-- > 0) {
f337b084 2820 I16 ai16;
49704364 2821 fromstr = NEXTFROM;
354b74ae 2822 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2823 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2824 }
2825 break;
2826 case 'I':
49704364 2827 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2828 while (len-- > 0) {
f337b084 2829 unsigned int auint;
a6ec74c1 2830 fromstr = NEXTFROM;
354b74ae 2831 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2832 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2833 }
2834 break;
92d41999
JH
2835 case 'j':
2836 while (len-- > 0) {
f337b084 2837 IV aiv;
92d41999 2838 fromstr = NEXTFROM;
354b74ae 2839 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2840 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2841 }
2842 break;
2843 case 'J':
2844 while (len-- > 0) {
f337b084 2845 UV auv;
92d41999 2846 fromstr = NEXTFROM;
354b74ae 2847 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2848 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2849 }
2850 break;
a6ec74c1
JH
2851 case 'w':
2852 while (len-- > 0) {
f337b084 2853 NV anv;
a6ec74c1 2854 fromstr = NEXTFROM;
83388ae2 2855 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2856 anv = SvNV_nomg(fromstr);
a6ec74c1 2857
f337b084
TH
2858 if (anv < 0) {
2859 *cur = '\0';
b162af07 2860 SvCUR_set(cat, cur - start);
49704364 2861 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2862 }
a6ec74c1 2863
196b62db
NC
2864 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2865 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2866 any negative IVs will have already been got by the croak()
2867 above. IOK is untrue for fractions, so we test them
2868 against UV_MAX_P1. */
f337b084
TH
2869 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2870 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2871 char *in = buf + sizeof(buf);
1e9a122e 2872 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2873
2874 do {
eb160463 2875 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2876 auv >>= 7;
2877 } while (auv);
2878 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2879 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2880 in, (buf + sizeof(buf)) - in);
2881 } else if (SvPOKp(fromstr))
2882 goto w_string;
a6ec74c1 2883 else if (SvNOKp(fromstr)) {
0258719b 2884 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2885 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2886 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2887 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2888 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2889 Some C compilers are strict about integral constant
2890 expressions so we conservatively divide by a slightly
2891 smaller integer instead of multiplying by the exact
2892 floating-point value.
0258719b
NC
2893 */
2894#ifdef NV_MAX_10_EXP
f337b084 2895 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2896 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2897#else
f337b084 2898 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2899 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2900#endif
a6ec74c1
JH
2901 char *in = buf + sizeof(buf);
2902
8b6e33c7 2903 anv = Perl_floor(anv);
a6ec74c1 2904 do {
8b6e33c7 2905 const NV next = Perl_floor(anv / 128);
a6ec74c1 2906 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2907 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2908 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2909 anv = next;
2910 } while (anv > 0);
a6ec74c1 2911 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2912 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2913 in, (buf + sizeof(buf)) - in);
2914 } else {
8b6e33c7
AL
2915 const char *from;
2916 char *result, *in;
735b914b
JH
2917 SV *norm;
2918 STRLEN len;
2919 bool done;
2920
f337b084 2921 w_string:
735b914b 2922 /* Copy string and check for compliance */
1e9a122e 2923 from = SvPV_nomg_const(fromstr, len);
735b914b 2924 if ((norm = is_an_int(from, len)) == NULL)
49704364 2925 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2926
a02a5408 2927 Newx(result, len, char);
735b914b
JH
2928 in = result + len;
2929 done = FALSE;
f337b084 2930 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2931 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2932 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2933 in, (result + len) - in);
735b914b
JH
2934 Safefree(result);
2935 SvREFCNT_dec(norm); /* free norm */
fc241834 2936 }
a6ec74c1
JH
2937 }
2938 break;
2939 case 'i':
49704364 2940 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2941 while (len-- > 0) {
f337b084 2942 int aint;
a6ec74c1 2943 fromstr = NEXTFROM;
354b74ae 2944 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2945 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2946 }
2947 break;
068bd2e7 2948 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2949 case 'N':
2950 while (len-- > 0) {
f337b084 2951 U32 au32;
a6ec74c1 2952 fromstr = NEXTFROM;
354b74ae 2953 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2954 au32 = PerlSock_htonl(au32);
3a88beaa 2955 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2956 }
2957 break;
068bd2e7 2958 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2959 case 'V':
2960 while (len-- > 0) {
f337b084 2961 U32 au32;
a6ec74c1 2962 fromstr = NEXTFROM;
354b74ae 2963 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2964 au32 = htovl(au32);
3a88beaa 2965 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2966 }
2967 break;
49704364 2968 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2969#if LONGSIZE != SIZE32
fc241834 2970 while (len-- > 0) {
f337b084 2971 unsigned long aulong;
fc241834 2972 fromstr = NEXTFROM;
354b74ae 2973 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2974 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2975 }
49704364
LW
2976 break;
2977#else
2978 /* Fall though! */
a6ec74c1 2979#endif
49704364 2980 case 'L':
fc241834 2981 while (len-- > 0) {
f337b084 2982 U32 au32;
fc241834 2983 fromstr = NEXTFROM;
354b74ae 2984 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2985 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2986 }
2987 break;
49704364 2988 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2989#if LONGSIZE != SIZE32
fc241834 2990 while (len-- > 0) {
f337b084 2991 long along;
fc241834 2992 fromstr = NEXTFROM;
354b74ae 2993 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2994 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 2995 }
49704364
LW
2996 break;
2997#else
2998 /* Fall though! */
a6ec74c1 2999#endif
49704364
LW
3000 case 'l':
3001 while (len-- > 0) {
f337b084 3002 I32 ai32;
49704364 3003 fromstr = NEXTFROM;
354b74ae 3004 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3005 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3006 }
3007 break;
c174bf3b 3008#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3009 case 'Q':
3010 while (len-- > 0) {
f337b084 3011 Uquad_t auquad;
a6ec74c1 3012 fromstr = NEXTFROM;
354b74ae 3013 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3014 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3015 }
3016 break;
3017 case 'q':
3018 while (len-- > 0) {
f337b084 3019 Quad_t aquad;
a6ec74c1 3020 fromstr = NEXTFROM;
354b74ae 3021 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3022 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3023 }
3024 break;
1640b983 3025#endif
a6ec74c1
JH
3026 case 'P':
3027 len = 1; /* assume SV is correct length */
f337b084 3028 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3029 /* FALLTHROUGH */
a6ec74c1
JH
3030 case 'p':
3031 while (len-- > 0) {
83003860 3032 const char *aptr;
f337b084 3033
a6ec74c1 3034 fromstr = NEXTFROM;
28a4f200 3035 SvGETMAGIC(fromstr);
3036 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3037 else {
a6ec74c1
JH
3038 /* XXX better yet, could spirit away the string to
3039 * a safe spot and hang on to it until the result
3040 * of pack() (and all copies of the result) are
3041 * gone.
3042 */
041457d9 3043 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3044 !SvREADONLY(fromstr)))) {
3045 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3046 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3047 }
3048 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3049 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3050 else
2596d9fe 3051 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3052 }
3a88beaa 3053 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3054 }
3055 break;
fc241834 3056 case 'u': {
f7fe979e 3057 const char *aptr, *aend;
fc241834 3058 bool from_utf8;
f337b084 3059
a6ec74c1 3060 fromstr = NEXTFROM;
fc241834
RGS
3061 if (len <= 2) len = 45;
3062 else len = len / 3 * 3;
3063 if (len >= 64) {
a2a5de95
NC
3064 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3065 "Field too wide in 'u' format in pack");
fc241834
RGS
3066 len = 63;
3067 }
83003860 3068 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3069 from_utf8 = DO_UTF8(fromstr);
3070 if (from_utf8) {
3071 aend = aptr + fromlen;
3f63b0e5 3072 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3073 } else aend = NULL; /* Unused, but keep compilers happy */
3074 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3075 while (fromlen > 0) {
fc241834 3076 U8 *end;
a6ec74c1 3077 I32 todo;
fc241834 3078 U8 hunk[1+63/3*4+1];
a6ec74c1 3079
eb160463 3080 if ((I32)fromlen > len)
a6ec74c1
JH
3081 todo = len;
3082 else
3083 todo = fromlen;
fc241834
RGS
3084 if (from_utf8) {
3085 char buffer[64];
9df874cd 3086 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3087 'u' | TYPE_IS_PACK)) {
3088 *cur = '\0';
b162af07 3089 SvCUR_set(cat, cur - start);
5637ef5b
NC
3090 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3091 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3092 aptr, aend, buffer, (long) todo);
fc241834 3093 }
e68aed92 3094 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3095 } else {
e68aed92 3096 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3097 aptr += todo;
3098 }
3a88beaa 3099 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3100 fromlen -= todo;
3101 }
a6ec74c1
JH
3102 break;
3103 }
f337b084
TH
3104 }
3105 *cur = '\0';
b162af07 3106 SvCUR_set(cat, cur - start);
f337b084 3107 no_change:
49704364 3108 *symptr = lookahead;
a6ec74c1 3109 }
49704364 3110 return beglist;
18529408
IZ
3111}
3112#undef NEXTFROM
3113
3114
3115PP(pp_pack)
3116{
20b7effb 3117 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3118 SV *cat = TARG;
18529408 3119 STRLEN fromlen;
349d4f2f 3120 SV *pat_sv = *++MARK;
eb578fdb
KW
3121 const char *pat = SvPV_const(pat_sv, fromlen);
3122 const char *patend = pat + fromlen;
18529408
IZ
3123
3124 MARK++;
76f68e9b 3125 sv_setpvs(cat, "");
f337b084 3126 SvUTF8_off(cat);
18529408 3127
7accc089 3128 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3129
a6ec74c1
JH
3130 SvSETMAGIC(cat);
3131 SP = ORIGMARK;
3132 PUSHs(cat);
3133 RETURN;
3134}
a6ec74c1 3135
73cb7263 3136/*
14d04a33 3137 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3138 */