This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
(perl #129149) avoid a heap buffer overflow with pack "W"...
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
9df874cd 154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
LW
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
e6006968
KW
219#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
a6ec74c1 221
66c611c5 222/* type modifiers */
62f95557 223#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
224#define TYPE_IS_BIG_ENDIAN 0x200
225#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 226#define TYPE_IS_PACK 0x800
1109a392 227#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 228#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
229#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
230
7212898e
NC
231# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
232# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
233
234# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
235
78d46eaa 236#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 237#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
238#define PACK_SIZE_MASK 0x3F
239
4ad176ef 240#include "packsizetables.inc"
78d46eaa 241
20aa3a38
NC
242static void
243S_reverse_copy(const char *src, char *dest, STRLEN len)
244{
245 dest += len;
246 while (len--)
247 *--dest = *src++;
248}
249
08ca2aa3 250STATIC U8
9df874cd 251utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 252{
08ca2aa3 253 STRLEN retlen;
0bcc34c2 254 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 255 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 256 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
257 warnings), but these two mean we make no progress in the string and
258 might enter an infinite loop */
259 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 262 if (val >= 0x100) {
a2a5de95
NC
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
266 val &= 0xff;
267 }
268 *s += retlen;
fe2774ed 269 return (U8)val;
08ca2aa3
TH
270}
271
f337b084 272#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
274 *(U8 *)(s)++)
275
276STATIC bool
9df874cd 277S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
278{
279 UV val;
280 STRLEN retlen;
f7fe979e 281 const char *from = *s;
08ca2aa3 282 int bad = 0;
f7fe979e 283 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
285 const bool needs_swap = NEEDS_SWAP(datumtype);
286
228e69a7 287 if (UNLIKELY(needs_swap))
20aa3a38
NC
288 buf += buf_len;
289
08ca2aa3
TH
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
f337b084 292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
293 if (retlen == (STRLEN) -1 || retlen == 0) {
294 from += UTF8SKIP(from);
295 bad |= 1;
296 } else from += retlen;
297 if (val >= 0x100) {
298 bad |= 2;
299 val &= 0xff;
300 }
228e69a7 301 if (UNLIKELY(needs_swap))
20aa3a38
NC
302 *(U8 *)--buf = (U8)val;
303 else
304 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
305 }
306 /* We have enough characters for the buffer. Did we have problems ? */
307 if (bad) {
308 if (bad & 1) {
309 /* Rewalk the string fragment while warning */
f7fe979e 310 const char *ptr;
9e27e96a 311 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
c80e42f3 314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
315 }
316 if (from > end) from = end;
317 }
a2a5de95
NC
318 if ((bad & 2))
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 320 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
324 }
325 *s = from;
326 return TRUE;
327}
328
64844641 329STATIC char *
d21d7215
KW
330S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 332
228e69a7 333 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
334 const U8 *p = start + len;
335 while (p-- > start) {
55d09dc8 336 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
337 }
338 } else {
339 const U8 * const end = start + len;
340 while (start < end) {
55d09dc8 341 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
342 start++;
343 }
f337b084 344 }
64844641 345 return dest;
f337b084
TH
346}
347
3a88beaa 348#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 349STMT_START { \
228e69a7 350 if (UNLIKELY(utf8)) \
d21d7215 351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 352 else { \
228e69a7 353 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
354 S_reverse_copy((char *)(buf), cur, len); \
355 else \
356 Copy(buf, cur, len, char); \
230e1fce
NC
357 (cur) += (len); \
358 } \
f337b084
TH
359} STMT_END
360
361#define GROWING(utf8, cat, start, cur, in_len) \
362STMT_START { \
363 STRLEN glen = (in_len); \
3473cf63 364 if (utf8) glen *= UTF8_EXPAND; \
f337b084 365 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 366 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
367 (cur) = (start) + SvCUR(cat); \
368 } \
369} STMT_END
370
371#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
372STMT_START { \
f7fe979e 373 const STRLEN glen = (in_len); \
f337b084 374 STRLEN gl = glen; \
3473cf63 375 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
376 if ((cur) + gl >= (start) + SvLEN(cat)) { \
377 *cur = '\0'; \
b162af07 378 SvCUR_set((cat), (cur) - (start)); \
0bd48802 379 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
380 (cur) = (start) + SvCUR(cat); \
381 } \
3a88beaa 382 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
383} STMT_END
384
385#define PUSH_BYTE(utf8, s, byte) \
386STMT_START { \
387 if (utf8) { \
f7fe979e 388 const U8 au8 = (byte); \
d21d7215 389 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
390 } else *(U8 *)(s)++ = (byte); \
391} STMT_END
392
393/* Only to be used inside a loop (see the break) */
394#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
395STMT_START { \
396 STRLEN retlen; \
397 if (str >= end) break; \
398 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
399 if (retlen == (STRLEN) -1 || retlen == 0) { \
400 *cur = '\0'; \
401 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
402 } \
403 str += retlen; \
404} STMT_END
405
f7fe979e
AL
406static const char *_action( const tempsym_t* symptr )
407{
10edeb5d 408 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
409}
410
206947d2 411/* Returns the sizeof() struct described by pat */
028d1f6d 412STATIC I32
f337b084 413S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 414{
f337b084 415 I32 total = 0;
206947d2 416
7918f24d
NC
417 PERL_ARGS_ASSERT_MEASURE_STRUCT;
418
49704364 419 while (next_symbol(symptr)) {
f337b084 420 I32 len;
f7fe979e 421 int size;
f337b084
TH
422
423 switch (symptr->howlen) {
fc241834 424 case e_star:
49704364 425 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 426 _action( symptr ) );
81d52ecd 427
f337b084
TH
428 default:
429 /* e_no_len and e_number */
430 len = symptr->length;
431 break;
49704364
LW
432 }
433
a7a3cfaa 434 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 435 if (!size) {
f7fe979e 436 int star;
80a13697
NC
437 /* endianness doesn't influence the size of a type */
438 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
439 default:
440 Perl_croak(aTHX_ "Invalid type '%c' in %s",
441 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 442 _action( symptr ) );
28be1210
TH
443 case '.' | TYPE_IS_SHRIEKING:
444 case '@' | TYPE_IS_SHRIEKING:
80a13697 445 case '@':
28be1210 446 case '.':
80a13697
NC
447 case '/':
448 case 'U': /* XXXX Is it correct? */
449 case 'w':
450 case 'u':
451 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 452 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 453 _action( symptr ) );
80a13697
NC
454 case '%':
455 size = 0;
456 break;
457 case '(':
fc241834
RGS
458 {
459 tempsym_t savsym = *symptr;
460 symptr->patptr = savsym.grpbeg;
461 symptr->patend = savsym.grpend;
462 /* XXXX Theoretically, we need to measure many times at
463 different positions, since the subexpression may contain
464 alignment commands, but be not of aligned length.
465 Need to detect this and croak(). */
466 size = measure_struct(symptr);
467 *symptr = savsym;
468 break;
469 }
80a13697
NC
470 case 'X' | TYPE_IS_SHRIEKING:
471 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
472 */
473 if (!len) /* Avoid division by 0 */
474 len = 1;
475 len = total % len; /* Assumed: the start is aligned. */
924ba076 476 /* FALLTHROUGH */
80a13697
NC
477 case 'X':
478 size = -1;
479 if (total < len)
f7fe979e 480 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
481 break;
482 case 'x' | TYPE_IS_SHRIEKING:
483 if (!len) /* Avoid division by 0 */
484 len = 1;
485 star = total % len; /* Assumed: the start is aligned. */
486 if (star) /* Other portable ways? */
487 len = len - star;
488 else
489 len = 0;
924ba076 490 /* FALLTHROUGH */
80a13697
NC
491 case 'x':
492 case 'A':
493 case 'Z':
494 case 'a':
80a13697
NC
495 size = 1;
496 break;
497 case 'B':
498 case 'b':
499 len = (len + 7)/8;
500 size = 1;
501 break;
502 case 'H':
503 case 'h':
504 len = (len + 1)/2;
505 size = 1;
506 break;
78d46eaa 507
80a13697
NC
508 case 'P':
509 len = 1;
510 size = sizeof(char*);
78d46eaa
NC
511 break;
512 }
206947d2
IZ
513 }
514 total += len * size;
515 }
516 return total;
517}
518
49704364
LW
519
520/* locate matching closing parenthesis or bracket
521 * returns char pointer to char after match, or NULL
522 */
f7fe979e 523STATIC const char *
5aaab254 524S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 525{
7918f24d
NC
526 PERL_ARGS_ASSERT_GROUP_END;
527
49704364 528 while (patptr < patend) {
f7fe979e 529 const char c = *patptr++;
49704364
LW
530
531 if (isSPACE(c))
532 continue;
533 else if (c == ender)
534 return patptr-1;
535 else if (c == '#') {
536 while (patptr < patend && *patptr != '\n')
537 patptr++;
538 continue;
539 } else if (c == '(')
540 patptr = group_end(patptr, patend, ')') + 1;
541 else if (c == '[')
542 patptr = group_end(patptr, patend, ']') + 1;
18529408 543 }
49704364
LW
544 Perl_croak(aTHX_ "No group ending character '%c' found in template",
545 ender);
a25b5927 546 NOT_REACHED; /* NOTREACHED */
18529408
IZ
547}
548
49704364
LW
549
550/* Convert unsigned decimal number to binary.
551 * Expects a pointer to the first digit and address of length variable
552 * Advances char pointer to 1st non-digit char and returns number
fc241834 553 */
f7fe979e 554STATIC const char *
5aaab254 555S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
556{
557 I32 len = *patptr++ - '0';
7918f24d
NC
558
559 PERL_ARGS_ASSERT_GET_NUM;
560
49704364
LW
561 while (isDIGIT(*patptr)) {
562 if (len >= 0x7FFFFFFF/10)
563 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
564 len = (len * 10) + (*patptr++ - '0');
565 }
566 *lenptr = len;
567 return patptr;
568}
569
570/* The marvellous template parsing routine: Using state stored in *symptr,
571 * locates next template code and count
572 */
573STATIC bool
f337b084 574S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 575{
f7fe979e 576 const char* patptr = symptr->patptr;
0bcc34c2 577 const char* const patend = symptr->patend;
49704364 578
7918f24d
NC
579 PERL_ARGS_ASSERT_NEXT_SYMBOL;
580
49704364
LW
581 symptr->flags &= ~FLAG_SLASH;
582
583 while (patptr < patend) {
584 if (isSPACE(*patptr))
585 patptr++;
586 else if (*patptr == '#') {
587 patptr++;
588 while (patptr < patend && *patptr != '\n')
589 patptr++;
590 if (patptr < patend)
591 patptr++;
592 } else {
fc241834 593 /* We should have found a template code */
49704364 594 I32 code = *patptr++ & 0xFF;
66c611c5 595 U32 inherited_modifiers = 0;
49704364
LW
596
597 if (code == ','){ /* grandfather in commas but with a warning */
598 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
599 symptr->flags |= FLAG_COMMA;
600 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 601 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
602 }
603 continue;
604 }
fc241834 605
49704364 606 /* for '(', skip to ')' */
fc241834 607 if (code == '(') {
49704364
LW
608 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
609 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 610 _action( symptr ) );
49704364
LW
611 symptr->grpbeg = patptr;
612 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
613 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
614 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 615 _action( symptr ) );
49704364
LW
616 }
617
66c611c5
MHM
618 /* look for group modifiers to inherit */
619 if (TYPE_ENDIANNESS(symptr->flags)) {
620 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
621 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
622 }
623
1109a392
MHM
624 /* look for modifiers */
625 while (patptr < patend) {
b7787f18
AL
626 const char *allowed;
627 I32 modifier;
1109a392
MHM
628 switch (*patptr) {
629 case '!':
630 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 631 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
632 break;
633 case '>':
634 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 635 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
636 break;
637 case '<':
638 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 639 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
640 break;
641 default:
b7787f18
AL
642 allowed = "";
643 modifier = 0;
1109a392
MHM
644 break;
645 }
66c611c5 646
1109a392
MHM
647 if (modifier == 0)
648 break;
66c611c5 649
1109a392
MHM
650 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
651 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 652 allowed, _action( symptr ) );
66c611c5
MHM
653
654 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 655 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 656 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
657 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
658 TYPE_ENDIANNESS_MASK)
659 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 660 *patptr, _action( symptr ) );
66c611c5 661
a2a5de95
NC
662 if ((code & modifier)) {
663 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
664 "Duplicate modifier '%c' after '%c' in %s",
665 *patptr, (int) TYPE_NO_MODIFIERS(code),
666 _action( symptr ) );
1109a392 667 }
66c611c5 668
1109a392
MHM
669 code |= modifier;
670 patptr++;
49704364
LW
671 }
672
66c611c5
MHM
673 /* inherit modifiers */
674 code |= inherited_modifiers;
675
fc241834 676 /* look for count and/or / */
49704364
LW
677 if (patptr < patend) {
678 if (isDIGIT(*patptr)) {
679 patptr = get_num( patptr, &symptr->length );
680 symptr->howlen = e_number;
681
682 } else if (*patptr == '*') {
683 patptr++;
684 symptr->howlen = e_star;
685
686 } else if (*patptr == '[') {
f7fe979e 687 const char* lenptr = ++patptr;
49704364
LW
688 symptr->howlen = e_number;
689 patptr = group_end( patptr, patend, ']' ) + 1;
690 /* what kind of [] is it? */
691 if (isDIGIT(*lenptr)) {
692 lenptr = get_num( lenptr, &symptr->length );
693 if( *lenptr != ']' )
694 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 695 _action( symptr ) );
49704364
LW
696 } else {
697 tempsym_t savsym = *symptr;
698 symptr->patend = patptr-1;
699 symptr->patptr = lenptr;
700 savsym.length = measure_struct(symptr);
701 *symptr = savsym;
702 }
703 } else {
704 symptr->howlen = e_no_len;
705 symptr->length = 1;
706 }
707
708 /* try to find / */
709 while (patptr < patend) {
710 if (isSPACE(*patptr))
711 patptr++;
712 else if (*patptr == '#') {
713 patptr++;
714 while (patptr < patend && *patptr != '\n')
715 patptr++;
716 if (patptr < patend)
717 patptr++;
718 } else {
66c611c5 719 if (*patptr == '/') {
49704364
LW
720 symptr->flags |= FLAG_SLASH;
721 patptr++;
66c611c5
MHM
722 if (patptr < patend &&
723 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 724 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 725 _action( symptr ) );
49704364
LW
726 }
727 break;
728 }
18529408 729 }
49704364
LW
730 } else {
731 /* at end - no count, no / */
732 symptr->howlen = e_no_len;
733 symptr->length = 1;
734 }
735
736 symptr->code = code;
fc241834 737 symptr->patptr = patptr;
49704364 738 return TRUE;
18529408 739 }
49704364 740 }
fc241834 741 symptr->patptr = patptr;
49704364 742 return FALSE;
18529408
IZ
743}
744
18529408 745/*
fc241834 746 There is no way to cleanly handle the case where we should process the
08ca2aa3 747 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
748 (e.g. estimates like strend-s as an upper bound for the number of
749 characters left wouldn't work). So if we foresee the need of this
750 (pattern starts with U or contains U0), we want to work on the encoded
751 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
752 themselves if they need to do a lot of unpacks like this on it
753*/
fc241834 754STATIC bool
08ca2aa3
TH
755need_utf8(const char *pat, const char *patend)
756{
757 bool first = TRUE;
7918f24d
NC
758
759 PERL_ARGS_ASSERT_NEED_UTF8;
760
08ca2aa3
TH
761 while (pat < patend) {
762 if (pat[0] == '#') {
763 pat++;
f7fe979e 764 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
765 if (!pat) return FALSE;
766 } else if (pat[0] == 'U') {
767 if (first || pat[1] == '0') return TRUE;
768 } else first = FALSE;
769 pat++;
770 }
771 return FALSE;
772}
773
774STATIC char
775first_symbol(const char *pat, const char *patend) {
7918f24d
NC
776 PERL_ARGS_ASSERT_FIRST_SYMBOL;
777
08ca2aa3
TH
778 while (pat < patend) {
779 if (pat[0] != '#') return pat[0];
780 pat++;
f7fe979e 781 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
782 if (!pat) return 0;
783 pat++;
784 }
785 return 0;
786}
787
788/*
dcccc8ff
KW
789
790=head1 Pack and Unpack
791
7accc089
JH
792=for apidoc unpackstring
793
796b6530 794The engine implementing the C<unpack()> Perl function.
21ebfc7a 795
796b6530
KW
796Using the template C<pat..patend>, this function unpacks the string
797C<s..strend> into a number of mortal SVs, which it pushes onto the perl
798argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 799C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
800pushed elements.
801
796b6530
KW
802The C<strend> and C<patend> pointers should point to the byte following the
803last character of each string.
21ebfc7a
DM
804
805Although this function returns its values on the perl argument stack, it
806doesn't take any parameters from that stack (and thus in particular
796b6530 807there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 808example).
7accc089
JH
809
810=cut */
811
812I32
f7fe979e 813Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 814{
f7fe979e 815 tempsym_t sym;
08ca2aa3 816
7918f24d
NC
817 PERL_ARGS_ASSERT_UNPACKSTRING;
818
f337b084 819 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
820 else if (need_utf8(pat, patend)) {
821 /* We probably should try to avoid this in case a scalar context call
822 wouldn't get to the "U0" */
823 STRLEN len = strend - s;
230e1fce 824 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
825 SAVEFREEPV(s);
826 strend = s + len;
f337b084 827 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
828 }
829
f337b084
TH
830 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
831 flags |= FLAG_PARSE_UTF8;
08ca2aa3 832
f7fe979e 833 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
834
835 return unpack_rec(&sym, s, s, strend, NULL );
836}
837
4136a0f7 838STATIC I32
f7fe979e 839S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 840{
20b7effb 841 dSP;
3297d27d 842 SV *sv = NULL;
f7fe979e 843 const I32 start_sp_offset = SP - PL_stack_base;
49704364 844 howlen_t howlen;
a6ec74c1 845 I32 checksum = 0;
92d41999 846 UV cuv = 0;
a6ec74c1 847 NV cdouble = 0.0;
f337b084 848 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 849 bool beyond = FALSE;
21c16052 850 bool explicit_length;
9e27e96a 851 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 852 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
853
854 PERL_ARGS_ASSERT_UNPACK_REC;
855
28be1210 856 symptr->strbeg = s - strbeg;
49704364 857
49704364 858 while (next_symbol(symptr)) {
a7a3cfaa 859 packprops_t props;
9e27e96a 860 I32 len;
f337b084 861 I32 datumtype = symptr->code;
a1219b5e 862 bool needs_swap;
206947d2 863 /* do first one only unless in list context
08ca2aa3 864 / is implemented by unpacking the count, then popping it from the
206947d2 865 stack, so must check that we're not in the middle of a / */
49704364 866 if ( unpack_only_one
206947d2 867 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 868 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 869 break;
49704364 870
f337b084 871 switch (howlen = symptr->howlen) {
fc241834
RGS
872 case e_star:
873 len = strend - strbeg; /* long enough */
49704364 874 break;
f337b084
TH
875 default:
876 /* e_no_len and e_number */
877 len = symptr->length;
878 break;
49704364 879 }
18529408 880
21c16052 881 explicit_length = TRUE;
a6ec74c1 882 redo_switch:
49704364 883 beyond = s >= strend;
a7a3cfaa
TH
884
885 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
886 if (props) {
887 /* props nonzero means we can process this letter. */
9e27e96a
AL
888 const long size = props & PACK_SIZE_MASK;
889 const long howmany = (strend - s) / size;
a7a3cfaa
TH
890 if (len > howmany)
891 len = howmany;
892
893 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
894 if (len && unpack_only_one) len = 1;
895 EXTEND(SP, len);
896 EXTEND_MORTAL(len);
78d46eaa
NC
897 }
898 }
a7a3cfaa 899
a1219b5e
NC
900 needs_swap = NEEDS_SWAP(datumtype);
901
1109a392 902 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 903 default:
1109a392 904 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 905
a6ec74c1 906 case '%':
49704364 907 if (howlen == e_no_len)
18529408 908 len = 16; /* len is not specified */
a6ec74c1 909 checksum = len;
92d41999 910 cuv = 0;
a6ec74c1 911 cdouble = 0;
18529408 912 continue;
81d52ecd 913
18529408
IZ
914 case '(':
915 {
49704364 916 tempsym_t savsym = *symptr;
9e27e96a 917 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 918 symptr->flags |= group_modifiers;
49704364 919 symptr->patend = savsym.grpend;
28be1210 920 symptr->previous = &savsym;
49704364 921 symptr->level++;
18529408 922 PUTBACK;
c6f750d1 923 if (len && unpack_only_one) len = 1;
18529408 924 while (len--) {
49704364 925 symptr->patptr = savsym.grpbeg;
f337b084
TH
926 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
927 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
928 unpack_rec(symptr, s, strbeg, strend, &s);
929 if (s == strend && savsym.howlen == e_star)
49704364 930 break; /* No way to continue */
18529408
IZ
931 }
932 SPAGAIN;
28be1210 933 savsym.flags = symptr->flags & ~group_modifiers;
49704364 934 *symptr = savsym;
18529408
IZ
935 break;
936 }
28be1210 937 case '.' | TYPE_IS_SHRIEKING:
28be1210 938 case '.': {
9e27e96a 939 const char *from;
28be1210 940 SV *sv;
9e27e96a 941 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
942 if (howlen == e_star) from = strbeg;
943 else if (len <= 0) from = s;
944 else {
945 tempsym_t *group = symptr;
946
947 while (--len && group) group = group->previous;
948 from = group ? strbeg + group->strbeg : strbeg;
949 }
950 sv = from <= s ?
00646304
CB
951 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
952 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 953 mXPUSHs(sv);
28be1210
TH
954 break;
955 }
28be1210 956 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 957 case '@':
28be1210 958 s = strbeg + symptr->strbeg;
28be1210 959 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 960 {
08ca2aa3
TH
961 while (len > 0) {
962 if (s >= strend)
963 Perl_croak(aTHX_ "'@' outside of string in unpack");
964 s += UTF8SKIP(s);
965 len--;
966 }
967 if (s > strend)
968 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
969 } else {
28be1210 970 if (strend-s < len)
fc241834 971 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 972 s += len;
08ca2aa3 973 }
a6ec74c1 974 break;
62f95557
IZ
975 case 'X' | TYPE_IS_SHRIEKING:
976 if (!len) /* Avoid division by 0 */
977 len = 1;
08ca2aa3 978 if (utf8) {
f7fe979e 979 const char *hop, *last;
f337b084
TH
980 I32 l = len;
981 hop = last = strbeg;
982 while (hop < s) {
983 hop += UTF8SKIP(hop);
984 if (--l == 0) {
08ca2aa3 985 last = hop;
f337b084
TH
986 l = len;
987 }
fc241834 988 }
f337b084
TH
989 if (last > s)
990 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
991 s = last;
992 break;
f337b084
TH
993 }
994 len = (s - strbeg) % len;
924ba076 995 /* FALLTHROUGH */
a6ec74c1 996 case 'X':
08ca2aa3
TH
997 if (utf8) {
998 while (len > 0) {
999 if (s <= strbeg)
1000 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1001 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1002 if (s <= strbeg)
1003 Perl_croak(aTHX_ "'X' outside of string in unpack");
1004 }
1005 len--;
1006 }
1007 } else {
fc241834
RGS
1008 if (len > s - strbeg)
1009 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1010 s -= len;
08ca2aa3 1011 }
a6ec74c1 1012 break;
9e27e96a
AL
1013 case 'x' | TYPE_IS_SHRIEKING: {
1014 I32 ai32;
62f95557
IZ
1015 if (!len) /* Avoid division by 0 */
1016 len = 1;
230e1fce
NC
1017 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1018 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1019 if (ai32 == 0) break;
1020 len -= ai32;
9e27e96a 1021 }
924ba076 1022 /* FALLTHROUGH */
a6ec74c1 1023 case 'x':
08ca2aa3
TH
1024 if (utf8) {
1025 while (len>0) {
1026 if (s >= strend)
1027 Perl_croak(aTHX_ "'x' outside of string in unpack");
1028 s += UTF8SKIP(s);
1029 len--;
1030 }
1031 } else {
fc241834
RGS
1032 if (len > strend - s)
1033 Perl_croak(aTHX_ "'x' outside of string in unpack");
1034 s += len;
f337b084 1035 }
a6ec74c1
JH
1036 break;
1037 case '/':
49704364 1038 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1039
a6ec74c1
JH
1040 case 'A':
1041 case 'Z':
1042 case 'a':
08ca2aa3
TH
1043 if (checksum) {
1044 /* Preliminary length estimate is assumed done in 'W' */
1045 if (len > strend - s) len = strend - s;
1046 goto W_checksum;
1047 }
1048 if (utf8) {
1049 I32 l;
f7fe979e 1050 const char *hop;
08ca2aa3
TH
1051 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1052 if (hop >= strend) {
1053 if (hop > strend)
1054 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1055 break;
fc241834 1056 }
a6ec74c1 1057 }
08ca2aa3
TH
1058 if (hop > strend)
1059 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1060 len = hop - s;
1061 } else if (len > strend - s)
1062 len = strend - s;
1063
1064 if (datumtype == 'Z') {
1065 /* 'Z' strips stuff after first null */
f7fe979e 1066 const char *ptr, *end;
f337b084
TH
1067 end = s + len;
1068 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1069 sv = newSVpvn(s, ptr-s);
1070 if (howlen == e_star) /* exact for 'Z*' */
1071 len = ptr-s + (ptr != strend ? 1 : 0);
1072 } else if (datumtype == 'A') {
1073 /* 'A' strips both nulls and spaces */
f7fe979e 1074 const char *ptr;
18bdf90a 1075 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1076 for (ptr = s+len-1; ptr >= s; ptr--) {
1077 if ( *ptr != 0
1078 && !UTF8_IS_CONTINUATION(*ptr)
1079 && !isSPACE_utf8_safe(ptr, strend))
1080 {
1081 break;
1082 }
1083 }
18bdf90a
TH
1084 if (ptr >= s) ptr += UTF8SKIP(ptr);
1085 else ptr++;
28be1210 1086 if (ptr > s+len)
18bdf90a
TH
1087 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1088 } else {
1089 for (ptr = s+len-1; ptr >= s; ptr--)
1090 if (*ptr != 0 && !isSPACE(*ptr)) break;
1091 ptr++;
1092 }
08ca2aa3
TH
1093 sv = newSVpvn(s, ptr-s);
1094 } else sv = newSVpvn(s, len);
1095
1096 if (utf8) {
1097 SvUTF8_on(sv);
1098 /* Undo any upgrade done due to need_utf8() */
f337b084 1099 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1100 sv_utf8_downgrade(sv, 0);
a6ec74c1 1101 }
6e449a3a 1102 mXPUSHs(sv);
08ca2aa3 1103 s += len;
a6ec74c1
JH
1104 break;
1105 case 'B':
08ca2aa3
TH
1106 case 'b': {
1107 char *str;
49704364 1108 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1109 len = (strend - s) * 8;
1110 if (checksum) {
f337b084 1111 if (utf8)
08ca2aa3 1112 while (len >= 8 && s < strend) {
9df874cd 1113 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1114 len -= 8;
1115 }
f337b084 1116 else
fc241834 1117 while (len >= 8) {
08ca2aa3 1118 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1119 len -= 8;
1120 }
08ca2aa3
TH
1121 if (len && s < strend) {
1122 U8 bits;
f337b084
TH
1123 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1124 if (datumtype == 'b')
a6ec74c1 1125 while (len-- > 0) {
92d41999 1126 if (bits & 1) cuv++;
a6ec74c1
JH
1127 bits >>= 1;
1128 }
f337b084 1129 else
a6ec74c1 1130 while (len-- > 0) {
08ca2aa3 1131 if (bits & 0x80) cuv++;
a6ec74c1
JH
1132 bits <<= 1;
1133 }
fc241834 1134 }
a6ec74c1
JH
1135 break;
1136 }
08ca2aa3 1137
561b68a9 1138 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1139 SvPOK_on(sv);
1140 str = SvPVX(sv);
1141 if (datumtype == 'b') {
f337b084 1142 U8 bits = 0;
f7fe979e 1143 const I32 ai32 = len;
08ca2aa3
TH
1144 for (len = 0; len < ai32; len++) {
1145 if (len & 7) bits >>= 1;
1146 else if (utf8) {
1147 if (s >= strend) break;
9df874cd 1148 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1149 } else bits = *(U8 *) s++;
1150 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1151 }
08ca2aa3 1152 } else {
f337b084 1153 U8 bits = 0;
f7fe979e 1154 const I32 ai32 = len;
08ca2aa3
TH
1155 for (len = 0; len < ai32; len++) {
1156 if (len & 7) bits <<= 1;
1157 else if (utf8) {
1158 if (s >= strend) break;
9df874cd 1159 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1160 } else bits = *(U8 *) s++;
1161 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1162 }
1163 }
1164 *str = '\0';
aa07b2f6 1165 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1166 XPUSHs(sv);
a6ec74c1 1167 break;
08ca2aa3 1168 }
a6ec74c1 1169 case 'H':
08ca2aa3 1170 case 'h': {
3297d27d 1171 char *str = NULL;
fc241834 1172 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1173 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1174 len = (strend - s) * 2;
858fe5e1
TC
1175 if (!checksum) {
1176 sv = sv_2mortal(newSV(len ? len : 1));
1177 SvPOK_on(sv);
1178 str = SvPVX(sv);
1179 }
a6ec74c1 1180 if (datumtype == 'h') {
f337b084 1181 U8 bits = 0;
9e27e96a 1182 I32 ai32 = len;
fc241834
RGS
1183 for (len = 0; len < ai32; len++) {
1184 if (len & 1) bits >>= 4;
1185 else if (utf8) {
1186 if (s >= strend) break;
9df874cd 1187 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1188 } else bits = * (U8 *) s++;
858fe5e1
TC
1189 if (!checksum)
1190 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1191 }
08ca2aa3 1192 } else {
f337b084 1193 U8 bits = 0;
f7fe979e 1194 const I32 ai32 = len;
08ca2aa3
TH
1195 for (len = 0; len < ai32; len++) {
1196 if (len & 1) bits <<= 4;
1197 else if (utf8) {
1198 if (s >= strend) break;
9df874cd 1199 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1200 } else bits = *(U8 *) s++;
858fe5e1
TC
1201 if (!checksum)
1202 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1203 }
1204 }
858fe5e1
TC
1205 if (!checksum) {
1206 *str = '\0';
1207 SvCUR_set(sv, str - SvPVX_const(sv));
1208 XPUSHs(sv);
1209 }
a6ec74c1 1210 break;
08ca2aa3 1211 }
1651fc44
ML
1212 case 'C':
1213 if (len == 0) {
1214 if (explicit_length)
1215 /* Switch to "character" mode */
1216 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1217 break;
1218 }
924ba076 1219 /* FALLTHROUGH */
a6ec74c1 1220 case 'c':
1651fc44
ML
1221 while (len-- > 0 && s < strend) {
1222 int aint;
1223 if (utf8)
1224 {
1225 STRLEN retlen;
1226 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1227 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1228 if (retlen == (STRLEN) -1 || retlen == 0)
1229 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1230 s += retlen;
1231 }
1232 else
1233 aint = *(U8 *)(s)++;
1234 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1235 aint -= 256;
08ca2aa3 1236 if (!checksum)
6e449a3a 1237 mPUSHi(aint);
73cb7263
NC
1238 else if (checksum > bits_in_uv)
1239 cdouble += (NV)aint;
1240 else
1241 cuv += aint;
a6ec74c1
JH
1242 }
1243 break;
08ca2aa3
TH
1244 case 'W':
1245 W_checksum:
1651fc44 1246 if (utf8) {
08ca2aa3 1247 while (len-- > 0 && s < strend) {
08ca2aa3 1248 STRLEN retlen;
f7fe979e 1249 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1250 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1251 if (retlen == (STRLEN) -1 || retlen == 0)
1252 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1253 s += retlen;
1254 if (!checksum)
6e449a3a 1255 mPUSHu(val);
08ca2aa3
TH
1256 else if (checksum > bits_in_uv)
1257 cdouble += (NV) val;
d6d3e8bd 1258 else
08ca2aa3 1259 cuv += val;
fc241834 1260 }
08ca2aa3 1261 } else if (!checksum)
a6ec74c1 1262 while (len-- > 0) {
f7fe979e 1263 const U8 ch = *(U8 *) s++;
6e449a3a 1264 mPUSHu(ch);
a6ec74c1 1265 }
08ca2aa3
TH
1266 else if (checksum > bits_in_uv)
1267 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1268 else
1269 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1270 break;
1271 case 'U':
35bcd338 1272 if (len == 0) {
c5333953 1273 if (explicit_length && howlen != e_star) {
08ca2aa3 1274 /* Switch to "bytes in UTF-8" mode */
f337b084 1275 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1276 else
1277 /* Should be impossible due to the need_utf8() test */
1278 Perl_croak(aTHX_ "U0 mode on a byte string");
1279 }
35bcd338
JH
1280 break;
1281 }
08ca2aa3 1282 if (len > strend - s) len = strend - s;
fc241834 1283 if (!checksum) {
08ca2aa3
TH
1284 if (len && unpack_only_one) len = 1;
1285 EXTEND(SP, len);
1286 EXTEND_MORTAL(len);
fc241834 1287 }
08ca2aa3
TH
1288 while (len-- > 0 && s < strend) {
1289 STRLEN retlen;
1290 UV auv;
1291 if (utf8) {
1292 U8 result[UTF8_MAXLEN];
f7fe979e 1293 const char *ptr = s;
08ca2aa3 1294 STRLEN len;
08ca2aa3
TH
1295 /* Bug: warns about bad utf8 even if we are short on bytes
1296 and will break out of the loop */
9df874cd 1297 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1298 'U'))
08ca2aa3
TH
1299 break;
1300 len = UTF8SKIP(result);
9df874cd 1301 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1302 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1303 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1304 len,
1305 &retlen,
1306 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1307 s = ptr;
1308 } else {
3ece276e
KW
1309 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1310 strend - s,
1311 &retlen,
1312 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1313 if (retlen == (STRLEN) -1 || retlen == 0)
1314 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1315 s += retlen;
1316 }
1317 if (!checksum)
6e449a3a 1318 mPUSHu(auv);
73cb7263 1319 else if (checksum > bits_in_uv)
08ca2aa3 1320 cdouble += (NV) auv;
73cb7263 1321 else
08ca2aa3 1322 cuv += auv;
a6ec74c1
JH
1323 }
1324 break;
49704364
LW
1325 case 's' | TYPE_IS_SHRIEKING:
1326#if SHORTSIZE != SIZE16
73cb7263 1327 while (len-- > 0) {
08ca2aa3 1328 short ashort;
aaec8192 1329 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1330 if (!checksum)
6e449a3a 1331 mPUSHi(ashort);
73cb7263
NC
1332 else if (checksum > bits_in_uv)
1333 cdouble += (NV)ashort;
1334 else
1335 cuv += ashort;
49704364
LW
1336 }
1337 break;
1338#else
924ba076 1339 /* FALLTHROUGH */
a6ec74c1 1340#endif
49704364 1341 case 's':
73cb7263 1342 while (len-- > 0) {
08ca2aa3
TH
1343 I16 ai16;
1344
1345#if U16SIZE > SIZE16
1346 ai16 = 0;
1347#endif
aaec8192 1348 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1349#if U16SIZE > SIZE16
73cb7263
NC
1350 if (ai16 > 32767)
1351 ai16 -= 65536;
a6ec74c1 1352#endif
08ca2aa3 1353 if (!checksum)
6e449a3a 1354 mPUSHi(ai16);
73cb7263
NC
1355 else if (checksum > bits_in_uv)
1356 cdouble += (NV)ai16;
1357 else
1358 cuv += ai16;
a6ec74c1
JH
1359 }
1360 break;
49704364
LW
1361 case 'S' | TYPE_IS_SHRIEKING:
1362#if SHORTSIZE != SIZE16
73cb7263 1363 while (len-- > 0) {
08ca2aa3 1364 unsigned short aushort;
aaec8192
NC
1365 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1366 needs_swap);
08ca2aa3 1367 if (!checksum)
6e449a3a 1368 mPUSHu(aushort);
73cb7263
NC
1369 else if (checksum > bits_in_uv)
1370 cdouble += (NV)aushort;
1371 else
1372 cuv += aushort;
49704364
LW
1373 }
1374 break;
1375#else
924ba076 1376 /* FALLTHROUGH */
49704364 1377#endif
a6ec74c1
JH
1378 case 'v':
1379 case 'n':
1380 case 'S':
73cb7263 1381 while (len-- > 0) {
08ca2aa3
TH
1382 U16 au16;
1383#if U16SIZE > SIZE16
1384 au16 = 0;
1385#endif
aaec8192 1386 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1387 if (datumtype == 'n')
1388 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1389 if (datumtype == 'v')
1390 au16 = vtohs(au16);
08ca2aa3 1391 if (!checksum)
6e449a3a 1392 mPUSHu(au16);
73cb7263 1393 else if (checksum > bits_in_uv)
f337b084 1394 cdouble += (NV) au16;
73cb7263
NC
1395 else
1396 cuv += au16;
a6ec74c1
JH
1397 }
1398 break;
068bd2e7
MHM
1399 case 'v' | TYPE_IS_SHRIEKING:
1400 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1401 while (len-- > 0) {
08ca2aa3
TH
1402 I16 ai16;
1403# if U16SIZE > SIZE16
1404 ai16 = 0;
1405# endif
aaec8192 1406 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1407 /* There should never be any byte-swapping here. */
1408 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1409 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1410 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1411 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1412 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1413 if (!checksum)
6e449a3a 1414 mPUSHi(ai16);
73cb7263 1415 else if (checksum > bits_in_uv)
08ca2aa3 1416 cdouble += (NV) ai16;
73cb7263
NC
1417 else
1418 cuv += ai16;
068bd2e7
MHM
1419 }
1420 break;
a6ec74c1 1421 case 'i':
49704364 1422 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1423 while (len-- > 0) {
08ca2aa3 1424 int aint;
aaec8192 1425 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1426 if (!checksum)
6e449a3a 1427 mPUSHi(aint);
73cb7263
NC
1428 else if (checksum > bits_in_uv)
1429 cdouble += (NV)aint;
1430 else
1431 cuv += aint;
a6ec74c1
JH
1432 }
1433 break;
1434 case 'I':
49704364 1435 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1436 while (len-- > 0) {
08ca2aa3 1437 unsigned int auint;
aaec8192 1438 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1439 if (!checksum)
6e449a3a 1440 mPUSHu(auint);
73cb7263
NC
1441 else if (checksum > bits_in_uv)
1442 cdouble += (NV)auint;
1443 else
1444 cuv += auint;
a6ec74c1
JH
1445 }
1446 break;
92d41999 1447 case 'j':
73cb7263 1448 while (len-- > 0) {
08ca2aa3 1449 IV aiv;
aaec8192 1450 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1451 if (!checksum)
6e449a3a 1452 mPUSHi(aiv);
73cb7263
NC
1453 else if (checksum > bits_in_uv)
1454 cdouble += (NV)aiv;
1455 else
1456 cuv += aiv;
92d41999
JH
1457 }
1458 break;
1459 case 'J':
73cb7263 1460 while (len-- > 0) {
08ca2aa3 1461 UV auv;
aaec8192 1462 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1463 if (!checksum)
6e449a3a 1464 mPUSHu(auv);
73cb7263
NC
1465 else if (checksum > bits_in_uv)
1466 cdouble += (NV)auv;
1467 else
1468 cuv += auv;
92d41999
JH
1469 }
1470 break;
49704364
LW
1471 case 'l' | TYPE_IS_SHRIEKING:
1472#if LONGSIZE != SIZE32
73cb7263 1473 while (len-- > 0) {
08ca2aa3 1474 long along;
aaec8192 1475 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1476 if (!checksum)
6e449a3a 1477 mPUSHi(along);
73cb7263
NC
1478 else if (checksum > bits_in_uv)
1479 cdouble += (NV)along;
1480 else
1481 cuv += along;
49704364
LW
1482 }
1483 break;
1484#else
924ba076 1485 /* FALLTHROUGH */
a6ec74c1 1486#endif
49704364 1487 case 'l':
73cb7263 1488 while (len-- > 0) {
08ca2aa3
TH
1489 I32 ai32;
1490#if U32SIZE > SIZE32
1491 ai32 = 0;
1492#endif
aaec8192 1493 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1494#if U32SIZE > SIZE32
08ca2aa3 1495 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1496#endif
08ca2aa3 1497 if (!checksum)
6e449a3a 1498 mPUSHi(ai32);
73cb7263
NC
1499 else if (checksum > bits_in_uv)
1500 cdouble += (NV)ai32;
1501 else
1502 cuv += ai32;
a6ec74c1
JH
1503 }
1504 break;
49704364
LW
1505 case 'L' | TYPE_IS_SHRIEKING:
1506#if LONGSIZE != SIZE32
73cb7263 1507 while (len-- > 0) {
08ca2aa3 1508 unsigned long aulong;
aaec8192 1509 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1510 if (!checksum)
6e449a3a 1511 mPUSHu(aulong);
73cb7263
NC
1512 else if (checksum > bits_in_uv)
1513 cdouble += (NV)aulong;
1514 else
1515 cuv += aulong;
49704364
LW
1516 }
1517 break;
1518#else
924ba076 1519 /* FALLTHROUGH */
49704364 1520#endif
a6ec74c1
JH
1521 case 'V':
1522 case 'N':
1523 case 'L':
73cb7263 1524 while (len-- > 0) {
08ca2aa3
TH
1525 U32 au32;
1526#if U32SIZE > SIZE32
1527 au32 = 0;
1528#endif
aaec8192 1529 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1530 if (datumtype == 'N')
1531 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1532 if (datumtype == 'V')
1533 au32 = vtohl(au32);
08ca2aa3 1534 if (!checksum)
6e449a3a 1535 mPUSHu(au32);
fc241834
RGS
1536 else if (checksum > bits_in_uv)
1537 cdouble += (NV)au32;
1538 else
1539 cuv += au32;
a6ec74c1
JH
1540 }
1541 break;
068bd2e7
MHM
1542 case 'V' | TYPE_IS_SHRIEKING:
1543 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1544 while (len-- > 0) {
08ca2aa3 1545 I32 ai32;
f8e5a5db 1546#if U32SIZE > SIZE32
08ca2aa3 1547 ai32 = 0;
f8e5a5db 1548#endif
aaec8192 1549 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1550 /* There should never be any byte swapping here. */
1551 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1552 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1553 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1554 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1555 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1556 if (!checksum)
6e449a3a 1557 mPUSHi(ai32);
73cb7263
NC
1558 else if (checksum > bits_in_uv)
1559 cdouble += (NV)ai32;
1560 else
1561 cuv += ai32;
068bd2e7
MHM
1562 }
1563 break;
a6ec74c1 1564 case 'p':
a6ec74c1 1565 while (len-- > 0) {
f7fe979e 1566 const char *aptr;
aaec8192 1567 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1568 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1569 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1570 }
1571 break;
1572 case 'w':
a6ec74c1
JH
1573 {
1574 UV auv = 0;
1575 U32 bytes = 0;
fc241834 1576
08ca2aa3
TH
1577 while (len > 0 && s < strend) {
1578 U8 ch;
f337b084 1579 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1580 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1581 /* UTF8_IS_XXXXX not right here because this is a BER, not
1582 * UTF-8 format - using constant 0x80 */
08ca2aa3 1583 if (ch < 0x80) {
a6ec74c1 1584 bytes = 0;
6e449a3a 1585 mPUSHu(auv);
a6ec74c1
JH
1586 len--;
1587 auv = 0;
08ca2aa3 1588 continue;
a6ec74c1 1589 }
08ca2aa3 1590 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1591 const char *t;
a6ec74c1 1592
147e3846
KW
1593 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1594 (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1595 while (s < strend) {
f337b084 1596 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1597 sv = mul128(sv, (U8)(ch & 0x7f));
1598 if (!(ch & 0x80)) {
a6ec74c1
JH
1599 bytes = 0;
1600 break;
1601 }
1602 }
10516c54 1603 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1604 while (*t == '0')
1605 t++;
1606 sv_chop(sv, t);
6e449a3a 1607 mPUSHs(sv);
a6ec74c1
JH
1608 len--;
1609 auv = 0;
1610 }
1611 }
1612 if ((s >= strend) && bytes)
49704364 1613 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1614 }
1615 break;
1616 case 'P':
49704364
LW
1617 if (symptr->howlen == e_star)
1618 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1619 EXTEND(SP, 1);
2d3e0934 1620 if (s + sizeof(char*) <= strend) {
08ca2aa3 1621 char *aptr;
aaec8192 1622 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1623 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1624 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1625 }
a6ec74c1 1626 break;
c174bf3b 1627#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1628 case 'q':
73cb7263 1629 while (len-- > 0) {
08ca2aa3 1630 Quad_t aquad;
aaec8192 1631 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1632 if (!checksum)
c174bf3b 1633 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1634 else if (checksum > bits_in_uv)
1635 cdouble += (NV)aquad;
1636 else
1637 cuv += aquad;
1638 }
a6ec74c1
JH
1639 break;
1640 case 'Q':
73cb7263 1641 while (len-- > 0) {
08ca2aa3 1642 Uquad_t auquad;
aaec8192 1643 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1644 if (!checksum)
c174bf3b 1645 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1646 else if (checksum > bits_in_uv)
1647 cdouble += (NV)auquad;
1648 else
1649 cuv += auquad;
a6ec74c1
JH
1650 }
1651 break;
1640b983 1652#endif
a6ec74c1
JH
1653 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1654 case 'f':
73cb7263 1655 while (len-- > 0) {
08ca2aa3 1656 float afloat;
aaec8192 1657 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1658 if (!checksum)
6e449a3a 1659 mPUSHn(afloat);
08ca2aa3 1660 else
73cb7263 1661 cdouble += afloat;
fc241834 1662 }
a6ec74c1
JH
1663 break;
1664 case 'd':
73cb7263 1665 while (len-- > 0) {
08ca2aa3 1666 double adouble;
aaec8192 1667 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1668 if (!checksum)
6e449a3a 1669 mPUSHn(adouble);
08ca2aa3 1670 else
73cb7263 1671 cdouble += adouble;
fc241834 1672 }
a6ec74c1 1673 break;
92d41999 1674 case 'F':
73cb7263 1675 while (len-- > 0) {
275663fa 1676 NV_bytes anv;
aaec8192
NC
1677 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1678 datumtype, needs_swap);
08ca2aa3 1679 if (!checksum)
275663fa 1680 mPUSHn(anv.nv);
08ca2aa3 1681 else
275663fa 1682 cdouble += anv.nv;
fc241834 1683 }
92d41999
JH
1684 break;
1685#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1686 case 'D':
73cb7263 1687 while (len-- > 0) {
275663fa 1688 ld_bytes aldouble;
aaec8192
NC
1689 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1690 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1691 /* The most common long double format, the x86 80-bit
1692 * extended precision, has either 2 or 6 unused bytes,
1693 * which may contain garbage, which may contain
1694 * unintentional data. While we do zero the bytes of
1695 * the long double data in pack(), here in unpack() we
1696 * don't, because it's really hard to envision that
1697 * reading the long double off aldouble would be
e075ae47 1698 * affected by the unused bytes.
42262fd3
JH
1699 *
1700 * Note that trying to unpack 'long doubles' of 'long
1701 * doubles' packed in another system is in the general
1702 * case doomed without having more detail. */
08ca2aa3 1703 if (!checksum)
275663fa 1704 mPUSHn(aldouble.ld);
08ca2aa3 1705 else
275663fa 1706 cdouble += aldouble.ld;
92d41999
JH
1707 }
1708 break;
1709#endif
a6ec74c1 1710 case 'u':
858fe5e1 1711 if (!checksum) {
f7fe979e 1712 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1713 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1714 if (l) SvPOK_on(sv);
1715 }
08ca2aa3 1716
bee81d36
KW
1717 /* Note that all legal uuencoded strings are ASCII printables, so
1718 * have the same representation under UTF-8 vs not. This means we
1719 * can ignore UTF8ness on legal input. For illegal we stop at the
1720 * first failure, and don't report where/what that is, so again we
1721 * can ignore UTF8ness */
1722
9233b56b
KW
1723 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1724 I32 a, b, c, d;
1725 char hunk[3];
1726
1727 len = PL_uudmap[*(U8*)s++] & 077;
1728 while (len > 0) {
1729 if (s < strend && ISUUCHAR(*s))
1730 a = PL_uudmap[*(U8*)s++] & 077;
1731 else
1732 a = 0;
1733 if (s < strend && ISUUCHAR(*s))
1734 b = PL_uudmap[*(U8*)s++] & 077;
1735 else
1736 b = 0;
1737 if (s < strend && ISUUCHAR(*s))
1738 c = PL_uudmap[*(U8*)s++] & 077;
1739 else
1740 c = 0;
1741 if (s < strend && ISUUCHAR(*s))
1742 d = PL_uudmap[*(U8*)s++] & 077;
1743 else
1744 d = 0;
1745 hunk[0] = (char)((a << 2) | (b >> 4));
1746 hunk[1] = (char)((b << 4) | (c >> 2));
1747 hunk[2] = (char)((c << 6) | d);
1748 if (!checksum)
1749 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1750 len -= 3;
1751 }
1752 if (*s == '\n')
1753 s++;
1754 else /* possible checksum byte */
1755 if (s + 1 < strend && s[1] == '\n')
1756 s += 2;
1757 }
858fe5e1
TC
1758 if (!checksum)
1759 XPUSHs(sv);
a6ec74c1 1760 break;
99f862a1 1761 } /* End of switch */
49704364 1762
a6ec74c1 1763 if (checksum) {
1109a392 1764 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1765 (checksum > bits_in_uv &&
08ca2aa3
TH
1766 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1767 NV trouble, anv;
a6ec74c1 1768
08ca2aa3 1769 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1770 while (checksum >= 16) {
1771 checksum -= 16;
08ca2aa3 1772 anv *= 65536.0;
a6ec74c1 1773 }
a6ec74c1 1774 while (cdouble < 0.0)
08ca2aa3 1775 cdouble += anv;
09b94b1f
S
1776 cdouble = Perl_modf(cdouble / anv, &trouble);
1777#ifdef LONGDOUBLE_DOUBLEDOUBLE
1778 /* Workaround for powerpc doubledouble modfl bug:
1779 * close to 1.0L and -1.0L cdouble is 0, and trouble
1780 * is cdouble / anv. */
1781 if (trouble != Perl_ceil(trouble)) {
1782 cdouble = trouble;
1783 if (cdouble > 1.0L) cdouble -= 1.0L;
1784 if (cdouble < -1.0L) cdouble += 1.0L;
1785 }
1786#endif
1787 cdouble *= anv;
c4c5f44a 1788 sv = newSVnv(cdouble);
a6ec74c1
JH
1789 }
1790 else {
fa8ec7c1
NC
1791 if (checksum < bits_in_uv) {
1792 UV mask = ((UV)1 << checksum) - 1;
92d41999 1793 cuv &= mask;
a6ec74c1 1794 }
c4c5f44a 1795 sv = newSVuv(cuv);
a6ec74c1 1796 }
6e449a3a 1797 mXPUSHs(sv);
a6ec74c1
JH
1798 checksum = 0;
1799 }
fc241834 1800
49704364
LW
1801 if (symptr->flags & FLAG_SLASH){
1802 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1803 break;
49704364
LW
1804 if( next_symbol(symptr) ){
1805 if( symptr->howlen == e_number )
1806 Perl_croak(aTHX_ "Count after length/code in unpack" );
1807 if( beyond ){
1808 /* ...end of char buffer then no decent length available */
1809 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1810 } else {
1811 /* take top of stack (hope it's numeric) */
1812 len = POPi;
1813 if( len < 0 )
1814 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1815 }
1816 } else {
1817 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1818 }
1819 datumtype = symptr->code;
21c16052 1820 explicit_length = FALSE;
49704364
LW
1821 goto redo_switch;
1822 }
a6ec74c1 1823 }
49704364 1824
18529408
IZ
1825 if (new_s)
1826 *new_s = s;
1827 PUTBACK;
1828 return SP - PL_stack_base - start_sp_offset;
1829}
1830
1831PP(pp_unpack)
1832{
1833 dSP;
bab9c0ac 1834 dPOPPOPssrl;
1c23e2bd 1835 U8 gimme = GIMME_V;
18529408
IZ
1836 STRLEN llen;
1837 STRLEN rlen;
5c144d81
NC
1838 const char *pat = SvPV_const(left, llen);
1839 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1840 const char *strend = s + rlen;
1841 const char *patend = pat + llen;
08ca2aa3 1842 I32 cnt;
18529408
IZ
1843
1844 PUTBACK;
7accc089 1845 cnt = unpackstring(pat, patend, s, strend,
49704364 1846 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1847 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1848
18529408
IZ
1849 SPAGAIN;
1850 if ( !cnt && gimme == G_SCALAR )
1851 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1852 RETURN;
1853}
1854
f337b084 1855STATIC U8 *
e68aed92 1856doencodes(U8 *h, const U8 *s, I32 len)
a6ec74c1 1857{
f337b084 1858 *h++ = PL_uuemap[len];
a6ec74c1 1859 while (len > 2) {
f337b084
TH
1860 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1861 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1862 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1863 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1864 s += 3;
1865 len -= 3;
1866 }
1867 if (len > 0) {
e68aed92 1868 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1869 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1870 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1871 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1872 *h++ = PL_uuemap[0];
a6ec74c1 1873 }
f337b084
TH
1874 *h++ = '\n';
1875 return h;
a6ec74c1
JH
1876}
1877
1878STATIC SV *
f7fe979e 1879S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1880{
8b6e33c7
AL
1881 SV *result = newSVpvn(s, l);
1882 char *const result_c = SvPV_nolen(result); /* convenience */
1883 char *out = result_c;
1884 bool skip = 1;
1885 bool ignore = 0;
a6ec74c1 1886
7918f24d
NC
1887 PERL_ARGS_ASSERT_IS_AN_INT;
1888
a6ec74c1
JH
1889 while (*s) {
1890 switch (*s) {
1891 case ' ':
1892 break;
1893 case '+':
1894 if (!skip) {
1895 SvREFCNT_dec(result);
1896 return (NULL);
1897 }
1898 break;
1899 case '0':
1900 case '1':
1901 case '2':
1902 case '3':
1903 case '4':
1904 case '5':
1905 case '6':
1906 case '7':
1907 case '8':
1908 case '9':
1909 skip = 0;
1910 if (!ignore) {
1911 *(out++) = *s;
1912 }
1913 break;
1914 case '.':
1915 ignore = 1;
1916 break;
1917 default:
1918 SvREFCNT_dec(result);
1919 return (NULL);
1920 }
1921 s++;
1922 }
1923 *(out++) = '\0';
1924 SvCUR_set(result, out - result_c);
1925 return (result);
1926}
1927
1928/* pnum must be '\0' terminated */
1929STATIC int
1930S_div128(pTHX_ SV *pnum, bool *done)
1931{
8b6e33c7
AL
1932 STRLEN len;
1933 char * const s = SvPV(pnum, len);
1934 char *t = s;
1935 int m = 0;
1936
7918f24d
NC
1937 PERL_ARGS_ASSERT_DIV128;
1938
8b6e33c7
AL
1939 *done = 1;
1940 while (*t) {
1941 const int i = m * 10 + (*t - '0');
1942 const int r = (i >> 7); /* r < 10 */
1943 m = i & 0x7F;
1944 if (r) {
1945 *done = 0;
1946 }
1947 *(t++) = '0' + r;
a6ec74c1 1948 }
8b6e33c7
AL
1949 *(t++) = '\0';
1950 SvCUR_set(pnum, (STRLEN) (t - s));
1951 return (m);
a6ec74c1
JH
1952}
1953
18529408 1954/*
7accc089
JH
1955=for apidoc packlist
1956
796b6530 1957The engine implementing C<pack()> Perl function.
7accc089 1958
bfce84ec
AL
1959=cut
1960*/
7accc089
JH
1961
1962void
5aaab254 1963Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1964{
aadb217d
JH
1965 tempsym_t sym;
1966
7918f24d
NC
1967 PERL_ARGS_ASSERT_PACKLIST;
1968
f7fe979e 1969 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1970
f337b084
TH
1971 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1972 Also make sure any UTF8 flag is loaded */
56eb0262 1973 SvPV_force_nolen(cat);
bfce84ec
AL
1974 if (DO_UTF8(cat))
1975 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1976
49704364
LW
1977 (void)pack_rec( cat, &sym, beglist, endlist );
1978}
1979
f337b084
TH
1980/* like sv_utf8_upgrade, but also repoint the group start markers */
1981STATIC void
1982marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1983 STRLEN len;
1984 tempsym_t *group;
f7fe979e
AL
1985 const char *from_ptr, *from_start, *from_end, **marks, **m;
1986 char *to_start, *to_ptr;
f337b084
TH
1987
1988 if (SvUTF8(sv)) return;
1989
aa07b2f6 1990 from_start = SvPVX_const(sv);
f337b084
TH
1991 from_end = from_start + SvCUR(sv);
1992 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 1993 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
1994 if (from_ptr == from_end) {
1995 /* Simple case: no character needs to be changed */
1996 SvUTF8_on(sv);
1997 return;
1998 }
1999
3473cf63 2000 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2001 Newx(to_start, len, char);
f337b084
TH
2002 Copy(from_start, to_start, from_ptr-from_start, char);
2003 to_ptr = to_start + (from_ptr-from_start);
2004
a02a5408 2005 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2006 for (group=sym_ptr; group; group = group->previous)
2007 marks[group->level] = from_start + group->strbeg;
2008 marks[sym_ptr->level+1] = from_end+1;
2009 for (m = marks; *m < from_ptr; m++)
2010 *m = to_start + (*m-from_start);
2011
2012 for (;from_ptr < from_end; from_ptr++) {
2013 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2014 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2015 }
2016 *to_ptr = 0;
2017
2018 while (*m == from_ptr) *m++ = to_ptr;
2019 if (m != marks + sym_ptr->level+1) {
2020 Safefree(marks);
2021 Safefree(to_start);
5637ef5b
NC
2022 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2023 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2024 }
2025 for (group=sym_ptr; group; group = group->previous)
2026 group->strbeg = marks[group->level] - to_start;
2027 Safefree(marks);
2028
2029 if (SvOOK(sv)) {
2030 if (SvIVX(sv)) {
b162af07 2031 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2032 from_start -= SvIVX(sv);
2033 SvIV_set(sv, 0);
2034 }
2035 SvFLAGS(sv) &= ~SVf_OOK;
2036 }
2037 if (SvLEN(sv) != 0)
2038 Safefree(from_start);
f880fe2f 2039 SvPV_set(sv, to_start);
b162af07
SP
2040 SvCUR_set(sv, to_ptr - to_start);
2041 SvLEN_set(sv, len);
f337b084
TH
2042 SvUTF8_on(sv);
2043}
2044
2045/* Exponential string grower. Makes string extension effectively O(n)
2046 needed says how many extra bytes we need (not counting the final '\0')
2047 Only grows the string if there is an actual lack of space
2048*/
2049STATIC char *
0bd48802 2050S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2051 const STRLEN cur = SvCUR(sv);
2052 const STRLEN len = SvLEN(sv);
f337b084 2053 STRLEN extend;
7918f24d
NC
2054
2055 PERL_ARGS_ASSERT_SV_EXP_GROW;
2056
f337b084
TH
2057 if (len - cur > needed) return SvPVX(sv);
2058 extend = needed > len ? needed : len;
2059 return SvGROW(sv, len+extend+1);
2060}
49704364 2061
93f6e112 2062static SV *
b197e565 2063S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2064{
2065 SvGETMAGIC(sv);
93f6e112
FC
2066 if (UNLIKELY(SvAMAGIC(sv)))
2067 sv = sv_2num(sv);
99f450cc 2068 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2069 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2070 const NV nv = SvNV_nomg(sv);
2071 if (c == 'w')
147e3846 2072 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
354b74ae 2073 else
147e3846 2074 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2075 }
93f6e112 2076 return sv;
354b74ae
FC
2077}
2078
93f6e112
FC
2079#define SvIV_no_inf(sv,d) \
2080 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2081#define SvUV_no_inf(sv,d) \
2082 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2083
49704364
LW
2084STATIC
2085SV **
f337b084 2086S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2087{
49704364 2088 tempsym_t lookahead;
f337b084
TH
2089 I32 items = endlist - beglist;
2090 bool found = next_symbol(symptr);
2091 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2092 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2093 char* from;
f337b084 2094
7918f24d
NC
2095 PERL_ARGS_ASSERT_PACK_REC;
2096
f337b084
TH
2097 if (symptr->level == 0 && found && symptr->code == 'U') {
2098 marked_upgrade(aTHX_ cat, symptr);
2099 symptr->flags |= FLAG_DO_UTF8;
2100 utf8 = 0;
49704364 2101 }
f337b084 2102 symptr->strbeg = SvCUR(cat);
49704364
LW
2103
2104 while (found) {
f337b084
TH
2105 SV *fromstr;
2106 STRLEN fromlen;
2107 I32 len;
a0714e2c 2108 SV *lengthcode = NULL;
49704364 2109 I32 datumtype = symptr->code;
f337b084
TH
2110 howlen_t howlen = symptr->howlen;
2111 char *start = SvPVX(cat);
2112 char *cur = start + SvCUR(cat);
a1219b5e 2113 bool needs_swap;
49704364 2114
fc1bb3f2 2115#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2116#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2117
2118 switch (howlen) {
fc241834 2119 case e_star:
f337b084
TH
2120 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2121 0 : items;
2122 break;
2123 default:
2124 /* e_no_len and e_number */
2125 len = symptr->length;
49704364
LW
2126 break;
2127 }
2128
f337b084 2129 if (len) {
a7a3cfaa 2130 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2131
a7a3cfaa
TH
2132 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2133 /* We can process this letter. */
2134 STRLEN size = props & PACK_SIZE_MASK;
2135 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2136 }
f337b084
TH
2137 }
2138
49704364
LW
2139 /* Look ahead for next symbol. Do we have code/code? */
2140 lookahead = *symptr;
2141 found = next_symbol(&lookahead);
246f24af
TH
2142 if (symptr->flags & FLAG_SLASH) {
2143 IV count;
f337b084 2144 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2145 if (strchr("aAZ", lookahead.code)) {
2146 if (lookahead.howlen == e_number) count = lookahead.length;
2147 else {
ce399ba6 2148 if (items > 0) {
48a5da33 2149 count = sv_len_utf8(*beglist);
ce399ba6 2150 }
246f24af
TH
2151 else count = 0;
2152 if (lookahead.code == 'Z') count++;
2153 }
2154 } else {
2155 if (lookahead.howlen == e_number && lookahead.length < items)
2156 count = lookahead.length;
2157 else count = items;
2158 }
2159 lookahead.howlen = e_number;
2160 lookahead.length = count;
2161 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2162 }
49704364 2163
a1219b5e
NC
2164 needs_swap = NEEDS_SWAP(datumtype);
2165
fc241834
RGS
2166 /* Code inside the switch must take care to properly update
2167 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2168 doesn't simply leave using break */
0c7df902 2169 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2170 default:
f337b084
TH
2171 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2172 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2173 case '%':
49704364 2174 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2175
28be1210 2176 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2177 case '.':
2178 if (howlen == e_star) from = start;
2179 else if (len == 0) from = cur;
2180 else {
2181 tempsym_t *group = symptr;
2182
2183 while (--len && group) group = group->previous;
2184 from = group ? start + group->strbeg : start;
2185 }
2186 fromstr = NEXTFROM;
354b74ae 2187 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2188 goto resize;
28be1210 2189 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2190 case '@':
28be1210
TH
2191 from = start + symptr->strbeg;
2192 resize:
28be1210 2193 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2194 if (len >= 0) {
2195 while (len && from < cur) {
2196 from += UTF8SKIP(from);
2197 len--;
2198 }
2199 if (from > cur)
2200 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2201 if (len) {
2202 /* Here we know from == cur */
2203 grow:
2204 GROWING(0, cat, start, cur, len);
2205 Zero(cur, len, char);
2206 cur += len;
2207 } else if (from < cur) {
2208 len = cur - from;
2209 goto shrink;
2210 } else goto no_change;
2211 } else {
2212 cur = from;
2213 len = -len;
2214 goto utf8_shrink;
f337b084 2215 }
28be1210
TH
2216 else {
2217 len -= cur - from;
f337b084 2218 if (len > 0) goto grow;
28be1210 2219 if (len == 0) goto no_change;
fc241834 2220 len = -len;
28be1210 2221 goto shrink;
f337b084 2222 }
a6ec74c1 2223 break;
81d52ecd 2224
fc241834 2225 case '(': {
49704364 2226 tempsym_t savsym = *symptr;
66c611c5
MHM
2227 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2228 symptr->flags |= group_modifiers;
49704364
LW
2229 symptr->patend = savsym.grpend;
2230 symptr->level++;
f337b084 2231 symptr->previous = &lookahead;
18529408 2232 while (len--) {
f337b084
TH
2233 U32 was_utf8;
2234 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2235 else symptr->flags &= ~FLAG_PARSE_UTF8;
2236 was_utf8 = SvUTF8(cat);
49704364 2237 symptr->patptr = savsym.grpbeg;
f337b084
TH
2238 beglist = pack_rec(cat, symptr, beglist, endlist);
2239 if (SvUTF8(cat) != was_utf8)
2240 /* This had better be an upgrade while in utf8==0 mode */
2241 utf8 = 1;
2242
49704364 2243 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2244 break; /* No way to continue */
2245 }
ee790063 2246 items = endlist - beglist;
f337b084
TH
2247 lookahead.flags = symptr->flags & ~group_modifiers;
2248 goto no_change;
18529408 2249 }
62f95557
IZ
2250 case 'X' | TYPE_IS_SHRIEKING:
2251 if (!len) /* Avoid division by 0 */
2252 len = 1;
f337b084
TH
2253 if (utf8) {
2254 char *hop, *last;
2255 I32 l = len;
2256 hop = last = start;
2257 while (hop < cur) {
2258 hop += UTF8SKIP(hop);
2259 if (--l == 0) {
2260 last = hop;
2261 l = len;
2262 }
2263 }
2264 if (last > cur)
2265 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2266 cur = last;
2267 break;
2268 }
2269 len = (cur-start) % len;
924ba076 2270 /* FALLTHROUGH */
a6ec74c1 2271 case 'X':
f337b084
TH
2272 if (utf8) {
2273 if (len < 1) goto no_change;
28be1210 2274 utf8_shrink:
f337b084
TH
2275 while (len > 0) {
2276 if (cur <= start)
28be1210
TH
2277 Perl_croak(aTHX_ "'%c' outside of string in pack",
2278 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2279 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2280 if (cur <= start)
28be1210
TH
2281 Perl_croak(aTHX_ "'%c' outside of string in pack",
2282 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2283 }
2284 len--;
2285 }
2286 } else {
fc241834 2287 shrink:
f337b084 2288 if (cur - start < len)
28be1210
TH
2289 Perl_croak(aTHX_ "'%c' outside of string in pack",
2290 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2291 cur -= len;
2292 }
2293 if (cur < start+symptr->strbeg) {
2294 /* Make sure group starts don't point into the void */
2295 tempsym_t *group;
9e27e96a 2296 const STRLEN length = cur-start;
f337b084
TH
2297 for (group = symptr;
2298 group && length < group->strbeg;
2299 group = group->previous) group->strbeg = length;
2300 lookahead.strbeg = length;
2301 }
a6ec74c1 2302 break;
fc241834
RGS
2303 case 'x' | TYPE_IS_SHRIEKING: {
2304 I32 ai32;
62f95557
IZ
2305 if (!len) /* Avoid division by 0 */
2306 len = 1;
230e1fce 2307 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2308 else ai32 = (cur - start) % len;
2309 if (ai32 == 0) goto no_change;
2310 len -= ai32;
2311 }
924ba076 2312 /* FALLTHROUGH */
a6ec74c1 2313 case 'x':
f337b084 2314 goto grow;
a6ec74c1
JH
2315 case 'A':
2316 case 'Z':
f337b084 2317 case 'a': {
f7fe979e 2318 const char *aptr;
f337b084 2319
a6ec74c1 2320 fromstr = NEXTFROM;
e62f0680 2321 aptr = SvPV_const(fromstr, fromlen);
f337b084 2322 if (DO_UTF8(fromstr)) {
f7fe979e 2323 const char *end, *s;
f337b084
TH
2324
2325 if (!utf8 && !SvUTF8(cat)) {
2326 marked_upgrade(aTHX_ cat, symptr);
2327 lookahead.flags |= FLAG_DO_UTF8;
2328 lookahead.strbeg = symptr->strbeg;
2329 utf8 = 1;
2330 start = SvPVX(cat);
2331 cur = start + SvCUR(cat);
2332 }
fc241834 2333 if (howlen == e_star) {
f337b084
TH
2334 if (utf8) goto string_copy;
2335 len = fromlen+1;
2336 }
2337 s = aptr;
2338 end = aptr + fromlen;
2339 fromlen = datumtype == 'Z' ? len-1 : len;
2340 while ((I32) fromlen > 0 && s < end) {
2341 s += UTF8SKIP(s);
2342 fromlen--;
2343 }
2344 if (s > end)
2345 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2346 if (utf8) {
fc241834 2347 len = fromlen;
f337b084
TH
2348 if (datumtype == 'Z') len++;
2349 fromlen = s-aptr;
2350 len += fromlen;
fc241834 2351
f337b084 2352 goto string_copy;
fc241834 2353 }
f337b084
TH
2354 fromlen = len - fromlen;
2355 if (datumtype == 'Z') fromlen--;
2356 if (howlen == e_star) {
2357 len = fromlen;
2358 if (datumtype == 'Z') len++;
fc241834 2359 }
f337b084 2360 GROWING(0, cat, start, cur, len);
9df874cd 2361 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2362 datumtype | TYPE_IS_PACK))
5637ef5b 2363 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
147e3846 2364 "for '%c', aptr=%p end=%p cur=%p, fromlen=%" UVuf,
5637ef5b 2365 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2366 cur += fromlen;
a6ec74c1 2367 len -= fromlen;
f337b084
TH
2368 } else if (utf8) {
2369 if (howlen == e_star) {
2370 len = fromlen;
2371 if (datumtype == 'Z') len++;
a6ec74c1 2372 }
f337b084
TH
2373 if (len <= (I32) fromlen) {
2374 fromlen = len;
2375 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2376 }
fc241834 2377 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2378 upgrade, so:
2379 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2380 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2381 len -= fromlen;
2382 while (fromlen > 0) {
230e1fce 2383 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2384 aptr++;
2385 fromlen--;
fc241834 2386 }
f337b084
TH
2387 } else {
2388 string_copy:
2389 if (howlen == e_star) {
2390 len = fromlen;
2391 if (datumtype == 'Z') len++;
2392 }
2393 if (len <= (I32) fromlen) {
2394 fromlen = len;
2395 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2396 }
f337b084
TH
2397 GROWING(0, cat, start, cur, len);
2398 Copy(aptr, cur, fromlen, char);
2399 cur += fromlen;
2400 len -= fromlen;
a6ec74c1 2401 }
f337b084
TH
2402 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2403 cur += len;
3c4fb04a 2404 SvTAINT(cat);
a6ec74c1 2405 break;
f337b084 2406 }
a6ec74c1 2407 case 'B':
f337b084 2408 case 'b': {
b83604b4 2409 const char *str, *end;
f337b084
TH
2410 I32 l, field_len;
2411 U8 bits;
2412 bool utf8_source;
2413 U32 utf8_flags;
a6ec74c1 2414
fc241834 2415 fromstr = NEXTFROM;
b83604b4 2416 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2417 end = str + fromlen;
2418 if (DO_UTF8(fromstr)) {
2419 utf8_source = TRUE;
041457d9 2420 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2421 } else {
2422 utf8_source = FALSE;
2423 utf8_flags = 0; /* Unused, but keep compilers happy */
2424 }
2425 if (howlen == e_star) len = fromlen;
2426 field_len = (len+7)/8;
2427 GROWING(utf8, cat, start, cur, field_len);
2428 if (len > (I32)fromlen) len = fromlen;
2429 bits = 0;
2430 l = 0;
2431 if (datumtype == 'B')
2432 while (l++ < len) {
2433 if (utf8_source) {
95b63a38 2434 UV val = 0;
f337b084
TH
2435 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2436 bits |= val & 1;
2437 } else bits |= *str++ & 1;
2438 if (l & 7) bits <<= 1;
fc241834 2439 else {
f337b084
TH
2440 PUSH_BYTE(utf8, cur, bits);
2441 bits = 0;
a6ec74c1
JH
2442 }
2443 }
f337b084
TH
2444 else
2445 /* datumtype == 'b' */
2446 while (l++ < len) {
2447 if (utf8_source) {
95b63a38 2448 UV val = 0;
f337b084
TH
2449 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2450 if (val & 1) bits |= 0x80;
2451 } else if (*str++ & 1)
2452 bits |= 0x80;
2453 if (l & 7) bits >>= 1;
fc241834 2454 else {
f337b084
TH
2455 PUSH_BYTE(utf8, cur, bits);
2456 bits = 0;
a6ec74c1
JH
2457 }
2458 }
f337b084
TH
2459 l--;
2460 if (l & 7) {
fc241834 2461 if (datumtype == 'B')
f337b084 2462 bits <<= 7 - (l & 7);
fc241834 2463 else
f337b084
TH
2464 bits >>= 7 - (l & 7);
2465 PUSH_BYTE(utf8, cur, bits);
2466 l += 7;
a6ec74c1 2467 }
f337b084
TH
2468 /* Determine how many chars are left in the requested field */
2469 l /= 8;
2470 if (howlen == e_star) field_len = 0;
2471 else field_len -= l;
2472 Zero(cur, field_len, char);
2473 cur += field_len;
a6ec74c1 2474 break;
f337b084 2475 }
a6ec74c1 2476 case 'H':
f337b084 2477 case 'h': {
10516c54 2478 const char *str, *end;
f337b084
TH
2479 I32 l, field_len;
2480 U8 bits;
2481 bool utf8_source;
2482 U32 utf8_flags;
a6ec74c1 2483
fc241834 2484 fromstr = NEXTFROM;
10516c54 2485 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2486 end = str + fromlen;
2487 if (DO_UTF8(fromstr)) {
2488 utf8_source = TRUE;
041457d9 2489 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2490 } else {
2491 utf8_source = FALSE;
2492 utf8_flags = 0; /* Unused, but keep compilers happy */
2493 }
2494 if (howlen == e_star) len = fromlen;
2495 field_len = (len+1)/2;
2496 GROWING(utf8, cat, start, cur, field_len);
0403a1ad 2497 if (!utf8_source && len > (I32)fromlen) len = fromlen;
f337b084
TH
2498 bits = 0;
2499 l = 0;
2500 if (datumtype == 'H')
2501 while (l++ < len) {
2502 if (utf8_source) {
95b63a38 2503 UV val = 0;
f337b084
TH
2504 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2505 if (val < 256 && isALPHA(val))
2506 bits |= (val + 9) & 0xf;
a6ec74c1 2507 else
f337b084
TH
2508 bits |= val & 0xf;
2509 } else if (isALPHA(*str))
2510 bits |= (*str++ + 9) & 0xf;
2511 else
2512 bits |= *str++ & 0xf;
2513 if (l & 1) bits <<= 4;
fc241834 2514 else {
f337b084
TH
2515 PUSH_BYTE(utf8, cur, bits);
2516 bits = 0;
a6ec74c1
JH
2517 }
2518 }
f337b084
TH
2519 else
2520 while (l++ < len) {
2521 if (utf8_source) {
95b63a38 2522 UV val = 0;
f337b084
TH
2523 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2524 if (val < 256 && isALPHA(val))
2525 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2526 else
f337b084
TH
2527 bits |= (val & 0xf) << 4;
2528 } else if (isALPHA(*str))
2529 bits |= ((*str++ + 9) & 0xf) << 4;
2530 else
2531 bits |= (*str++ & 0xf) << 4;
2532 if (l & 1) bits >>= 4;
fc241834 2533 else {
f337b084
TH
2534 PUSH_BYTE(utf8, cur, bits);
2535 bits = 0;
a6ec74c1 2536 }
fc241834 2537 }
f337b084
TH
2538 l--;
2539 if (l & 1) {
2540 PUSH_BYTE(utf8, cur, bits);
2541 l++;
2542 }
2543 /* Determine how many chars are left in the requested field */
2544 l /= 2;
2545 if (howlen == e_star) field_len = 0;
2546 else field_len -= l;
2547 Zero(cur, field_len, char);
2548 cur += field_len;
2549 break;
fc241834
RGS
2550 }
2551 case 'c':
f337b084
TH
2552 while (len-- > 0) {
2553 IV aiv;
2554 fromstr = NEXTFROM;
354b74ae 2555 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2556 if ((-128 > aiv || aiv > 127))
2557 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2558 "Character in 'c' format wrapped in pack");
585ec06d 2559 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2560 }
2561 break;
2562 case 'C':
f337b084
TH
2563 if (len == 0) {
2564 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2565 break;
2566 }
a6ec74c1 2567 while (len-- > 0) {
f337b084 2568 IV aiv;
a6ec74c1 2569 fromstr = NEXTFROM;
354b74ae 2570 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2571 if ((0 > aiv || aiv > 0xff))
2572 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2573 "Character in 'C' format wrapped in pack");
1651fc44 2574 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2575 }
fc241834
RGS
2576 break;
2577 case 'W': {
2578 char *end;
670f1322 2579 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2580
2581 end = start+SvLEN(cat)-1;
2582 if (utf8) end -= UTF8_MAXLEN-1;
2583 while (len-- > 0) {
2584 UV auv;
2585 fromstr = NEXTFROM;
354b74ae 2586 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2587 if (in_bytes) auv = auv % 0x100;
2588 if (utf8) {
2589 W_utf8:
bf4a926a 2590 if (cur >= end) {
fc241834 2591 *cur = '\0';
b162af07 2592 SvCUR_set(cat, cur - start);
fc241834
RGS
2593
2594 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2595 end = start+SvLEN(cat)-UTF8_MAXLEN;
2596 }
c80e42f3
KW
2597 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2598 auv,
041457d9 2599 warn_utf8 ?
230e1fce 2600 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2601 } else {
2602 if (auv >= 0x100) {
2603 if (!SvUTF8(cat)) {
2604 *cur = '\0';
b162af07 2605 SvCUR_set(cat, cur - start);
fc241834
RGS
2606 marked_upgrade(aTHX_ cat, symptr);
2607 lookahead.flags |= FLAG_DO_UTF8;
2608 lookahead.strbeg = symptr->strbeg;
2609 utf8 = 1;
2610 start = SvPVX(cat);
2611 cur = start + SvCUR(cat);
2612 end = start+SvLEN(cat)-UTF8_MAXLEN;
2613 goto W_utf8;
2614 }
a2a5de95
NC
2615 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2616 "Character in 'W' format wrapped in pack");
fc241834
RGS
2617 auv &= 0xff;
2618 }
2619 if (cur >= end) {
2620 *cur = '\0';
b162af07 2621 SvCUR_set(cat, cur - start);
fc241834
RGS
2622 GROWING(0, cat, start, cur, len+1);
2623 end = start+SvLEN(cat)-1;
2624 }
fe2774ed 2625 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2626 }
2627 }
2628 break;
fc241834
RGS
2629 }
2630 case 'U': {
2631 char *end;
2632
2633 if (len == 0) {
2634 if (!(symptr->flags & FLAG_DO_UTF8)) {
2635 marked_upgrade(aTHX_ cat, symptr);
2636 lookahead.flags |= FLAG_DO_UTF8;
2637 lookahead.strbeg = symptr->strbeg;
2638 }
2639 utf8 = 0;
2640 goto no_change;
2641 }
2642
2643 end = start+SvLEN(cat);
2644 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2645 while (len-- > 0) {
fc241834 2646 UV auv;
a6ec74c1 2647 fromstr = NEXTFROM;
354b74ae 2648 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2649 if (utf8) {
230e1fce 2650 U8 buffer[UTF8_MAXLEN], *endb;
3ece276e 2651 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
041457d9 2652 warn_utf8 ?
fc241834
RGS
2653 0 : UNICODE_ALLOW_ANY);
2654 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2655 *cur = '\0';
b162af07 2656 SvCUR_set(cat, cur - start);
fc241834
RGS
2657 GROWING(0, cat, start, cur,
2658 len+(endb-buffer)*UTF8_EXPAND);
2659 end = start+SvLEN(cat);
2660 }
d21d7215 2661 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2662 } else {
2663 if (cur >= end) {
2664 *cur = '\0';
b162af07 2665 SvCUR_set(cat, cur - start);
fc241834
RGS
2666 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2667 end = start+SvLEN(cat)-UTF8_MAXLEN;
2668 }
3ece276e 2669 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
041457d9 2670 warn_utf8 ?
230e1fce 2671 0 : UNICODE_ALLOW_ANY);
fc241834 2672 }
a6ec74c1 2673 }
a6ec74c1 2674 break;
fc241834 2675 }
a6ec74c1
JH
2676 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2677 case 'f':
a6ec74c1 2678 while (len-- > 0) {
f337b084
TH
2679 float afloat;
2680 NV anv;
a6ec74c1 2681 fromstr = NEXTFROM;
f337b084 2682 anv = SvNV(fromstr);
a7157111 2683# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2684 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2685 * on Alpha; fake it if we don't have them.
2686 */
f337b084 2687 if (anv > FLT_MAX)
fc241834 2688 afloat = FLT_MAX;
f337b084 2689 else if (anv < -FLT_MAX)
fc241834 2690 afloat = -FLT_MAX;
f337b084 2691 else afloat = (float)anv;
baf3cf9c 2692# else
a7157111 2693# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
d22fb6ca
DD
2694 if(Perl_isnan(anv))
2695 afloat = (float)NV_NAN;
2696 else
a7157111
JH
2697# endif
2698# ifdef NV_INF
919894b7
DM
2699 /* a simple cast to float is undefined if outside
2700 * the range of values that can be represented */
2701 afloat = (float)(anv > FLT_MAX ? NV_INF :
2702 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2703# endif
baf3cf9c 2704# endif
3a88beaa 2705 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2706 }
2707 break;
2708 case 'd':
a6ec74c1 2709 while (len-- > 0) {
f337b084
TH
2710 double adouble;
2711 NV anv;
a6ec74c1 2712 fromstr = NEXTFROM;
f337b084 2713 anv = SvNV(fromstr);
a7157111 2714# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2715 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2716 * on Alpha; fake it if we don't have them.
2717 */
f337b084 2718 if (anv > DBL_MAX)
fc241834 2719 adouble = DBL_MAX;
f337b084 2720 else if (anv < -DBL_MAX)
fc241834 2721 adouble = -DBL_MAX;
f337b084 2722 else adouble = (double)anv;
baf3cf9c 2723# else
f337b084 2724 adouble = (double)anv;
baf3cf9c 2725# endif
3a88beaa 2726 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2727 }
2728 break;
fc241834 2729 case 'F': {
275663fa 2730 NV_bytes anv;
1109a392 2731 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2732 while (len-- > 0) {
2733 fromstr = NEXTFROM;
cd07c537
DM
2734#ifdef __GNUC__
2735 /* to work round a gcc/x86 bug; don't use SvNV */
2736 anv.nv = sv_2nv(fromstr);
070e2677
TC
2737# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2738 && LONG_DOUBLESIZE > 10
2739 /* GCC sometimes overwrites the padding in the
2740 assignment above */
2741 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2742# endif
cd07c537 2743#else
275663fa 2744 anv.nv = SvNV(fromstr);
cd07c537 2745#endif
3a88beaa 2746 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2747 }
2748 break;
fc241834 2749 }
92d41999 2750#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2751 case 'D': {
275663fa 2752 ld_bytes aldouble;
1109a392
MHM
2753 /* long doubles can have unused bits, which may be nonzero */
2754 Zero(&aldouble, 1, long double);
92d41999
JH
2755 while (len-- > 0) {
2756 fromstr = NEXTFROM;
cd07c537
DM
2757# ifdef __GNUC__
2758 /* to work round a gcc/x86 bug; don't use SvNV */
2759 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2760# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2761 /* GCC sometimes overwrites the padding in the
2762 assignment above */
2763 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2764# endif
cd07c537 2765# else
275663fa 2766 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2767# endif
3a88beaa
NC
2768 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2769 needs_swap);
92d41999
JH
2770 }
2771 break;
fc241834 2772 }
92d41999 2773#endif
068bd2e7 2774 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2775 case 'n':
2776 while (len-- > 0) {
f337b084 2777 I16 ai16;
a6ec74c1 2778 fromstr = NEXTFROM;
354b74ae 2779 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2780 ai16 = PerlSock_htons(ai16);
3a88beaa 2781 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2782 }
2783 break;
068bd2e7 2784 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2785 case 'v':
2786 while (len-- > 0) {
f337b084 2787 I16 ai16;
a6ec74c1 2788 fromstr = NEXTFROM;
354b74ae 2789 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2790 ai16 = htovs(ai16);
3a88beaa 2791 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2792 }
2793 break;
49704364 2794 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2795#if SHORTSIZE != SIZE16
fc241834 2796 while (len-- > 0) {
f337b084 2797 unsigned short aushort;
fc241834 2798 fromstr = NEXTFROM;
354b74ae 2799 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2800 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2801 }
49704364
LW
2802 break;
2803#else
924ba076 2804 /* FALLTHROUGH */
a6ec74c1 2805#endif
49704364 2806 case 'S':
fc241834 2807 while (len-- > 0) {
f337b084 2808 U16 au16;
fc241834 2809 fromstr = NEXTFROM;
354b74ae 2810 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2811 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2812 }
2813 break;
49704364 2814 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2815#if SHORTSIZE != SIZE16
fc241834 2816 while (len-- > 0) {
f337b084 2817 short ashort;
fc241834 2818 fromstr = NEXTFROM;
354b74ae 2819 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2820 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2821 }
49704364
LW
2822 break;
2823#else
924ba076 2824 /* FALLTHROUGH */
a6ec74c1 2825#endif
49704364
LW
2826 case 's':
2827 while (len-- > 0) {
f337b084 2828 I16 ai16;
49704364 2829 fromstr = NEXTFROM;
354b74ae 2830 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2831 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2832 }
2833 break;
2834 case 'I':
49704364 2835 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2836 while (len-- > 0) {
f337b084 2837 unsigned int auint;
a6ec74c1 2838 fromstr = NEXTFROM;
354b74ae 2839 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2840 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2841 }
2842 break;
92d41999
JH
2843 case 'j':
2844 while (len-- > 0) {
f337b084 2845 IV aiv;
92d41999 2846 fromstr = NEXTFROM;
354b74ae 2847 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2848 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2849 }
2850 break;
2851 case 'J':
2852 while (len-- > 0) {
f337b084 2853 UV auv;
92d41999 2854 fromstr = NEXTFROM;
354b74ae 2855 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2856 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2857 }
2858 break;
a6ec74c1
JH
2859 case 'w':
2860 while (len-- > 0) {
f337b084 2861 NV anv;
a6ec74c1 2862 fromstr = NEXTFROM;
83388ae2 2863 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2864 anv = SvNV_nomg(fromstr);
a6ec74c1 2865
f337b084
TH
2866 if (anv < 0) {
2867 *cur = '\0';
b162af07 2868 SvCUR_set(cat, cur - start);
49704364 2869 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2870 }
a6ec74c1 2871
196b62db
NC
2872 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2873 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2874 any negative IVs will have already been got by the croak()
2875 above. IOK is untrue for fractions, so we test them
2876 against UV_MAX_P1. */
f337b084
TH
2877 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2878 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2879 char *in = buf + sizeof(buf);
1e9a122e 2880 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2881
2882 do {
eb160463 2883 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2884 auv >>= 7;
2885 } while (auv);
2886 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2887 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2888 in, (buf + sizeof(buf)) - in);
2889 } else if (SvPOKp(fromstr))
2890 goto w_string;
a6ec74c1 2891 else if (SvNOKp(fromstr)) {
0258719b 2892 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2893 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2894 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2895 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2896 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2897 Some C compilers are strict about integral constant
2898 expressions so we conservatively divide by a slightly
2899 smaller integer instead of multiplying by the exact
2900 floating-point value.
0258719b
NC
2901 */
2902#ifdef NV_MAX_10_EXP
f337b084 2903 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2904 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2905#else
f337b084 2906 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2907 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2908#endif
a6ec74c1
JH
2909 char *in = buf + sizeof(buf);
2910
8b6e33c7 2911 anv = Perl_floor(anv);
a6ec74c1 2912 do {
8b6e33c7 2913 const NV next = Perl_floor(anv / 128);
a6ec74c1 2914 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2915 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2916 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2917 anv = next;
2918 } while (anv > 0);
a6ec74c1 2919 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2920 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2921 in, (buf + sizeof(buf)) - in);
2922 } else {
8b6e33c7
AL
2923 const char *from;
2924 char *result, *in;
735b914b
JH
2925 SV *norm;
2926 STRLEN len;
2927 bool done;
2928
f337b084 2929 w_string:
735b914b 2930 /* Copy string and check for compliance */
1e9a122e 2931 from = SvPV_nomg_const(fromstr, len);
735b914b 2932 if ((norm = is_an_int(from, len)) == NULL)
49704364 2933 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2934
a02a5408 2935 Newx(result, len, char);
735b914b
JH
2936 in = result + len;
2937 done = FALSE;
f337b084 2938 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2939 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2940 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2941 in, (result + len) - in);
735b914b
JH
2942 Safefree(result);
2943 SvREFCNT_dec(norm); /* free norm */
fc241834 2944 }
a6ec74c1
JH
2945 }
2946 break;
2947 case 'i':
49704364 2948 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2949 while (len-- > 0) {
f337b084 2950 int aint;
a6ec74c1 2951 fromstr = NEXTFROM;
354b74ae 2952 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2953 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2954 }
2955 break;
068bd2e7 2956 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2957 case 'N':
2958 while (len-- > 0) {
f337b084 2959 U32 au32;
a6ec74c1 2960 fromstr = NEXTFROM;
354b74ae 2961 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2962 au32 = PerlSock_htonl(au32);
3a88beaa 2963 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2964 }
2965 break;
068bd2e7 2966 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2967 case 'V':
2968 while (len-- > 0) {
f337b084 2969 U32 au32;
a6ec74c1 2970 fromstr = NEXTFROM;
354b74ae 2971 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2972 au32 = htovl(au32);
3a88beaa 2973 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2974 }
2975 break;
49704364 2976 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2977#if LONGSIZE != SIZE32
fc241834 2978 while (len-- > 0) {
f337b084 2979 unsigned long aulong;
fc241834 2980 fromstr = NEXTFROM;
354b74ae 2981 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2982 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2983 }
49704364
LW
2984 break;
2985#else
2986 /* Fall though! */
a6ec74c1 2987#endif
49704364 2988 case 'L':
fc241834 2989 while (len-- > 0) {
f337b084 2990 U32 au32;
fc241834 2991 fromstr = NEXTFROM;
354b74ae 2992 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2993 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2994 }
2995 break;
49704364 2996 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2997#if LONGSIZE != SIZE32
fc241834 2998 while (len-- > 0) {
f337b084 2999 long along;
fc241834 3000 fromstr = NEXTFROM;
354b74ae 3001 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3002 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 3003 }
49704364
LW
3004 break;
3005#else
3006 /* Fall though! */
a6ec74c1 3007#endif
49704364
LW
3008 case 'l':
3009 while (len-- > 0) {
f337b084 3010 I32 ai32;
49704364 3011 fromstr = NEXTFROM;
354b74ae 3012 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3013 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3014 }
3015 break;
c174bf3b 3016#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3017 case 'Q':
3018 while (len-- > 0) {
f337b084 3019 Uquad_t auquad;
a6ec74c1 3020 fromstr = NEXTFROM;
354b74ae 3021 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3022 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3023 }
3024 break;
3025 case 'q':
3026 while (len-- > 0) {
f337b084 3027 Quad_t aquad;
a6ec74c1 3028 fromstr = NEXTFROM;
354b74ae 3029 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3030 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3031 }
3032 break;
1640b983 3033#endif
a6ec74c1
JH
3034 case 'P':
3035 len = 1; /* assume SV is correct length */
f337b084 3036 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3037 /* FALLTHROUGH */
a6ec74c1
JH
3038 case 'p':
3039 while (len-- > 0) {
83003860 3040 const char *aptr;
f337b084 3041
a6ec74c1 3042 fromstr = NEXTFROM;
28a4f200 3043 SvGETMAGIC(fromstr);
3044 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3045 else {
a6ec74c1
JH
3046 /* XXX better yet, could spirit away the string to
3047 * a safe spot and hang on to it until the result
3048 * of pack() (and all copies of the result) are
3049 * gone.
3050 */
b199a398
FC
3051 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3052 || (SvPADTMP(fromstr) &&
a2a5de95
NC
3053 !SvREADONLY(fromstr)))) {
3054 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3055 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3056 }
3057 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3058 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3059 else
2596d9fe 3060 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3061 }
3a88beaa 3062 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3063 }
3064 break;
fc241834 3065 case 'u': {
f7fe979e 3066 const char *aptr, *aend;
fc241834 3067 bool from_utf8;
f337b084 3068
a6ec74c1 3069 fromstr = NEXTFROM;
fc241834
RGS
3070 if (len <= 2) len = 45;
3071 else len = len / 3 * 3;
3072 if (len >= 64) {
a2a5de95
NC
3073 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3074 "Field too wide in 'u' format in pack");
fc241834
RGS
3075 len = 63;
3076 }
83003860 3077 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3078 from_utf8 = DO_UTF8(fromstr);
3079 if (from_utf8) {
3080 aend = aptr + fromlen;
3f63b0e5 3081 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3082 } else aend = NULL; /* Unused, but keep compilers happy */
3083 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3084 while (fromlen > 0) {
fc241834 3085 U8 *end;
a6ec74c1 3086 I32 todo;
fc241834 3087 U8 hunk[1+63/3*4+1];
a6ec74c1 3088
eb160463 3089 if ((I32)fromlen > len)
a6ec74c1
JH
3090 todo = len;
3091 else
3092 todo = fromlen;
fc241834
RGS
3093 if (from_utf8) {
3094 char buffer[64];
9df874cd 3095 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3096 'u' | TYPE_IS_PACK)) {
3097 *cur = '\0';
b162af07 3098 SvCUR_set(cat, cur - start);
5637ef5b
NC
3099 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3100 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3101 aptr, aend, buffer, (long) todo);
fc241834 3102 }
e68aed92 3103 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3104 } else {
e68aed92 3105 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3106 aptr += todo;
3107 }
3a88beaa 3108 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3109 fromlen -= todo;
3110 }
a6ec74c1
JH
3111 break;
3112 }
f337b084
TH
3113 }
3114 *cur = '\0';
b162af07 3115 SvCUR_set(cat, cur - start);
f337b084 3116 no_change:
49704364 3117 *symptr = lookahead;
a6ec74c1 3118 }
49704364 3119 return beglist;
18529408
IZ
3120}
3121#undef NEXTFROM
3122
3123
3124PP(pp_pack)
3125{
20b7effb 3126 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3127 SV *cat = TARG;
18529408 3128 STRLEN fromlen;
349d4f2f 3129 SV *pat_sv = *++MARK;
eb578fdb
KW
3130 const char *pat = SvPV_const(pat_sv, fromlen);
3131 const char *patend = pat + fromlen;
18529408
IZ
3132
3133 MARK++;
e8f7c79f 3134 SvPVCLEAR(cat);
f337b084 3135 SvUTF8_off(cat);
18529408 3136
7accc089 3137 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3138
a6ec74c1
JH
3139 SvSETMAGIC(cat);
3140 SP = ORIGMARK;
3141 PUSHs(cat);
3142 RETURN;
3143}
a6ec74c1 3144
73cb7263 3145/*
14d04a33 3146 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3147 */