This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
packsizetables.c -> packsizetables.inc
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
9df874cd 154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
LW
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
e6006968
KW
219#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
a6ec74c1 221
66c611c5 222/* type modifiers */
62f95557 223#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
224#define TYPE_IS_BIG_ENDIAN 0x200
225#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 226#define TYPE_IS_PACK 0x800
1109a392 227#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 228#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
229#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
230
7212898e
NC
231# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
232# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
233
234# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
235
78d46eaa 236#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 237#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
238#define PACK_SIZE_MASK 0x3F
239
4ad176ef 240#include "packsizetables.inc"
78d46eaa 241
20aa3a38
NC
242static void
243S_reverse_copy(const char *src, char *dest, STRLEN len)
244{
245 dest += len;
246 while (len--)
247 *--dest = *src++;
248}
249
08ca2aa3 250STATIC U8
9df874cd 251utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 252{
08ca2aa3 253 STRLEN retlen;
0bcc34c2 254 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 255 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 256 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
257 warnings), but these two mean we make no progress in the string and
258 might enter an infinite loop */
259 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 262 if (val >= 0x100) {
a2a5de95
NC
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
266 val &= 0xff;
267 }
268 *s += retlen;
fe2774ed 269 return (U8)val;
08ca2aa3
TH
270}
271
f337b084 272#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
274 *(U8 *)(s)++)
275
276STATIC bool
9df874cd 277S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
278{
279 UV val;
280 STRLEN retlen;
f7fe979e 281 const char *from = *s;
08ca2aa3 282 int bad = 0;
f7fe979e 283 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
285 const bool needs_swap = NEEDS_SWAP(datumtype);
286
228e69a7 287 if (UNLIKELY(needs_swap))
20aa3a38
NC
288 buf += buf_len;
289
08ca2aa3
TH
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
f337b084 292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
293 if (retlen == (STRLEN) -1 || retlen == 0) {
294 from += UTF8SKIP(from);
295 bad |= 1;
296 } else from += retlen;
297 if (val >= 0x100) {
298 bad |= 2;
299 val &= 0xff;
300 }
228e69a7 301 if (UNLIKELY(needs_swap))
20aa3a38
NC
302 *(U8 *)--buf = (U8)val;
303 else
304 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
305 }
306 /* We have enough characters for the buffer. Did we have problems ? */
307 if (bad) {
308 if (bad & 1) {
309 /* Rewalk the string fragment while warning */
f7fe979e 310 const char *ptr;
9e27e96a 311 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
c80e42f3 314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
315 }
316 if (from > end) from = end;
317 }
a2a5de95
NC
318 if ((bad & 2))
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 320 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
324 }
325 *s = from;
326 return TRUE;
327}
328
64844641 329STATIC char *
d21d7215
KW
330S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 332
228e69a7 333 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
334 const U8 *p = start + len;
335 while (p-- > start) {
55d09dc8 336 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
337 }
338 } else {
339 const U8 * const end = start + len;
340 while (start < end) {
55d09dc8 341 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
342 start++;
343 }
f337b084 344 }
64844641 345 return dest;
f337b084
TH
346}
347
3a88beaa 348#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 349STMT_START { \
228e69a7 350 if (UNLIKELY(utf8)) \
d21d7215 351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 352 else { \
228e69a7 353 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
354 S_reverse_copy((char *)(buf), cur, len); \
355 else \
356 Copy(buf, cur, len, char); \
230e1fce
NC
357 (cur) += (len); \
358 } \
f337b084
TH
359} STMT_END
360
361#define GROWING(utf8, cat, start, cur, in_len) \
362STMT_START { \
363 STRLEN glen = (in_len); \
3473cf63 364 if (utf8) glen *= UTF8_EXPAND; \
f337b084 365 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 366 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
367 (cur) = (start) + SvCUR(cat); \
368 } \
369} STMT_END
370
371#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
372STMT_START { \
f7fe979e 373 const STRLEN glen = (in_len); \
f337b084 374 STRLEN gl = glen; \
3473cf63 375 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
376 if ((cur) + gl >= (start) + SvLEN(cat)) { \
377 *cur = '\0'; \
b162af07 378 SvCUR_set((cat), (cur) - (start)); \
0bd48802 379 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
380 (cur) = (start) + SvCUR(cat); \
381 } \
3a88beaa 382 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
383} STMT_END
384
385#define PUSH_BYTE(utf8, s, byte) \
386STMT_START { \
387 if (utf8) { \
f7fe979e 388 const U8 au8 = (byte); \
d21d7215 389 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
390 } else *(U8 *)(s)++ = (byte); \
391} STMT_END
392
393/* Only to be used inside a loop (see the break) */
394#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
395STMT_START { \
396 STRLEN retlen; \
397 if (str >= end) break; \
398 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
399 if (retlen == (STRLEN) -1 || retlen == 0) { \
400 *cur = '\0'; \
401 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
402 } \
403 str += retlen; \
404} STMT_END
405
f7fe979e
AL
406static const char *_action( const tempsym_t* symptr )
407{
10edeb5d 408 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
409}
410
206947d2 411/* Returns the sizeof() struct described by pat */
028d1f6d 412STATIC I32
f337b084 413S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 414{
f337b084 415 I32 total = 0;
206947d2 416
7918f24d
NC
417 PERL_ARGS_ASSERT_MEASURE_STRUCT;
418
49704364 419 while (next_symbol(symptr)) {
f337b084 420 I32 len;
f7fe979e 421 int size;
f337b084
TH
422
423 switch (symptr->howlen) {
fc241834 424 case e_star:
49704364 425 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 426 _action( symptr ) );
81d52ecd 427
f337b084
TH
428 default:
429 /* e_no_len and e_number */
430 len = symptr->length;
431 break;
49704364
LW
432 }
433
a7a3cfaa 434 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 435 if (!size) {
f7fe979e 436 int star;
80a13697
NC
437 /* endianness doesn't influence the size of a type */
438 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
439 default:
440 Perl_croak(aTHX_ "Invalid type '%c' in %s",
441 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 442 _action( symptr ) );
28be1210
TH
443 case '.' | TYPE_IS_SHRIEKING:
444 case '@' | TYPE_IS_SHRIEKING:
80a13697 445 case '@':
28be1210 446 case '.':
80a13697
NC
447 case '/':
448 case 'U': /* XXXX Is it correct? */
449 case 'w':
450 case 'u':
451 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 452 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 453 _action( symptr ) );
80a13697
NC
454 case '%':
455 size = 0;
456 break;
457 case '(':
fc241834
RGS
458 {
459 tempsym_t savsym = *symptr;
460 symptr->patptr = savsym.grpbeg;
461 symptr->patend = savsym.grpend;
462 /* XXXX Theoretically, we need to measure many times at
463 different positions, since the subexpression may contain
464 alignment commands, but be not of aligned length.
465 Need to detect this and croak(). */
466 size = measure_struct(symptr);
467 *symptr = savsym;
468 break;
469 }
80a13697
NC
470 case 'X' | TYPE_IS_SHRIEKING:
471 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
472 */
473 if (!len) /* Avoid division by 0 */
474 len = 1;
475 len = total % len; /* Assumed: the start is aligned. */
924ba076 476 /* FALLTHROUGH */
80a13697
NC
477 case 'X':
478 size = -1;
479 if (total < len)
f7fe979e 480 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
481 break;
482 case 'x' | TYPE_IS_SHRIEKING:
483 if (!len) /* Avoid division by 0 */
484 len = 1;
485 star = total % len; /* Assumed: the start is aligned. */
486 if (star) /* Other portable ways? */
487 len = len - star;
488 else
489 len = 0;
924ba076 490 /* FALLTHROUGH */
80a13697
NC
491 case 'x':
492 case 'A':
493 case 'Z':
494 case 'a':
80a13697
NC
495 size = 1;
496 break;
497 case 'B':
498 case 'b':
499 len = (len + 7)/8;
500 size = 1;
501 break;
502 case 'H':
503 case 'h':
504 len = (len + 1)/2;
505 size = 1;
506 break;
78d46eaa 507
80a13697
NC
508 case 'P':
509 len = 1;
510 size = sizeof(char*);
78d46eaa
NC
511 break;
512 }
206947d2
IZ
513 }
514 total += len * size;
515 }
516 return total;
517}
518
49704364
LW
519
520/* locate matching closing parenthesis or bracket
521 * returns char pointer to char after match, or NULL
522 */
f7fe979e 523STATIC const char *
5aaab254 524S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 525{
7918f24d
NC
526 PERL_ARGS_ASSERT_GROUP_END;
527
49704364 528 while (patptr < patend) {
f7fe979e 529 const char c = *patptr++;
49704364
LW
530
531 if (isSPACE(c))
532 continue;
533 else if (c == ender)
534 return patptr-1;
535 else if (c == '#') {
536 while (patptr < patend && *patptr != '\n')
537 patptr++;
538 continue;
539 } else if (c == '(')
540 patptr = group_end(patptr, patend, ')') + 1;
541 else if (c == '[')
542 patptr = group_end(patptr, patend, ']') + 1;
18529408 543 }
49704364
LW
544 Perl_croak(aTHX_ "No group ending character '%c' found in template",
545 ender);
a25b5927 546 NOT_REACHED; /* NOTREACHED */
18529408
IZ
547}
548
49704364
LW
549
550/* Convert unsigned decimal number to binary.
551 * Expects a pointer to the first digit and address of length variable
552 * Advances char pointer to 1st non-digit char and returns number
fc241834 553 */
f7fe979e 554STATIC const char *
5aaab254 555S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
556{
557 I32 len = *patptr++ - '0';
7918f24d
NC
558
559 PERL_ARGS_ASSERT_GET_NUM;
560
49704364
LW
561 while (isDIGIT(*patptr)) {
562 if (len >= 0x7FFFFFFF/10)
563 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
564 len = (len * 10) + (*patptr++ - '0');
565 }
566 *lenptr = len;
567 return patptr;
568}
569
570/* The marvellous template parsing routine: Using state stored in *symptr,
571 * locates next template code and count
572 */
573STATIC bool
f337b084 574S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 575{
f7fe979e 576 const char* patptr = symptr->patptr;
0bcc34c2 577 const char* const patend = symptr->patend;
49704364 578
7918f24d
NC
579 PERL_ARGS_ASSERT_NEXT_SYMBOL;
580
49704364
LW
581 symptr->flags &= ~FLAG_SLASH;
582
583 while (patptr < patend) {
584 if (isSPACE(*patptr))
585 patptr++;
586 else if (*patptr == '#') {
587 patptr++;
588 while (patptr < patend && *patptr != '\n')
589 patptr++;
590 if (patptr < patend)
591 patptr++;
592 } else {
fc241834 593 /* We should have found a template code */
49704364 594 I32 code = *patptr++ & 0xFF;
66c611c5 595 U32 inherited_modifiers = 0;
49704364
LW
596
597 if (code == ','){ /* grandfather in commas but with a warning */
598 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
599 symptr->flags |= FLAG_COMMA;
600 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 601 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
602 }
603 continue;
604 }
fc241834 605
49704364 606 /* for '(', skip to ')' */
fc241834 607 if (code == '(') {
49704364
LW
608 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
609 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 610 _action( symptr ) );
49704364
LW
611 symptr->grpbeg = patptr;
612 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
613 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
614 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 615 _action( symptr ) );
49704364
LW
616 }
617
66c611c5
MHM
618 /* look for group modifiers to inherit */
619 if (TYPE_ENDIANNESS(symptr->flags)) {
620 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
621 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
622 }
623
1109a392
MHM
624 /* look for modifiers */
625 while (patptr < patend) {
b7787f18
AL
626 const char *allowed;
627 I32 modifier;
1109a392
MHM
628 switch (*patptr) {
629 case '!':
630 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 631 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
632 break;
633 case '>':
634 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 635 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
636 break;
637 case '<':
638 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 639 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
640 break;
641 default:
b7787f18
AL
642 allowed = "";
643 modifier = 0;
1109a392
MHM
644 break;
645 }
66c611c5 646
1109a392
MHM
647 if (modifier == 0)
648 break;
66c611c5 649
1109a392
MHM
650 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
651 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 652 allowed, _action( symptr ) );
66c611c5
MHM
653
654 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 655 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 656 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
657 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
658 TYPE_ENDIANNESS_MASK)
659 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 660 *patptr, _action( symptr ) );
66c611c5 661
a2a5de95
NC
662 if ((code & modifier)) {
663 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
664 "Duplicate modifier '%c' after '%c' in %s",
665 *patptr, (int) TYPE_NO_MODIFIERS(code),
666 _action( symptr ) );
1109a392 667 }
66c611c5 668
1109a392
MHM
669 code |= modifier;
670 patptr++;
49704364
LW
671 }
672
66c611c5
MHM
673 /* inherit modifiers */
674 code |= inherited_modifiers;
675
fc241834 676 /* look for count and/or / */
49704364
LW
677 if (patptr < patend) {
678 if (isDIGIT(*patptr)) {
679 patptr = get_num( patptr, &symptr->length );
680 symptr->howlen = e_number;
681
682 } else if (*patptr == '*') {
683 patptr++;
684 symptr->howlen = e_star;
685
686 } else if (*patptr == '[') {
f7fe979e 687 const char* lenptr = ++patptr;
49704364
LW
688 symptr->howlen = e_number;
689 patptr = group_end( patptr, patend, ']' ) + 1;
690 /* what kind of [] is it? */
691 if (isDIGIT(*lenptr)) {
692 lenptr = get_num( lenptr, &symptr->length );
693 if( *lenptr != ']' )
694 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 695 _action( symptr ) );
49704364
LW
696 } else {
697 tempsym_t savsym = *symptr;
698 symptr->patend = patptr-1;
699 symptr->patptr = lenptr;
700 savsym.length = measure_struct(symptr);
701 *symptr = savsym;
702 }
703 } else {
704 symptr->howlen = e_no_len;
705 symptr->length = 1;
706 }
707
708 /* try to find / */
709 while (patptr < patend) {
710 if (isSPACE(*patptr))
711 patptr++;
712 else if (*patptr == '#') {
713 patptr++;
714 while (patptr < patend && *patptr != '\n')
715 patptr++;
716 if (patptr < patend)
717 patptr++;
718 } else {
66c611c5 719 if (*patptr == '/') {
49704364
LW
720 symptr->flags |= FLAG_SLASH;
721 patptr++;
66c611c5
MHM
722 if (patptr < patend &&
723 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 724 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 725 _action( symptr ) );
49704364
LW
726 }
727 break;
728 }
18529408 729 }
49704364
LW
730 } else {
731 /* at end - no count, no / */
732 symptr->howlen = e_no_len;
733 symptr->length = 1;
734 }
735
736 symptr->code = code;
fc241834 737 symptr->patptr = patptr;
49704364 738 return TRUE;
18529408 739 }
49704364 740 }
fc241834 741 symptr->patptr = patptr;
49704364 742 return FALSE;
18529408
IZ
743}
744
18529408 745/*
fc241834 746 There is no way to cleanly handle the case where we should process the
08ca2aa3 747 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
748 (e.g. estimates like strend-s as an upper bound for the number of
749 characters left wouldn't work). So if we foresee the need of this
750 (pattern starts with U or contains U0), we want to work on the encoded
751 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
752 themselves if they need to do a lot of unpacks like this on it
753*/
fc241834 754STATIC bool
08ca2aa3
TH
755need_utf8(const char *pat, const char *patend)
756{
757 bool first = TRUE;
7918f24d
NC
758
759 PERL_ARGS_ASSERT_NEED_UTF8;
760
08ca2aa3
TH
761 while (pat < patend) {
762 if (pat[0] == '#') {
763 pat++;
f7fe979e 764 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
765 if (!pat) return FALSE;
766 } else if (pat[0] == 'U') {
767 if (first || pat[1] == '0') return TRUE;
768 } else first = FALSE;
769 pat++;
770 }
771 return FALSE;
772}
773
774STATIC char
775first_symbol(const char *pat, const char *patend) {
7918f24d
NC
776 PERL_ARGS_ASSERT_FIRST_SYMBOL;
777
08ca2aa3
TH
778 while (pat < patend) {
779 if (pat[0] != '#') return pat[0];
780 pat++;
f7fe979e 781 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
782 if (!pat) return 0;
783 pat++;
784 }
785 return 0;
786}
787
788/*
dcccc8ff
KW
789
790=head1 Pack and Unpack
791
7accc089
JH
792=for apidoc unpackstring
793
21ebfc7a
DM
794The engine implementing the unpack() Perl function.
795
796Using the template pat..patend, this function unpacks the string
797s..strend into a number of mortal SVs, which it pushes onto the perl
798argument (@_) stack (so you will need to issue a C<PUTBACK> before and
72d33970 799C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
800pushed elements.
801
802The strend and patend pointers should point to the byte following the last
803character of each string.
804
805Although this function returns its values on the perl argument stack, it
806doesn't take any parameters from that stack (and thus in particular
807there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
808example).
7accc089
JH
809
810=cut */
811
812I32
f7fe979e 813Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 814{
f7fe979e 815 tempsym_t sym;
08ca2aa3 816
7918f24d
NC
817 PERL_ARGS_ASSERT_UNPACKSTRING;
818
f337b084 819 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
820 else if (need_utf8(pat, patend)) {
821 /* We probably should try to avoid this in case a scalar context call
822 wouldn't get to the "U0" */
823 STRLEN len = strend - s;
230e1fce 824 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
825 SAVEFREEPV(s);
826 strend = s + len;
f337b084 827 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
828 }
829
f337b084
TH
830 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
831 flags |= FLAG_PARSE_UTF8;
08ca2aa3 832
f7fe979e 833 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
834
835 return unpack_rec(&sym, s, s, strend, NULL );
836}
837
4136a0f7 838STATIC I32
f7fe979e 839S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 840{
20b7effb 841 dSP;
3297d27d 842 SV *sv = NULL;
f7fe979e 843 const I32 start_sp_offset = SP - PL_stack_base;
49704364 844 howlen_t howlen;
a6ec74c1 845 I32 checksum = 0;
92d41999 846 UV cuv = 0;
a6ec74c1 847 NV cdouble = 0.0;
f337b084 848 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 849 bool beyond = FALSE;
21c16052 850 bool explicit_length;
9e27e96a 851 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 852 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
853
854 PERL_ARGS_ASSERT_UNPACK_REC;
855
28be1210 856 symptr->strbeg = s - strbeg;
49704364 857
49704364 858 while (next_symbol(symptr)) {
a7a3cfaa 859 packprops_t props;
9e27e96a 860 I32 len;
f337b084 861 I32 datumtype = symptr->code;
a1219b5e 862 bool needs_swap;
206947d2 863 /* do first one only unless in list context
08ca2aa3 864 / is implemented by unpacking the count, then popping it from the
206947d2 865 stack, so must check that we're not in the middle of a / */
49704364 866 if ( unpack_only_one
206947d2 867 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 868 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 869 break;
49704364 870
f337b084 871 switch (howlen = symptr->howlen) {
fc241834
RGS
872 case e_star:
873 len = strend - strbeg; /* long enough */
49704364 874 break;
f337b084
TH
875 default:
876 /* e_no_len and e_number */
877 len = symptr->length;
878 break;
49704364 879 }
18529408 880
21c16052 881 explicit_length = TRUE;
a6ec74c1 882 redo_switch:
49704364 883 beyond = s >= strend;
a7a3cfaa
TH
884
885 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
886 if (props) {
887 /* props nonzero means we can process this letter. */
9e27e96a
AL
888 const long size = props & PACK_SIZE_MASK;
889 const long howmany = (strend - s) / size;
a7a3cfaa
TH
890 if (len > howmany)
891 len = howmany;
892
893 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
894 if (len && unpack_only_one) len = 1;
895 EXTEND(SP, len);
896 EXTEND_MORTAL(len);
78d46eaa
NC
897 }
898 }
a7a3cfaa 899
a1219b5e
NC
900 needs_swap = NEEDS_SWAP(datumtype);
901
1109a392 902 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 903 default:
1109a392 904 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 905
a6ec74c1 906 case '%':
49704364 907 if (howlen == e_no_len)
18529408 908 len = 16; /* len is not specified */
a6ec74c1 909 checksum = len;
92d41999 910 cuv = 0;
a6ec74c1 911 cdouble = 0;
18529408 912 continue;
81d52ecd 913
18529408
IZ
914 case '(':
915 {
49704364 916 tempsym_t savsym = *symptr;
9e27e96a 917 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 918 symptr->flags |= group_modifiers;
49704364 919 symptr->patend = savsym.grpend;
28be1210 920 symptr->previous = &savsym;
49704364 921 symptr->level++;
18529408 922 PUTBACK;
c6f750d1 923 if (len && unpack_only_one) len = 1;
18529408 924 while (len--) {
49704364 925 symptr->patptr = savsym.grpbeg;
f337b084
TH
926 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
927 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
928 unpack_rec(symptr, s, strbeg, strend, &s);
929 if (s == strend && savsym.howlen == e_star)
49704364 930 break; /* No way to continue */
18529408
IZ
931 }
932 SPAGAIN;
28be1210 933 savsym.flags = symptr->flags & ~group_modifiers;
49704364 934 *symptr = savsym;
18529408
IZ
935 break;
936 }
28be1210 937 case '.' | TYPE_IS_SHRIEKING:
28be1210 938 case '.': {
9e27e96a 939 const char *from;
28be1210 940 SV *sv;
9e27e96a 941 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
942 if (howlen == e_star) from = strbeg;
943 else if (len <= 0) from = s;
944 else {
945 tempsym_t *group = symptr;
946
947 while (--len && group) group = group->previous;
948 from = group ? strbeg + group->strbeg : strbeg;
949 }
950 sv = from <= s ?
00646304
CB
951 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
952 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 953 mXPUSHs(sv);
28be1210
TH
954 break;
955 }
28be1210 956 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 957 case '@':
28be1210 958 s = strbeg + symptr->strbeg;
28be1210 959 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 960 {
08ca2aa3
TH
961 while (len > 0) {
962 if (s >= strend)
963 Perl_croak(aTHX_ "'@' outside of string in unpack");
964 s += UTF8SKIP(s);
965 len--;
966 }
967 if (s > strend)
968 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
969 } else {
28be1210 970 if (strend-s < len)
fc241834 971 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 972 s += len;
08ca2aa3 973 }
a6ec74c1 974 break;
62f95557
IZ
975 case 'X' | TYPE_IS_SHRIEKING:
976 if (!len) /* Avoid division by 0 */
977 len = 1;
08ca2aa3 978 if (utf8) {
f7fe979e 979 const char *hop, *last;
f337b084
TH
980 I32 l = len;
981 hop = last = strbeg;
982 while (hop < s) {
983 hop += UTF8SKIP(hop);
984 if (--l == 0) {
08ca2aa3 985 last = hop;
f337b084
TH
986 l = len;
987 }
fc241834 988 }
f337b084
TH
989 if (last > s)
990 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
991 s = last;
992 break;
f337b084
TH
993 }
994 len = (s - strbeg) % len;
924ba076 995 /* FALLTHROUGH */
a6ec74c1 996 case 'X':
08ca2aa3
TH
997 if (utf8) {
998 while (len > 0) {
999 if (s <= strbeg)
1000 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1001 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1002 if (s <= strbeg)
1003 Perl_croak(aTHX_ "'X' outside of string in unpack");
1004 }
1005 len--;
1006 }
1007 } else {
fc241834
RGS
1008 if (len > s - strbeg)
1009 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1010 s -= len;
08ca2aa3 1011 }
a6ec74c1 1012 break;
9e27e96a
AL
1013 case 'x' | TYPE_IS_SHRIEKING: {
1014 I32 ai32;
62f95557
IZ
1015 if (!len) /* Avoid division by 0 */
1016 len = 1;
230e1fce
NC
1017 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1018 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1019 if (ai32 == 0) break;
1020 len -= ai32;
9e27e96a 1021 }
924ba076 1022 /* FALLTHROUGH */
a6ec74c1 1023 case 'x':
08ca2aa3
TH
1024 if (utf8) {
1025 while (len>0) {
1026 if (s >= strend)
1027 Perl_croak(aTHX_ "'x' outside of string in unpack");
1028 s += UTF8SKIP(s);
1029 len--;
1030 }
1031 } else {
fc241834
RGS
1032 if (len > strend - s)
1033 Perl_croak(aTHX_ "'x' outside of string in unpack");
1034 s += len;
f337b084 1035 }
a6ec74c1
JH
1036 break;
1037 case '/':
49704364 1038 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1039
a6ec74c1
JH
1040 case 'A':
1041 case 'Z':
1042 case 'a':
08ca2aa3
TH
1043 if (checksum) {
1044 /* Preliminary length estimate is assumed done in 'W' */
1045 if (len > strend - s) len = strend - s;
1046 goto W_checksum;
1047 }
1048 if (utf8) {
1049 I32 l;
f7fe979e 1050 const char *hop;
08ca2aa3
TH
1051 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1052 if (hop >= strend) {
1053 if (hop > strend)
1054 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1055 break;
fc241834 1056 }
a6ec74c1 1057 }
08ca2aa3
TH
1058 if (hop > strend)
1059 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1060 len = hop - s;
1061 } else if (len > strend - s)
1062 len = strend - s;
1063
1064 if (datumtype == 'Z') {
1065 /* 'Z' strips stuff after first null */
f7fe979e 1066 const char *ptr, *end;
f337b084
TH
1067 end = s + len;
1068 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1069 sv = newSVpvn(s, ptr-s);
1070 if (howlen == e_star) /* exact for 'Z*' */
1071 len = ptr-s + (ptr != strend ? 1 : 0);
1072 } else if (datumtype == 'A') {
1073 /* 'A' strips both nulls and spaces */
f7fe979e 1074 const char *ptr;
18bdf90a
TH
1075 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1076 for (ptr = s+len-1; ptr >= s; ptr--)
1077 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1078 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1079 if (ptr >= s) ptr += UTF8SKIP(ptr);
1080 else ptr++;
28be1210 1081 if (ptr > s+len)
18bdf90a
TH
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 } else {
1084 for (ptr = s+len-1; ptr >= s; ptr--)
1085 if (*ptr != 0 && !isSPACE(*ptr)) break;
1086 ptr++;
1087 }
08ca2aa3
TH
1088 sv = newSVpvn(s, ptr-s);
1089 } else sv = newSVpvn(s, len);
1090
1091 if (utf8) {
1092 SvUTF8_on(sv);
1093 /* Undo any upgrade done due to need_utf8() */
f337b084 1094 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1095 sv_utf8_downgrade(sv, 0);
a6ec74c1 1096 }
6e449a3a 1097 mXPUSHs(sv);
08ca2aa3 1098 s += len;
a6ec74c1
JH
1099 break;
1100 case 'B':
08ca2aa3
TH
1101 case 'b': {
1102 char *str;
49704364 1103 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1104 len = (strend - s) * 8;
1105 if (checksum) {
f337b084 1106 if (utf8)
08ca2aa3 1107 while (len >= 8 && s < strend) {
9df874cd 1108 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1109 len -= 8;
1110 }
f337b084 1111 else
fc241834 1112 while (len >= 8) {
08ca2aa3 1113 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1114 len -= 8;
1115 }
08ca2aa3
TH
1116 if (len && s < strend) {
1117 U8 bits;
f337b084
TH
1118 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1119 if (datumtype == 'b')
a6ec74c1 1120 while (len-- > 0) {
92d41999 1121 if (bits & 1) cuv++;
a6ec74c1
JH
1122 bits >>= 1;
1123 }
f337b084 1124 else
a6ec74c1 1125 while (len-- > 0) {
08ca2aa3 1126 if (bits & 0x80) cuv++;
a6ec74c1
JH
1127 bits <<= 1;
1128 }
fc241834 1129 }
a6ec74c1
JH
1130 break;
1131 }
08ca2aa3 1132
561b68a9 1133 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1134 SvPOK_on(sv);
1135 str = SvPVX(sv);
1136 if (datumtype == 'b') {
f337b084 1137 U8 bits = 0;
f7fe979e 1138 const I32 ai32 = len;
08ca2aa3
TH
1139 for (len = 0; len < ai32; len++) {
1140 if (len & 7) bits >>= 1;
1141 else if (utf8) {
1142 if (s >= strend) break;
9df874cd 1143 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1144 } else bits = *(U8 *) s++;
1145 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1146 }
08ca2aa3 1147 } else {
f337b084 1148 U8 bits = 0;
f7fe979e 1149 const I32 ai32 = len;
08ca2aa3
TH
1150 for (len = 0; len < ai32; len++) {
1151 if (len & 7) bits <<= 1;
1152 else if (utf8) {
1153 if (s >= strend) break;
9df874cd 1154 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1155 } else bits = *(U8 *) s++;
1156 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1157 }
1158 }
1159 *str = '\0';
aa07b2f6 1160 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1161 XPUSHs(sv);
a6ec74c1 1162 break;
08ca2aa3 1163 }
a6ec74c1 1164 case 'H':
08ca2aa3 1165 case 'h': {
3297d27d 1166 char *str = NULL;
fc241834 1167 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1168 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1169 len = (strend - s) * 2;
858fe5e1
TC
1170 if (!checksum) {
1171 sv = sv_2mortal(newSV(len ? len : 1));
1172 SvPOK_on(sv);
1173 str = SvPVX(sv);
1174 }
a6ec74c1 1175 if (datumtype == 'h') {
f337b084 1176 U8 bits = 0;
9e27e96a 1177 I32 ai32 = len;
fc241834
RGS
1178 for (len = 0; len < ai32; len++) {
1179 if (len & 1) bits >>= 4;
1180 else if (utf8) {
1181 if (s >= strend) break;
9df874cd 1182 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1183 } else bits = * (U8 *) s++;
858fe5e1
TC
1184 if (!checksum)
1185 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1186 }
08ca2aa3 1187 } else {
f337b084 1188 U8 bits = 0;
f7fe979e 1189 const I32 ai32 = len;
08ca2aa3
TH
1190 for (len = 0; len < ai32; len++) {
1191 if (len & 1) bits <<= 4;
1192 else if (utf8) {
1193 if (s >= strend) break;
9df874cd 1194 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1195 } else bits = *(U8 *) s++;
858fe5e1
TC
1196 if (!checksum)
1197 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1198 }
1199 }
858fe5e1
TC
1200 if (!checksum) {
1201 *str = '\0';
1202 SvCUR_set(sv, str - SvPVX_const(sv));
1203 XPUSHs(sv);
1204 }
a6ec74c1 1205 break;
08ca2aa3 1206 }
1651fc44
ML
1207 case 'C':
1208 if (len == 0) {
1209 if (explicit_length)
1210 /* Switch to "character" mode */
1211 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1212 break;
1213 }
924ba076 1214 /* FALLTHROUGH */
a6ec74c1 1215 case 'c':
1651fc44
ML
1216 while (len-- > 0 && s < strend) {
1217 int aint;
1218 if (utf8)
1219 {
1220 STRLEN retlen;
1221 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1222 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1223 if (retlen == (STRLEN) -1 || retlen == 0)
1224 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1225 s += retlen;
1226 }
1227 else
1228 aint = *(U8 *)(s)++;
1229 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1230 aint -= 256;
08ca2aa3 1231 if (!checksum)
6e449a3a 1232 mPUSHi(aint);
73cb7263
NC
1233 else if (checksum > bits_in_uv)
1234 cdouble += (NV)aint;
1235 else
1236 cuv += aint;
a6ec74c1
JH
1237 }
1238 break;
08ca2aa3
TH
1239 case 'W':
1240 W_checksum:
1651fc44 1241 if (utf8) {
08ca2aa3 1242 while (len-- > 0 && s < strend) {
08ca2aa3 1243 STRLEN retlen;
f7fe979e 1244 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 s += retlen;
1249 if (!checksum)
6e449a3a 1250 mPUSHu(val);
08ca2aa3
TH
1251 else if (checksum > bits_in_uv)
1252 cdouble += (NV) val;
d6d3e8bd 1253 else
08ca2aa3 1254 cuv += val;
fc241834 1255 }
08ca2aa3 1256 } else if (!checksum)
a6ec74c1 1257 while (len-- > 0) {
f7fe979e 1258 const U8 ch = *(U8 *) s++;
6e449a3a 1259 mPUSHu(ch);
a6ec74c1 1260 }
08ca2aa3
TH
1261 else if (checksum > bits_in_uv)
1262 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1263 else
1264 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1265 break;
1266 case 'U':
35bcd338 1267 if (len == 0) {
c5333953 1268 if (explicit_length && howlen != e_star) {
08ca2aa3 1269 /* Switch to "bytes in UTF-8" mode */
f337b084 1270 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1271 else
1272 /* Should be impossible due to the need_utf8() test */
1273 Perl_croak(aTHX_ "U0 mode on a byte string");
1274 }
35bcd338
JH
1275 break;
1276 }
08ca2aa3 1277 if (len > strend - s) len = strend - s;
fc241834 1278 if (!checksum) {
08ca2aa3
TH
1279 if (len && unpack_only_one) len = 1;
1280 EXTEND(SP, len);
1281 EXTEND_MORTAL(len);
fc241834 1282 }
08ca2aa3
TH
1283 while (len-- > 0 && s < strend) {
1284 STRLEN retlen;
1285 UV auv;
1286 if (utf8) {
1287 U8 result[UTF8_MAXLEN];
f7fe979e 1288 const char *ptr = s;
08ca2aa3 1289 STRLEN len;
08ca2aa3
TH
1290 /* Bug: warns about bad utf8 even if we are short on bytes
1291 and will break out of the loop */
9df874cd 1292 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1293 'U'))
08ca2aa3
TH
1294 break;
1295 len = UTF8SKIP(result);
9df874cd 1296 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1297 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1298 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1299 len,
1300 &retlen,
1301 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1302 s = ptr;
1303 } else {
3ece276e
KW
1304 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1305 strend - s,
1306 &retlen,
1307 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1308 if (retlen == (STRLEN) -1 || retlen == 0)
1309 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1310 s += retlen;
1311 }
1312 if (!checksum)
6e449a3a 1313 mPUSHu(auv);
73cb7263 1314 else if (checksum > bits_in_uv)
08ca2aa3 1315 cdouble += (NV) auv;
73cb7263 1316 else
08ca2aa3 1317 cuv += auv;
a6ec74c1
JH
1318 }
1319 break;
49704364
LW
1320 case 's' | TYPE_IS_SHRIEKING:
1321#if SHORTSIZE != SIZE16
73cb7263 1322 while (len-- > 0) {
08ca2aa3 1323 short ashort;
aaec8192 1324 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1325 if (!checksum)
6e449a3a 1326 mPUSHi(ashort);
73cb7263
NC
1327 else if (checksum > bits_in_uv)
1328 cdouble += (NV)ashort;
1329 else
1330 cuv += ashort;
49704364
LW
1331 }
1332 break;
1333#else
924ba076 1334 /* FALLTHROUGH */
a6ec74c1 1335#endif
49704364 1336 case 's':
73cb7263 1337 while (len-- > 0) {
08ca2aa3
TH
1338 I16 ai16;
1339
1340#if U16SIZE > SIZE16
1341 ai16 = 0;
1342#endif
aaec8192 1343 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1344#if U16SIZE > SIZE16
73cb7263
NC
1345 if (ai16 > 32767)
1346 ai16 -= 65536;
a6ec74c1 1347#endif
08ca2aa3 1348 if (!checksum)
6e449a3a 1349 mPUSHi(ai16);
73cb7263
NC
1350 else if (checksum > bits_in_uv)
1351 cdouble += (NV)ai16;
1352 else
1353 cuv += ai16;
a6ec74c1
JH
1354 }
1355 break;
49704364
LW
1356 case 'S' | TYPE_IS_SHRIEKING:
1357#if SHORTSIZE != SIZE16
73cb7263 1358 while (len-- > 0) {
08ca2aa3 1359 unsigned short aushort;
aaec8192
NC
1360 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1361 needs_swap);
08ca2aa3 1362 if (!checksum)
6e449a3a 1363 mPUSHu(aushort);
73cb7263
NC
1364 else if (checksum > bits_in_uv)
1365 cdouble += (NV)aushort;
1366 else
1367 cuv += aushort;
49704364
LW
1368 }
1369 break;
1370#else
924ba076 1371 /* FALLTHROUGH */
49704364 1372#endif
a6ec74c1
JH
1373 case 'v':
1374 case 'n':
1375 case 'S':
73cb7263 1376 while (len-- > 0) {
08ca2aa3
TH
1377 U16 au16;
1378#if U16SIZE > SIZE16
1379 au16 = 0;
1380#endif
aaec8192 1381 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1382 if (datumtype == 'n')
1383 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1384 if (datumtype == 'v')
1385 au16 = vtohs(au16);
08ca2aa3 1386 if (!checksum)
6e449a3a 1387 mPUSHu(au16);
73cb7263 1388 else if (checksum > bits_in_uv)
f337b084 1389 cdouble += (NV) au16;
73cb7263
NC
1390 else
1391 cuv += au16;
a6ec74c1
JH
1392 }
1393 break;
068bd2e7
MHM
1394 case 'v' | TYPE_IS_SHRIEKING:
1395 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1396 while (len-- > 0) {
08ca2aa3
TH
1397 I16 ai16;
1398# if U16SIZE > SIZE16
1399 ai16 = 0;
1400# endif
aaec8192 1401 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1402 /* There should never be any byte-swapping here. */
1403 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1404 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1405 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1406 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1407 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1408 if (!checksum)
6e449a3a 1409 mPUSHi(ai16);
73cb7263 1410 else if (checksum > bits_in_uv)
08ca2aa3 1411 cdouble += (NV) ai16;
73cb7263
NC
1412 else
1413 cuv += ai16;
068bd2e7
MHM
1414 }
1415 break;
a6ec74c1 1416 case 'i':
49704364 1417 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1418 while (len-- > 0) {
08ca2aa3 1419 int aint;
aaec8192 1420 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1421 if (!checksum)
6e449a3a 1422 mPUSHi(aint);
73cb7263
NC
1423 else if (checksum > bits_in_uv)
1424 cdouble += (NV)aint;
1425 else
1426 cuv += aint;
a6ec74c1
JH
1427 }
1428 break;
1429 case 'I':
49704364 1430 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1431 while (len-- > 0) {
08ca2aa3 1432 unsigned int auint;
aaec8192 1433 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1434 if (!checksum)
6e449a3a 1435 mPUSHu(auint);
73cb7263
NC
1436 else if (checksum > bits_in_uv)
1437 cdouble += (NV)auint;
1438 else
1439 cuv += auint;
a6ec74c1
JH
1440 }
1441 break;
92d41999 1442 case 'j':
73cb7263 1443 while (len-- > 0) {
08ca2aa3 1444 IV aiv;
aaec8192 1445 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1446 if (!checksum)
6e449a3a 1447 mPUSHi(aiv);
73cb7263
NC
1448 else if (checksum > bits_in_uv)
1449 cdouble += (NV)aiv;
1450 else
1451 cuv += aiv;
92d41999
JH
1452 }
1453 break;
1454 case 'J':
73cb7263 1455 while (len-- > 0) {
08ca2aa3 1456 UV auv;
aaec8192 1457 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1458 if (!checksum)
6e449a3a 1459 mPUSHu(auv);
73cb7263
NC
1460 else if (checksum > bits_in_uv)
1461 cdouble += (NV)auv;
1462 else
1463 cuv += auv;
92d41999
JH
1464 }
1465 break;
49704364
LW
1466 case 'l' | TYPE_IS_SHRIEKING:
1467#if LONGSIZE != SIZE32
73cb7263 1468 while (len-- > 0) {
08ca2aa3 1469 long along;
aaec8192 1470 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1471 if (!checksum)
6e449a3a 1472 mPUSHi(along);
73cb7263
NC
1473 else if (checksum > bits_in_uv)
1474 cdouble += (NV)along;
1475 else
1476 cuv += along;
49704364
LW
1477 }
1478 break;
1479#else
924ba076 1480 /* FALLTHROUGH */
a6ec74c1 1481#endif
49704364 1482 case 'l':
73cb7263 1483 while (len-- > 0) {
08ca2aa3
TH
1484 I32 ai32;
1485#if U32SIZE > SIZE32
1486 ai32 = 0;
1487#endif
aaec8192 1488 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1489#if U32SIZE > SIZE32
08ca2aa3 1490 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1491#endif
08ca2aa3 1492 if (!checksum)
6e449a3a 1493 mPUSHi(ai32);
73cb7263
NC
1494 else if (checksum > bits_in_uv)
1495 cdouble += (NV)ai32;
1496 else
1497 cuv += ai32;
a6ec74c1
JH
1498 }
1499 break;
49704364
LW
1500 case 'L' | TYPE_IS_SHRIEKING:
1501#if LONGSIZE != SIZE32
73cb7263 1502 while (len-- > 0) {
08ca2aa3 1503 unsigned long aulong;
aaec8192 1504 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1505 if (!checksum)
6e449a3a 1506 mPUSHu(aulong);
73cb7263
NC
1507 else if (checksum > bits_in_uv)
1508 cdouble += (NV)aulong;
1509 else
1510 cuv += aulong;
49704364
LW
1511 }
1512 break;
1513#else
924ba076 1514 /* FALLTHROUGH */
49704364 1515#endif
a6ec74c1
JH
1516 case 'V':
1517 case 'N':
1518 case 'L':
73cb7263 1519 while (len-- > 0) {
08ca2aa3
TH
1520 U32 au32;
1521#if U32SIZE > SIZE32
1522 au32 = 0;
1523#endif
aaec8192 1524 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1525 if (datumtype == 'N')
1526 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1527 if (datumtype == 'V')
1528 au32 = vtohl(au32);
08ca2aa3 1529 if (!checksum)
6e449a3a 1530 mPUSHu(au32);
fc241834
RGS
1531 else if (checksum > bits_in_uv)
1532 cdouble += (NV)au32;
1533 else
1534 cuv += au32;
a6ec74c1
JH
1535 }
1536 break;
068bd2e7
MHM
1537 case 'V' | TYPE_IS_SHRIEKING:
1538 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1539 while (len-- > 0) {
08ca2aa3 1540 I32 ai32;
f8e5a5db 1541#if U32SIZE > SIZE32
08ca2aa3 1542 ai32 = 0;
f8e5a5db 1543#endif
aaec8192 1544 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1545 /* There should never be any byte swapping here. */
1546 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1547 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1548 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1549 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1550 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1551 if (!checksum)
6e449a3a 1552 mPUSHi(ai32);
73cb7263
NC
1553 else if (checksum > bits_in_uv)
1554 cdouble += (NV)ai32;
1555 else
1556 cuv += ai32;
068bd2e7
MHM
1557 }
1558 break;
a6ec74c1 1559 case 'p':
a6ec74c1 1560 while (len-- > 0) {
f7fe979e 1561 const char *aptr;
aaec8192 1562 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1563 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1564 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1565 }
1566 break;
1567 case 'w':
a6ec74c1
JH
1568 {
1569 UV auv = 0;
1570 U32 bytes = 0;
fc241834 1571
08ca2aa3
TH
1572 while (len > 0 && s < strend) {
1573 U8 ch;
f337b084 1574 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1575 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1576 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1577 if (ch < 0x80) {
a6ec74c1 1578 bytes = 0;
6e449a3a 1579 mPUSHu(auv);
a6ec74c1
JH
1580 len--;
1581 auv = 0;
08ca2aa3 1582 continue;
a6ec74c1 1583 }
08ca2aa3 1584 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1585 const char *t;
a6ec74c1 1586
f5992bc4 1587 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1588 while (s < strend) {
f337b084 1589 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1590 sv = mul128(sv, (U8)(ch & 0x7f));
1591 if (!(ch & 0x80)) {
a6ec74c1
JH
1592 bytes = 0;
1593 break;
1594 }
1595 }
10516c54 1596 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1597 while (*t == '0')
1598 t++;
1599 sv_chop(sv, t);
6e449a3a 1600 mPUSHs(sv);
a6ec74c1
JH
1601 len--;
1602 auv = 0;
1603 }
1604 }
1605 if ((s >= strend) && bytes)
49704364 1606 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1607 }
1608 break;
1609 case 'P':
49704364
LW
1610 if (symptr->howlen == e_star)
1611 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1612 EXTEND(SP, 1);
2d3e0934 1613 if (s + sizeof(char*) <= strend) {
08ca2aa3 1614 char *aptr;
aaec8192 1615 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1616 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1617 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1618 }
a6ec74c1 1619 break;
c174bf3b 1620#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1621 case 'q':
73cb7263 1622 while (len-- > 0) {
08ca2aa3 1623 Quad_t aquad;
aaec8192 1624 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1625 if (!checksum)
c174bf3b 1626 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1627 else if (checksum > bits_in_uv)
1628 cdouble += (NV)aquad;
1629 else
1630 cuv += aquad;
1631 }
a6ec74c1
JH
1632 break;
1633 case 'Q':
73cb7263 1634 while (len-- > 0) {
08ca2aa3 1635 Uquad_t auquad;
aaec8192 1636 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1637 if (!checksum)
c174bf3b 1638 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1639 else if (checksum > bits_in_uv)
1640 cdouble += (NV)auquad;
1641 else
1642 cuv += auquad;
a6ec74c1
JH
1643 }
1644 break;
1640b983 1645#endif
a6ec74c1
JH
1646 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1647 case 'f':
73cb7263 1648 while (len-- > 0) {
08ca2aa3 1649 float afloat;
aaec8192 1650 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1651 if (!checksum)
6e449a3a 1652 mPUSHn(afloat);
08ca2aa3 1653 else
73cb7263 1654 cdouble += afloat;
fc241834 1655 }
a6ec74c1
JH
1656 break;
1657 case 'd':
73cb7263 1658 while (len-- > 0) {
08ca2aa3 1659 double adouble;
aaec8192 1660 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1661 if (!checksum)
6e449a3a 1662 mPUSHn(adouble);
08ca2aa3 1663 else
73cb7263 1664 cdouble += adouble;
fc241834 1665 }
a6ec74c1 1666 break;
92d41999 1667 case 'F':
73cb7263 1668 while (len-- > 0) {
275663fa 1669 NV_bytes anv;
aaec8192
NC
1670 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1671 datumtype, needs_swap);
08ca2aa3 1672 if (!checksum)
275663fa 1673 mPUSHn(anv.nv);
08ca2aa3 1674 else
275663fa 1675 cdouble += anv.nv;
fc241834 1676 }
92d41999
JH
1677 break;
1678#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1679 case 'D':
73cb7263 1680 while (len-- > 0) {
275663fa 1681 ld_bytes aldouble;
aaec8192
NC
1682 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1683 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1684 /* The most common long double format, the x86 80-bit
1685 * extended precision, has either 2 or 6 unused bytes,
1686 * which may contain garbage, which may contain
1687 * unintentional data. While we do zero the bytes of
1688 * the long double data in pack(), here in unpack() we
1689 * don't, because it's really hard to envision that
1690 * reading the long double off aldouble would be
e075ae47 1691 * affected by the unused bytes.
42262fd3
JH
1692 *
1693 * Note that trying to unpack 'long doubles' of 'long
1694 * doubles' packed in another system is in the general
1695 * case doomed without having more detail. */
08ca2aa3 1696 if (!checksum)
275663fa 1697 mPUSHn(aldouble.ld);
08ca2aa3 1698 else
275663fa 1699 cdouble += aldouble.ld;
92d41999
JH
1700 }
1701 break;
1702#endif
a6ec74c1 1703 case 'u':
858fe5e1 1704 if (!checksum) {
f7fe979e 1705 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1706 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1707 if (l) SvPOK_on(sv);
1708 }
08ca2aa3 1709
bee81d36
KW
1710 /* Note that all legal uuencoded strings are ASCII printables, so
1711 * have the same representation under UTF-8 vs not. This means we
1712 * can ignore UTF8ness on legal input. For illegal we stop at the
1713 * first failure, and don't report where/what that is, so again we
1714 * can ignore UTF8ness */
1715
9233b56b
KW
1716 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1717 I32 a, b, c, d;
1718 char hunk[3];
1719
1720 len = PL_uudmap[*(U8*)s++] & 077;
1721 while (len > 0) {
1722 if (s < strend && ISUUCHAR(*s))
1723 a = PL_uudmap[*(U8*)s++] & 077;
1724 else
1725 a = 0;
1726 if (s < strend && ISUUCHAR(*s))
1727 b = PL_uudmap[*(U8*)s++] & 077;
1728 else
1729 b = 0;
1730 if (s < strend && ISUUCHAR(*s))
1731 c = PL_uudmap[*(U8*)s++] & 077;
1732 else
1733 c = 0;
1734 if (s < strend && ISUUCHAR(*s))
1735 d = PL_uudmap[*(U8*)s++] & 077;
1736 else
1737 d = 0;
1738 hunk[0] = (char)((a << 2) | (b >> 4));
1739 hunk[1] = (char)((b << 4) | (c >> 2));
1740 hunk[2] = (char)((c << 6) | d);
1741 if (!checksum)
1742 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1743 len -= 3;
1744 }
1745 if (*s == '\n')
1746 s++;
1747 else /* possible checksum byte */
1748 if (s + 1 < strend && s[1] == '\n')
1749 s += 2;
1750 }
858fe5e1
TC
1751 if (!checksum)
1752 XPUSHs(sv);
a6ec74c1 1753 break;
99f862a1 1754 } /* End of switch */
49704364 1755
a6ec74c1 1756 if (checksum) {
1109a392 1757 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1758 (checksum > bits_in_uv &&
08ca2aa3
TH
1759 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1760 NV trouble, anv;
a6ec74c1 1761
08ca2aa3 1762 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1763 while (checksum >= 16) {
1764 checksum -= 16;
08ca2aa3 1765 anv *= 65536.0;
a6ec74c1 1766 }
a6ec74c1 1767 while (cdouble < 0.0)
08ca2aa3
TH
1768 cdouble += anv;
1769 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1770 sv = newSVnv(cdouble);
a6ec74c1
JH
1771 }
1772 else {
fa8ec7c1
NC
1773 if (checksum < bits_in_uv) {
1774 UV mask = ((UV)1 << checksum) - 1;
92d41999 1775 cuv &= mask;
a6ec74c1 1776 }
c4c5f44a 1777 sv = newSVuv(cuv);
a6ec74c1 1778 }
6e449a3a 1779 mXPUSHs(sv);
a6ec74c1
JH
1780 checksum = 0;
1781 }
fc241834 1782
49704364
LW
1783 if (symptr->flags & FLAG_SLASH){
1784 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1785 break;
49704364
LW
1786 if( next_symbol(symptr) ){
1787 if( symptr->howlen == e_number )
1788 Perl_croak(aTHX_ "Count after length/code in unpack" );
1789 if( beyond ){
1790 /* ...end of char buffer then no decent length available */
1791 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1792 } else {
1793 /* take top of stack (hope it's numeric) */
1794 len = POPi;
1795 if( len < 0 )
1796 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1797 }
1798 } else {
1799 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1800 }
1801 datumtype = symptr->code;
21c16052 1802 explicit_length = FALSE;
49704364
LW
1803 goto redo_switch;
1804 }
a6ec74c1 1805 }
49704364 1806
18529408
IZ
1807 if (new_s)
1808 *new_s = s;
1809 PUTBACK;
1810 return SP - PL_stack_base - start_sp_offset;
1811}
1812
1813PP(pp_unpack)
1814{
1815 dSP;
bab9c0ac 1816 dPOPPOPssrl;
18529408
IZ
1817 I32 gimme = GIMME_V;
1818 STRLEN llen;
1819 STRLEN rlen;
5c144d81
NC
1820 const char *pat = SvPV_const(left, llen);
1821 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1822 const char *strend = s + rlen;
1823 const char *patend = pat + llen;
08ca2aa3 1824 I32 cnt;
18529408
IZ
1825
1826 PUTBACK;
7accc089 1827 cnt = unpackstring(pat, patend, s, strend,
49704364 1828 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1829 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1830
18529408
IZ
1831 SPAGAIN;
1832 if ( !cnt && gimme == G_SCALAR )
1833 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1834 RETURN;
1835}
1836
f337b084 1837STATIC U8 *
e68aed92 1838doencodes(U8 *h, const U8 *s, I32 len)
a6ec74c1 1839{
f337b084 1840 *h++ = PL_uuemap[len];
a6ec74c1 1841 while (len > 2) {
f337b084
TH
1842 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1843 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1844 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1845 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1846 s += 3;
1847 len -= 3;
1848 }
1849 if (len > 0) {
e68aed92 1850 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1851 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1852 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1853 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1854 *h++ = PL_uuemap[0];
a6ec74c1 1855 }
f337b084
TH
1856 *h++ = '\n';
1857 return h;
a6ec74c1
JH
1858}
1859
1860STATIC SV *
f7fe979e 1861S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1862{
8b6e33c7
AL
1863 SV *result = newSVpvn(s, l);
1864 char *const result_c = SvPV_nolen(result); /* convenience */
1865 char *out = result_c;
1866 bool skip = 1;
1867 bool ignore = 0;
a6ec74c1 1868
7918f24d
NC
1869 PERL_ARGS_ASSERT_IS_AN_INT;
1870
a6ec74c1
JH
1871 while (*s) {
1872 switch (*s) {
1873 case ' ':
1874 break;
1875 case '+':
1876 if (!skip) {
1877 SvREFCNT_dec(result);
1878 return (NULL);
1879 }
1880 break;
1881 case '0':
1882 case '1':
1883 case '2':
1884 case '3':
1885 case '4':
1886 case '5':
1887 case '6':
1888 case '7':
1889 case '8':
1890 case '9':
1891 skip = 0;
1892 if (!ignore) {
1893 *(out++) = *s;
1894 }
1895 break;
1896 case '.':
1897 ignore = 1;
1898 break;
1899 default:
1900 SvREFCNT_dec(result);
1901 return (NULL);
1902 }
1903 s++;
1904 }
1905 *(out++) = '\0';
1906 SvCUR_set(result, out - result_c);
1907 return (result);
1908}
1909
1910/* pnum must be '\0' terminated */
1911STATIC int
1912S_div128(pTHX_ SV *pnum, bool *done)
1913{
8b6e33c7
AL
1914 STRLEN len;
1915 char * const s = SvPV(pnum, len);
1916 char *t = s;
1917 int m = 0;
1918
7918f24d
NC
1919 PERL_ARGS_ASSERT_DIV128;
1920
8b6e33c7
AL
1921 *done = 1;
1922 while (*t) {
1923 const int i = m * 10 + (*t - '0');
1924 const int r = (i >> 7); /* r < 10 */
1925 m = i & 0x7F;
1926 if (r) {
1927 *done = 0;
1928 }
1929 *(t++) = '0' + r;
a6ec74c1 1930 }
8b6e33c7
AL
1931 *(t++) = '\0';
1932 SvCUR_set(pnum, (STRLEN) (t - s));
1933 return (m);
a6ec74c1
JH
1934}
1935
18529408 1936/*
7accc089
JH
1937=for apidoc packlist
1938
1939The engine implementing pack() Perl function.
1940
bfce84ec
AL
1941=cut
1942*/
7accc089
JH
1943
1944void
5aaab254 1945Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1946{
aadb217d
JH
1947 tempsym_t sym;
1948
7918f24d
NC
1949 PERL_ARGS_ASSERT_PACKLIST;
1950
f7fe979e 1951 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1952
f337b084
TH
1953 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1954 Also make sure any UTF8 flag is loaded */
56eb0262 1955 SvPV_force_nolen(cat);
bfce84ec
AL
1956 if (DO_UTF8(cat))
1957 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1958
49704364
LW
1959 (void)pack_rec( cat, &sym, beglist, endlist );
1960}
1961
f337b084
TH
1962/* like sv_utf8_upgrade, but also repoint the group start markers */
1963STATIC void
1964marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1965 STRLEN len;
1966 tempsym_t *group;
f7fe979e
AL
1967 const char *from_ptr, *from_start, *from_end, **marks, **m;
1968 char *to_start, *to_ptr;
f337b084
TH
1969
1970 if (SvUTF8(sv)) return;
1971
aa07b2f6 1972 from_start = SvPVX_const(sv);
f337b084
TH
1973 from_end = from_start + SvCUR(sv);
1974 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 1975 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
1976 if (from_ptr == from_end) {
1977 /* Simple case: no character needs to be changed */
1978 SvUTF8_on(sv);
1979 return;
1980 }
1981
3473cf63 1982 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 1983 Newx(to_start, len, char);
f337b084
TH
1984 Copy(from_start, to_start, from_ptr-from_start, char);
1985 to_ptr = to_start + (from_ptr-from_start);
1986
a02a5408 1987 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
1988 for (group=sym_ptr; group; group = group->previous)
1989 marks[group->level] = from_start + group->strbeg;
1990 marks[sym_ptr->level+1] = from_end+1;
1991 for (m = marks; *m < from_ptr; m++)
1992 *m = to_start + (*m-from_start);
1993
1994 for (;from_ptr < from_end; from_ptr++) {
1995 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 1996 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
1997 }
1998 *to_ptr = 0;
1999
2000 while (*m == from_ptr) *m++ = to_ptr;
2001 if (m != marks + sym_ptr->level+1) {
2002 Safefree(marks);
2003 Safefree(to_start);
5637ef5b
NC
2004 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2005 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2006 }
2007 for (group=sym_ptr; group; group = group->previous)
2008 group->strbeg = marks[group->level] - to_start;
2009 Safefree(marks);
2010
2011 if (SvOOK(sv)) {
2012 if (SvIVX(sv)) {
b162af07 2013 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2014 from_start -= SvIVX(sv);
2015 SvIV_set(sv, 0);
2016 }
2017 SvFLAGS(sv) &= ~SVf_OOK;
2018 }
2019 if (SvLEN(sv) != 0)
2020 Safefree(from_start);
f880fe2f 2021 SvPV_set(sv, to_start);
b162af07
SP
2022 SvCUR_set(sv, to_ptr - to_start);
2023 SvLEN_set(sv, len);
f337b084
TH
2024 SvUTF8_on(sv);
2025}
2026
2027/* Exponential string grower. Makes string extension effectively O(n)
2028 needed says how many extra bytes we need (not counting the final '\0')
2029 Only grows the string if there is an actual lack of space
2030*/
2031STATIC char *
0bd48802 2032S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2033 const STRLEN cur = SvCUR(sv);
2034 const STRLEN len = SvLEN(sv);
f337b084 2035 STRLEN extend;
7918f24d
NC
2036
2037 PERL_ARGS_ASSERT_SV_EXP_GROW;
2038
f337b084
TH
2039 if (len - cur > needed) return SvPVX(sv);
2040 extend = needed > len ? needed : len;
2041 return SvGROW(sv, len+extend+1);
2042}
49704364 2043
93f6e112 2044static SV *
b197e565 2045S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2046{
2047 SvGETMAGIC(sv);
93f6e112
FC
2048 if (UNLIKELY(SvAMAGIC(sv)))
2049 sv = sv_2num(sv);
99f450cc 2050 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2051 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2052 const NV nv = SvNV_nomg(sv);
2053 if (c == 'w')
2054 Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
2055 else
2056 Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
2057 }
93f6e112 2058 return sv;
354b74ae
FC
2059}
2060
93f6e112
FC
2061#define SvIV_no_inf(sv,d) \
2062 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2063#define SvUV_no_inf(sv,d) \
2064 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2065
49704364
LW
2066STATIC
2067SV **
f337b084 2068S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2069{
49704364 2070 tempsym_t lookahead;
f337b084
TH
2071 I32 items = endlist - beglist;
2072 bool found = next_symbol(symptr);
2073 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2074 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2075 char* from;
f337b084 2076
7918f24d
NC
2077 PERL_ARGS_ASSERT_PACK_REC;
2078
f337b084
TH
2079 if (symptr->level == 0 && found && symptr->code == 'U') {
2080 marked_upgrade(aTHX_ cat, symptr);
2081 symptr->flags |= FLAG_DO_UTF8;
2082 utf8 = 0;
49704364 2083 }
f337b084 2084 symptr->strbeg = SvCUR(cat);
49704364
LW
2085
2086 while (found) {
f337b084
TH
2087 SV *fromstr;
2088 STRLEN fromlen;
2089 I32 len;
a0714e2c 2090 SV *lengthcode = NULL;
49704364 2091 I32 datumtype = symptr->code;
f337b084
TH
2092 howlen_t howlen = symptr->howlen;
2093 char *start = SvPVX(cat);
2094 char *cur = start + SvCUR(cat);
a1219b5e 2095 bool needs_swap;
49704364 2096
fc1bb3f2 2097#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2098#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2099
2100 switch (howlen) {
fc241834 2101 case e_star:
f337b084
TH
2102 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2103 0 : items;
2104 break;
2105 default:
2106 /* e_no_len and e_number */
2107 len = symptr->length;
49704364
LW
2108 break;
2109 }
2110
f337b084 2111 if (len) {
a7a3cfaa 2112 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2113
a7a3cfaa
TH
2114 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2115 /* We can process this letter. */
2116 STRLEN size = props & PACK_SIZE_MASK;
2117 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2118 }
f337b084
TH
2119 }
2120
49704364
LW
2121 /* Look ahead for next symbol. Do we have code/code? */
2122 lookahead = *symptr;
2123 found = next_symbol(&lookahead);
246f24af
TH
2124 if (symptr->flags & FLAG_SLASH) {
2125 IV count;
f337b084 2126 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2127 if (strchr("aAZ", lookahead.code)) {
2128 if (lookahead.howlen == e_number) count = lookahead.length;
2129 else {
ce399ba6 2130 if (items > 0) {
48a5da33 2131 count = sv_len_utf8(*beglist);
ce399ba6 2132 }
246f24af
TH
2133 else count = 0;
2134 if (lookahead.code == 'Z') count++;
2135 }
2136 } else {
2137 if (lookahead.howlen == e_number && lookahead.length < items)
2138 count = lookahead.length;
2139 else count = items;
2140 }
2141 lookahead.howlen = e_number;
2142 lookahead.length = count;
2143 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2144 }
49704364 2145
a1219b5e
NC
2146 needs_swap = NEEDS_SWAP(datumtype);
2147
fc241834
RGS
2148 /* Code inside the switch must take care to properly update
2149 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2150 doesn't simply leave using break */
0c7df902 2151 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2152 default:
f337b084
TH
2153 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2154 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2155 case '%':
49704364 2156 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2157
28be1210 2158 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2159 case '.':
2160 if (howlen == e_star) from = start;
2161 else if (len == 0) from = cur;
2162 else {
2163 tempsym_t *group = symptr;
2164
2165 while (--len && group) group = group->previous;
2166 from = group ? start + group->strbeg : start;
2167 }
2168 fromstr = NEXTFROM;
354b74ae 2169 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2170 goto resize;
28be1210 2171 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2172 case '@':
28be1210
TH
2173 from = start + symptr->strbeg;
2174 resize:
28be1210 2175 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2176 if (len >= 0) {
2177 while (len && from < cur) {
2178 from += UTF8SKIP(from);
2179 len--;
2180 }
2181 if (from > cur)
2182 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2183 if (len) {
2184 /* Here we know from == cur */
2185 grow:
2186 GROWING(0, cat, start, cur, len);
2187 Zero(cur, len, char);
2188 cur += len;
2189 } else if (from < cur) {
2190 len = cur - from;
2191 goto shrink;
2192 } else goto no_change;
2193 } else {
2194 cur = from;
2195 len = -len;
2196 goto utf8_shrink;
f337b084 2197 }
28be1210
TH
2198 else {
2199 len -= cur - from;
f337b084 2200 if (len > 0) goto grow;
28be1210 2201 if (len == 0) goto no_change;
fc241834 2202 len = -len;
28be1210 2203 goto shrink;
f337b084 2204 }
a6ec74c1 2205 break;
81d52ecd 2206
fc241834 2207 case '(': {
49704364 2208 tempsym_t savsym = *symptr;
66c611c5
MHM
2209 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2210 symptr->flags |= group_modifiers;
49704364
LW
2211 symptr->patend = savsym.grpend;
2212 symptr->level++;
f337b084 2213 symptr->previous = &lookahead;
18529408 2214 while (len--) {
f337b084
TH
2215 U32 was_utf8;
2216 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2217 else symptr->flags &= ~FLAG_PARSE_UTF8;
2218 was_utf8 = SvUTF8(cat);
49704364 2219 symptr->patptr = savsym.grpbeg;
f337b084
TH
2220 beglist = pack_rec(cat, symptr, beglist, endlist);
2221 if (SvUTF8(cat) != was_utf8)
2222 /* This had better be an upgrade while in utf8==0 mode */
2223 utf8 = 1;
2224
49704364 2225 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2226 break; /* No way to continue */
2227 }
ee790063 2228 items = endlist - beglist;
f337b084
TH
2229 lookahead.flags = symptr->flags & ~group_modifiers;
2230 goto no_change;
18529408 2231 }
62f95557
IZ
2232 case 'X' | TYPE_IS_SHRIEKING:
2233 if (!len) /* Avoid division by 0 */
2234 len = 1;
f337b084
TH
2235 if (utf8) {
2236 char *hop, *last;
2237 I32 l = len;
2238 hop = last = start;
2239 while (hop < cur) {
2240 hop += UTF8SKIP(hop);
2241 if (--l == 0) {
2242 last = hop;
2243 l = len;
2244 }
2245 }
2246 if (last > cur)
2247 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2248 cur = last;
2249 break;
2250 }
2251 len = (cur-start) % len;
924ba076 2252 /* FALLTHROUGH */
a6ec74c1 2253 case 'X':
f337b084
TH
2254 if (utf8) {
2255 if (len < 1) goto no_change;
28be1210 2256 utf8_shrink:
f337b084
TH
2257 while (len > 0) {
2258 if (cur <= start)
28be1210
TH
2259 Perl_croak(aTHX_ "'%c' outside of string in pack",
2260 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2261 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2262 if (cur <= start)
28be1210
TH
2263 Perl_croak(aTHX_ "'%c' outside of string in pack",
2264 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2265 }
2266 len--;
2267 }
2268 } else {
fc241834 2269 shrink:
f337b084 2270 if (cur - start < len)
28be1210
TH
2271 Perl_croak(aTHX_ "'%c' outside of string in pack",
2272 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2273 cur -= len;
2274 }
2275 if (cur < start+symptr->strbeg) {
2276 /* Make sure group starts don't point into the void */
2277 tempsym_t *group;
9e27e96a 2278 const STRLEN length = cur-start;
f337b084
TH
2279 for (group = symptr;
2280 group && length < group->strbeg;
2281 group = group->previous) group->strbeg = length;
2282 lookahead.strbeg = length;
2283 }
a6ec74c1 2284 break;
fc241834
RGS
2285 case 'x' | TYPE_IS_SHRIEKING: {
2286 I32 ai32;
62f95557
IZ
2287 if (!len) /* Avoid division by 0 */
2288 len = 1;
230e1fce 2289 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2290 else ai32 = (cur - start) % len;
2291 if (ai32 == 0) goto no_change;
2292 len -= ai32;
2293 }
924ba076 2294 /* FALLTHROUGH */
a6ec74c1 2295 case 'x':
f337b084 2296 goto grow;
a6ec74c1
JH
2297 case 'A':
2298 case 'Z':
f337b084 2299 case 'a': {
f7fe979e 2300 const char *aptr;
f337b084 2301
a6ec74c1 2302 fromstr = NEXTFROM;
e62f0680 2303 aptr = SvPV_const(fromstr, fromlen);
f337b084 2304 if (DO_UTF8(fromstr)) {
f7fe979e 2305 const char *end, *s;
f337b084
TH
2306
2307 if (!utf8 && !SvUTF8(cat)) {
2308 marked_upgrade(aTHX_ cat, symptr);
2309 lookahead.flags |= FLAG_DO_UTF8;
2310 lookahead.strbeg = symptr->strbeg;
2311 utf8 = 1;
2312 start = SvPVX(cat);
2313 cur = start + SvCUR(cat);
2314 }
fc241834 2315 if (howlen == e_star) {
f337b084
TH
2316 if (utf8) goto string_copy;
2317 len = fromlen+1;
2318 }
2319 s = aptr;
2320 end = aptr + fromlen;
2321 fromlen = datumtype == 'Z' ? len-1 : len;
2322 while ((I32) fromlen > 0 && s < end) {
2323 s += UTF8SKIP(s);
2324 fromlen--;
2325 }
2326 if (s > end)
2327 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2328 if (utf8) {
fc241834 2329 len = fromlen;
f337b084
TH
2330 if (datumtype == 'Z') len++;
2331 fromlen = s-aptr;
2332 len += fromlen;
fc241834 2333
f337b084 2334 goto string_copy;
fc241834 2335 }
f337b084
TH
2336 fromlen = len - fromlen;
2337 if (datumtype == 'Z') fromlen--;
2338 if (howlen == e_star) {
2339 len = fromlen;
2340 if (datumtype == 'Z') len++;
fc241834 2341 }
f337b084 2342 GROWING(0, cat, start, cur, len);
9df874cd 2343 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2344 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2345 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2346 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2347 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2348 cur += fromlen;
a6ec74c1 2349 len -= fromlen;
f337b084
TH
2350 } else if (utf8) {
2351 if (howlen == e_star) {
2352 len = fromlen;
2353 if (datumtype == 'Z') len++;
a6ec74c1 2354 }
f337b084
TH
2355 if (len <= (I32) fromlen) {
2356 fromlen = len;
2357 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2358 }
fc241834 2359 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2360 upgrade, so:
2361 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2362 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2363 len -= fromlen;
2364 while (fromlen > 0) {
230e1fce 2365 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2366 aptr++;
2367 fromlen--;
fc241834 2368 }
f337b084
TH
2369 } else {
2370 string_copy:
2371 if (howlen == e_star) {
2372 len = fromlen;
2373 if (datumtype == 'Z') len++;
2374 }
2375 if (len <= (I32) fromlen) {
2376 fromlen = len;
2377 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2378 }
f337b084
TH
2379 GROWING(0, cat, start, cur, len);
2380 Copy(aptr, cur, fromlen, char);
2381 cur += fromlen;
2382 len -= fromlen;
a6ec74c1 2383 }
f337b084
TH
2384 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2385 cur += len;
3c4fb04a 2386 SvTAINT(cat);
a6ec74c1 2387 break;
f337b084 2388 }
a6ec74c1 2389 case 'B':
f337b084 2390 case 'b': {
b83604b4 2391 const char *str, *end;
f337b084
TH
2392 I32 l, field_len;
2393 U8 bits;
2394 bool utf8_source;
2395 U32 utf8_flags;
a6ec74c1 2396
fc241834 2397 fromstr = NEXTFROM;
b83604b4 2398 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2399 end = str + fromlen;
2400 if (DO_UTF8(fromstr)) {
2401 utf8_source = TRUE;
041457d9 2402 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2403 } else {
2404 utf8_source = FALSE;
2405 utf8_flags = 0; /* Unused, but keep compilers happy */
2406 }
2407 if (howlen == e_star) len = fromlen;
2408 field_len = (len+7)/8;
2409 GROWING(utf8, cat, start, cur, field_len);
2410 if (len > (I32)fromlen) len = fromlen;
2411 bits = 0;
2412 l = 0;
2413 if (datumtype == 'B')
2414 while (l++ < len) {
2415 if (utf8_source) {
95b63a38 2416 UV val = 0;
f337b084
TH
2417 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2418 bits |= val & 1;
2419 } else bits |= *str++ & 1;
2420 if (l & 7) bits <<= 1;
fc241834 2421 else {
f337b084
TH
2422 PUSH_BYTE(utf8, cur, bits);
2423 bits = 0;
a6ec74c1
JH
2424 }
2425 }
f337b084
TH
2426 else
2427 /* datumtype == 'b' */
2428 while (l++ < len) {
2429 if (utf8_source) {
95b63a38 2430 UV val = 0;
f337b084
TH
2431 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2432 if (val & 1) bits |= 0x80;
2433 } else if (*str++ & 1)
2434 bits |= 0x80;
2435 if (l & 7) bits >>= 1;
fc241834 2436 else {
f337b084
TH
2437 PUSH_BYTE(utf8, cur, bits);
2438 bits = 0;
a6ec74c1
JH
2439 }
2440 }
f337b084
TH
2441 l--;
2442 if (l & 7) {
fc241834 2443 if (datumtype == 'B')
f337b084 2444 bits <<= 7 - (l & 7);
fc241834 2445 else
f337b084
TH
2446 bits >>= 7 - (l & 7);
2447 PUSH_BYTE(utf8, cur, bits);
2448 l += 7;
a6ec74c1 2449 }
f337b084
TH
2450 /* Determine how many chars are left in the requested field */
2451 l /= 8;
2452 if (howlen == e_star) field_len = 0;
2453 else field_len -= l;
2454 Zero(cur, field_len, char);
2455 cur += field_len;
a6ec74c1 2456 break;
f337b084 2457 }
a6ec74c1 2458 case 'H':
f337b084 2459 case 'h': {
10516c54 2460 const char *str, *end;
f337b084
TH
2461 I32 l, field_len;
2462 U8 bits;
2463 bool utf8_source;
2464 U32 utf8_flags;
a6ec74c1 2465
fc241834 2466 fromstr = NEXTFROM;
10516c54 2467 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2468 end = str + fromlen;
2469 if (DO_UTF8(fromstr)) {
2470 utf8_source = TRUE;
041457d9 2471 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2472 } else {
2473 utf8_source = FALSE;
2474 utf8_flags = 0; /* Unused, but keep compilers happy */
2475 }
2476 if (howlen == e_star) len = fromlen;
2477 field_len = (len+1)/2;
2478 GROWING(utf8, cat, start, cur, field_len);
2479 if (!utf8 && len > (I32)fromlen) len = fromlen;
2480 bits = 0;
2481 l = 0;
2482 if (datumtype == 'H')
2483 while (l++ < len) {
2484 if (utf8_source) {
95b63a38 2485 UV val = 0;
f337b084
TH
2486 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2487 if (val < 256 && isALPHA(val))
2488 bits |= (val + 9) & 0xf;
a6ec74c1 2489 else
f337b084
TH
2490 bits |= val & 0xf;
2491 } else if (isALPHA(*str))
2492 bits |= (*str++ + 9) & 0xf;
2493 else
2494 bits |= *str++ & 0xf;
2495 if (l & 1) bits <<= 4;
fc241834 2496 else {
f337b084
TH
2497 PUSH_BYTE(utf8, cur, bits);
2498 bits = 0;
a6ec74c1
JH
2499 }
2500 }
f337b084
TH
2501 else
2502 while (l++ < len) {
2503 if (utf8_source) {
95b63a38 2504 UV val = 0;
f337b084
TH
2505 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2506 if (val < 256 && isALPHA(val))
2507 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2508 else
f337b084
TH
2509 bits |= (val & 0xf) << 4;
2510 } else if (isALPHA(*str))
2511 bits |= ((*str++ + 9) & 0xf) << 4;
2512 else
2513 bits |= (*str++ & 0xf) << 4;
2514 if (l & 1) bits >>= 4;
fc241834 2515 else {
f337b084
TH
2516 PUSH_BYTE(utf8, cur, bits);
2517 bits = 0;
a6ec74c1 2518 }
fc241834 2519 }
f337b084
TH
2520 l--;
2521 if (l & 1) {
2522 PUSH_BYTE(utf8, cur, bits);
2523 l++;
2524 }
2525 /* Determine how many chars are left in the requested field */
2526 l /= 2;
2527 if (howlen == e_star) field_len = 0;
2528 else field_len -= l;
2529 Zero(cur, field_len, char);
2530 cur += field_len;
2531 break;
fc241834
RGS
2532 }
2533 case 'c':
f337b084
TH
2534 while (len-- > 0) {
2535 IV aiv;
2536 fromstr = NEXTFROM;
354b74ae 2537 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2538 if ((-128 > aiv || aiv > 127))
2539 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2540 "Character in 'c' format wrapped in pack");
585ec06d 2541 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2542 }
2543 break;
2544 case 'C':
f337b084
TH
2545 if (len == 0) {
2546 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2547 break;
2548 }
a6ec74c1 2549 while (len-- > 0) {
f337b084 2550 IV aiv;
a6ec74c1 2551 fromstr = NEXTFROM;
354b74ae 2552 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2553 if ((0 > aiv || aiv > 0xff))
2554 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2555 "Character in 'C' format wrapped in pack");
1651fc44 2556 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2557 }
fc241834
RGS
2558 break;
2559 case 'W': {
2560 char *end;
670f1322 2561 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2562
2563 end = start+SvLEN(cat)-1;
2564 if (utf8) end -= UTF8_MAXLEN-1;
2565 while (len-- > 0) {
2566 UV auv;
2567 fromstr = NEXTFROM;
354b74ae 2568 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2569 if (in_bytes) auv = auv % 0x100;
2570 if (utf8) {
2571 W_utf8:
2572 if (cur > end) {
2573 *cur = '\0';
b162af07 2574 SvCUR_set(cat, cur - start);
fc241834
RGS
2575
2576 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2577 end = start+SvLEN(cat)-UTF8_MAXLEN;
2578 }
c80e42f3
KW
2579 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2580 auv,
041457d9 2581 warn_utf8 ?
230e1fce 2582 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2583 } else {
2584 if (auv >= 0x100) {
2585 if (!SvUTF8(cat)) {
2586 *cur = '\0';
b162af07 2587 SvCUR_set(cat, cur - start);
fc241834
RGS
2588 marked_upgrade(aTHX_ cat, symptr);
2589 lookahead.flags |= FLAG_DO_UTF8;
2590 lookahead.strbeg = symptr->strbeg;
2591 utf8 = 1;
2592 start = SvPVX(cat);
2593 cur = start + SvCUR(cat);
2594 end = start+SvLEN(cat)-UTF8_MAXLEN;
2595 goto W_utf8;
2596 }
a2a5de95
NC
2597 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2598 "Character in 'W' format wrapped in pack");
fc241834
RGS
2599 auv &= 0xff;
2600 }
2601 if (cur >= end) {
2602 *cur = '\0';
b162af07 2603 SvCUR_set(cat, cur - start);
fc241834
RGS
2604 GROWING(0, cat, start, cur, len+1);
2605 end = start+SvLEN(cat)-1;
2606 }
fe2774ed 2607 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2608 }
2609 }
2610 break;
fc241834
RGS
2611 }
2612 case 'U': {
2613 char *end;
2614
2615 if (len == 0) {
2616 if (!(symptr->flags & FLAG_DO_UTF8)) {
2617 marked_upgrade(aTHX_ cat, symptr);
2618 lookahead.flags |= FLAG_DO_UTF8;
2619 lookahead.strbeg = symptr->strbeg;
2620 }
2621 utf8 = 0;
2622 goto no_change;
2623 }
2624
2625 end = start+SvLEN(cat);
2626 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2627 while (len-- > 0) {
fc241834 2628 UV auv;
a6ec74c1 2629 fromstr = NEXTFROM;
354b74ae 2630 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2631 if (utf8) {
230e1fce 2632 U8 buffer[UTF8_MAXLEN], *endb;
3ece276e 2633 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
041457d9 2634 warn_utf8 ?
fc241834
RGS
2635 0 : UNICODE_ALLOW_ANY);
2636 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2637 *cur = '\0';
b162af07 2638 SvCUR_set(cat, cur - start);
fc241834
RGS
2639 GROWING(0, cat, start, cur,
2640 len+(endb-buffer)*UTF8_EXPAND);
2641 end = start+SvLEN(cat);
2642 }
d21d7215 2643 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2644 } else {
2645 if (cur >= end) {
2646 *cur = '\0';
b162af07 2647 SvCUR_set(cat, cur - start);
fc241834
RGS
2648 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2649 end = start+SvLEN(cat)-UTF8_MAXLEN;
2650 }
3ece276e 2651 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
041457d9 2652 warn_utf8 ?
230e1fce 2653 0 : UNICODE_ALLOW_ANY);
fc241834 2654 }
a6ec74c1 2655 }
a6ec74c1 2656 break;
fc241834 2657 }
a6ec74c1
JH
2658 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2659 case 'f':
a6ec74c1 2660 while (len-- > 0) {
f337b084
TH
2661 float afloat;
2662 NV anv;
a6ec74c1 2663 fromstr = NEXTFROM;
f337b084 2664 anv = SvNV(fromstr);
85bba25f 2665# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2666 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2667 * on Alpha; fake it if we don't have them.
2668 */
f337b084 2669 if (anv > FLT_MAX)
fc241834 2670 afloat = FLT_MAX;
f337b084 2671 else if (anv < -FLT_MAX)
fc241834 2672 afloat = -FLT_MAX;
f337b084 2673 else afloat = (float)anv;
baf3cf9c 2674# else
d22fb6ca
DD
2675#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2676 if(Perl_isnan(anv))
2677 afloat = (float)NV_NAN;
2678 else
2679#endif
919894b7
DM
2680 /* a simple cast to float is undefined if outside
2681 * the range of values that can be represented */
2682 afloat = (float)(anv > FLT_MAX ? NV_INF :
2683 anv < -FLT_MAX ? -NV_INF : anv);
baf3cf9c 2684# endif
3a88beaa 2685 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2686 }
2687 break;
2688 case 'd':
a6ec74c1 2689 while (len-- > 0) {
f337b084
TH
2690 double adouble;
2691 NV anv;
a6ec74c1 2692 fromstr = NEXTFROM;
f337b084 2693 anv = SvNV(fromstr);
85bba25f 2694# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2695 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2696 * on Alpha; fake it if we don't have them.
2697 */
f337b084 2698 if (anv > DBL_MAX)
fc241834 2699 adouble = DBL_MAX;
f337b084 2700 else if (anv < -DBL_MAX)
fc241834 2701 adouble = -DBL_MAX;
f337b084 2702 else adouble = (double)anv;
baf3cf9c 2703# else
f337b084 2704 adouble = (double)anv;
baf3cf9c 2705# endif
3a88beaa 2706 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2707 }
2708 break;
fc241834 2709 case 'F': {
275663fa 2710 NV_bytes anv;
1109a392 2711 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2712 while (len-- > 0) {
2713 fromstr = NEXTFROM;
cd07c537
DM
2714#ifdef __GNUC__
2715 /* to work round a gcc/x86 bug; don't use SvNV */
2716 anv.nv = sv_2nv(fromstr);
070e2677
TC
2717# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2718 && LONG_DOUBLESIZE > 10
2719 /* GCC sometimes overwrites the padding in the
2720 assignment above */
2721 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2722# endif
cd07c537 2723#else
275663fa 2724 anv.nv = SvNV(fromstr);
cd07c537 2725#endif
3a88beaa 2726 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2727 }
2728 break;
fc241834 2729 }
92d41999 2730#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2731 case 'D': {
275663fa 2732 ld_bytes aldouble;
1109a392
MHM
2733 /* long doubles can have unused bits, which may be nonzero */
2734 Zero(&aldouble, 1, long double);
92d41999
JH
2735 while (len-- > 0) {
2736 fromstr = NEXTFROM;
cd07c537
DM
2737# ifdef __GNUC__
2738 /* to work round a gcc/x86 bug; don't use SvNV */
2739 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2740# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2741 /* GCC sometimes overwrites the padding in the
2742 assignment above */
2743 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2744# endif
cd07c537 2745# else
275663fa 2746 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2747# endif
3a88beaa
NC
2748 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2749 needs_swap);
92d41999
JH
2750 }
2751 break;
fc241834 2752 }
92d41999 2753#endif
068bd2e7 2754 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2755 case 'n':
2756 while (len-- > 0) {
f337b084 2757 I16 ai16;
a6ec74c1 2758 fromstr = NEXTFROM;
354b74ae 2759 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2760 ai16 = PerlSock_htons(ai16);
3a88beaa 2761 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2762 }
2763 break;
068bd2e7 2764 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2765 case 'v':
2766 while (len-- > 0) {
f337b084 2767 I16 ai16;
a6ec74c1 2768 fromstr = NEXTFROM;
354b74ae 2769 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2770 ai16 = htovs(ai16);
3a88beaa 2771 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2772 }
2773 break;
49704364 2774 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2775#if SHORTSIZE != SIZE16
fc241834 2776 while (len-- > 0) {
f337b084 2777 unsigned short aushort;
fc241834 2778 fromstr = NEXTFROM;
354b74ae 2779 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2780 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2781 }
49704364
LW
2782 break;
2783#else
924ba076 2784 /* FALLTHROUGH */
a6ec74c1 2785#endif
49704364 2786 case 'S':
fc241834 2787 while (len-- > 0) {
f337b084 2788 U16 au16;
fc241834 2789 fromstr = NEXTFROM;
354b74ae 2790 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2791 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2792 }
2793 break;
49704364 2794 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2795#if SHORTSIZE != SIZE16
fc241834 2796 while (len-- > 0) {
f337b084 2797 short ashort;
fc241834 2798 fromstr = NEXTFROM;
354b74ae 2799 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2800 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2801 }
49704364
LW
2802 break;
2803#else
924ba076 2804 /* FALLTHROUGH */
a6ec74c1 2805#endif
49704364
LW
2806 case 's':
2807 while (len-- > 0) {
f337b084 2808 I16 ai16;
49704364 2809 fromstr = NEXTFROM;
354b74ae 2810 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2811 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2812 }
2813 break;
2814 case 'I':
49704364 2815 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2816 while (len-- > 0) {
f337b084 2817 unsigned int auint;
a6ec74c1 2818 fromstr = NEXTFROM;
354b74ae 2819 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2820 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2821 }
2822 break;
92d41999
JH
2823 case 'j':
2824 while (len-- > 0) {
f337b084 2825 IV aiv;
92d41999 2826 fromstr = NEXTFROM;
354b74ae 2827 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2828 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2829 }
2830 break;
2831 case 'J':
2832 while (len-- > 0) {
f337b084 2833 UV auv;
92d41999 2834 fromstr = NEXTFROM;
354b74ae 2835 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2836 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2837 }
2838 break;
a6ec74c1
JH
2839 case 'w':
2840 while (len-- > 0) {
f337b084 2841 NV anv;
a6ec74c1 2842 fromstr = NEXTFROM;
83388ae2 2843 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2844 anv = SvNV_nomg(fromstr);
a6ec74c1 2845
f337b084
TH
2846 if (anv < 0) {
2847 *cur = '\0';
b162af07 2848 SvCUR_set(cat, cur - start);
49704364 2849 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2850 }
a6ec74c1 2851
196b62db
NC
2852 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2853 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2854 any negative IVs will have already been got by the croak()
2855 above. IOK is untrue for fractions, so we test them
2856 against UV_MAX_P1. */
f337b084
TH
2857 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2858 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2859 char *in = buf + sizeof(buf);
1e9a122e 2860 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2861
2862 do {
eb160463 2863 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2864 auv >>= 7;
2865 } while (auv);
2866 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2867 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2868 in, (buf + sizeof(buf)) - in);
2869 } else if (SvPOKp(fromstr))
2870 goto w_string;
a6ec74c1 2871 else if (SvNOKp(fromstr)) {
0258719b 2872 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2873 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2874 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2875 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2876 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2877 Some C compilers are strict about integral constant
2878 expressions so we conservatively divide by a slightly
2879 smaller integer instead of multiplying by the exact
2880 floating-point value.
0258719b
NC
2881 */
2882#ifdef NV_MAX_10_EXP
f337b084 2883 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2884 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2885#else
f337b084 2886 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2887 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2888#endif
a6ec74c1
JH
2889 char *in = buf + sizeof(buf);
2890
8b6e33c7 2891 anv = Perl_floor(anv);
a6ec74c1 2892 do {
8b6e33c7 2893 const NV next = Perl_floor(anv / 128);
a6ec74c1 2894 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2895 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2896 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2897 anv = next;
2898 } while (anv > 0);
a6ec74c1 2899 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2900 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2901 in, (buf + sizeof(buf)) - in);
2902 } else {
8b6e33c7
AL
2903 const char *from;
2904 char *result, *in;
735b914b
JH
2905 SV *norm;
2906 STRLEN len;
2907 bool done;
2908
f337b084 2909 w_string:
735b914b 2910 /* Copy string and check for compliance */
1e9a122e 2911 from = SvPV_nomg_const(fromstr, len);
735b914b 2912 if ((norm = is_an_int(from, len)) == NULL)
49704364 2913 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2914
a02a5408 2915 Newx(result, len, char);
735b914b
JH
2916 in = result + len;
2917 done = FALSE;
f337b084 2918 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2919 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2920 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2921 in, (result + len) - in);
735b914b
JH
2922 Safefree(result);
2923 SvREFCNT_dec(norm); /* free norm */
fc241834 2924 }
a6ec74c1
JH
2925 }
2926 break;
2927 case 'i':
49704364 2928 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2929 while (len-- > 0) {
f337b084 2930 int aint;
a6ec74c1 2931 fromstr = NEXTFROM;
354b74ae 2932 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2933 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2934 }
2935 break;
068bd2e7 2936 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2937 case 'N':
2938 while (len-- > 0) {
f337b084 2939 U32 au32;
a6ec74c1 2940 fromstr = NEXTFROM;
354b74ae 2941 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2942 au32 = PerlSock_htonl(au32);
3a88beaa 2943 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2944 }
2945 break;
068bd2e7 2946 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2947 case 'V':
2948 while (len-- > 0) {
f337b084 2949 U32 au32;
a6ec74c1 2950 fromstr = NEXTFROM;
354b74ae 2951 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2952 au32 = htovl(au32);
3a88beaa 2953 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2954 }
2955 break;
49704364 2956 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2957#if LONGSIZE != SIZE32
fc241834 2958 while (len-- > 0) {
f337b084 2959 unsigned long aulong;
fc241834 2960 fromstr = NEXTFROM;
354b74ae 2961 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2962 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2963 }
49704364
LW
2964 break;
2965#else
2966 /* Fall though! */
a6ec74c1 2967#endif
49704364 2968 case 'L':
fc241834 2969 while (len-- > 0) {
f337b084 2970 U32 au32;
fc241834 2971 fromstr = NEXTFROM;
354b74ae 2972 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2973 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2974 }
2975 break;
49704364 2976 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2977#if LONGSIZE != SIZE32
fc241834 2978 while (len-- > 0) {
f337b084 2979 long along;
fc241834 2980 fromstr = NEXTFROM;
354b74ae 2981 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2982 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 2983 }
49704364
LW
2984 break;
2985#else
2986 /* Fall though! */
a6ec74c1 2987#endif
49704364
LW
2988 case 'l':
2989 while (len-- > 0) {
f337b084 2990 I32 ai32;
49704364 2991 fromstr = NEXTFROM;
354b74ae 2992 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2993 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
2994 }
2995 break;
c174bf3b 2996#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
2997 case 'Q':
2998 while (len-- > 0) {
f337b084 2999 Uquad_t auquad;
a6ec74c1 3000 fromstr = NEXTFROM;
354b74ae 3001 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3002 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3003 }
3004 break;
3005 case 'q':
3006 while (len-- > 0) {
f337b084 3007 Quad_t aquad;
a6ec74c1 3008 fromstr = NEXTFROM;
354b74ae 3009 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3010 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3011 }
3012 break;
1640b983 3013#endif
a6ec74c1
JH
3014 case 'P':
3015 len = 1; /* assume SV is correct length */
f337b084 3016 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3017 /* FALLTHROUGH */
a6ec74c1
JH
3018 case 'p':
3019 while (len-- > 0) {
83003860 3020 const char *aptr;
f337b084 3021
a6ec74c1 3022 fromstr = NEXTFROM;
28a4f200 3023 SvGETMAGIC(fromstr);
3024 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3025 else {
a6ec74c1
JH
3026 /* XXX better yet, could spirit away the string to
3027 * a safe spot and hang on to it until the result
3028 * of pack() (and all copies of the result) are
3029 * gone.
3030 */
041457d9 3031 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3032 !SvREADONLY(fromstr)))) {
3033 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3034 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3035 }
3036 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3037 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3038 else
2596d9fe 3039 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3040 }
3a88beaa 3041 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3042 }
3043 break;
fc241834 3044 case 'u': {
f7fe979e 3045 const char *aptr, *aend;
fc241834 3046 bool from_utf8;
f337b084 3047
a6ec74c1 3048 fromstr = NEXTFROM;
fc241834
RGS
3049 if (len <= 2) len = 45;
3050 else len = len / 3 * 3;
3051 if (len >= 64) {
a2a5de95
NC
3052 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3053 "Field too wide in 'u' format in pack");
fc241834
RGS
3054 len = 63;
3055 }
83003860 3056 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3057 from_utf8 = DO_UTF8(fromstr);
3058 if (from_utf8) {
3059 aend = aptr + fromlen;
3f63b0e5 3060 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3061 } else aend = NULL; /* Unused, but keep compilers happy */
3062 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3063 while (fromlen > 0) {
fc241834 3064 U8 *end;
a6ec74c1 3065 I32 todo;
fc241834 3066 U8 hunk[1+63/3*4+1];
a6ec74c1 3067
eb160463 3068 if ((I32)fromlen > len)
a6ec74c1
JH
3069 todo = len;
3070 else
3071 todo = fromlen;
fc241834
RGS
3072 if (from_utf8) {
3073 char buffer[64];
9df874cd 3074 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3075 'u' | TYPE_IS_PACK)) {
3076 *cur = '\0';
b162af07 3077 SvCUR_set(cat, cur - start);
5637ef5b
NC
3078 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3079 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3080 aptr, aend, buffer, (long) todo);
fc241834 3081 }
e68aed92 3082 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3083 } else {
e68aed92 3084 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3085 aptr += todo;
3086 }
3a88beaa 3087 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3088 fromlen -= todo;
3089 }
a6ec74c1
JH
3090 break;
3091 }
f337b084
TH
3092 }
3093 *cur = '\0';
b162af07 3094 SvCUR_set(cat, cur - start);
f337b084 3095 no_change:
49704364 3096 *symptr = lookahead;
a6ec74c1 3097 }
49704364 3098 return beglist;
18529408
IZ
3099}
3100#undef NEXTFROM
3101
3102
3103PP(pp_pack)
3104{
20b7effb 3105 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3106 SV *cat = TARG;
18529408 3107 STRLEN fromlen;
349d4f2f 3108 SV *pat_sv = *++MARK;
eb578fdb
KW
3109 const char *pat = SvPV_const(pat_sv, fromlen);
3110 const char *patend = pat + fromlen;
18529408
IZ
3111
3112 MARK++;
76f68e9b 3113 sv_setpvs(cat, "");
f337b084 3114 SvUTF8_off(cat);
18529408 3115
7accc089 3116 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3117
a6ec74c1
JH
3118 SvSETMAGIC(cat);
3119 SP = ORIGMARK;
3120 PUSHs(cat);
3121 RETURN;
3122}
a6ec74c1 3123
73cb7263 3124/*
14d04a33 3125 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3126 */