This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
sv.c: Refactor slightly to avoid a goto
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
9df874cd 154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
WL
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
e6006968
KW
219#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
a6ec74c1 221
66c611c5 222/* type modifiers */
62f95557 223#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
224#define TYPE_IS_BIG_ENDIAN 0x200
225#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 226#define TYPE_IS_PACK 0x800
1109a392 227#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 228#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
229#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
230
7212898e
NC
231# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
232# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
233
234# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
235
78d46eaa 236#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 237#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
238#define PACK_SIZE_MASK 0x3F
239
4ad176ef 240#include "packsizetables.inc"
78d46eaa 241
20aa3a38
NC
242static void
243S_reverse_copy(const char *src, char *dest, STRLEN len)
244{
245 dest += len;
246 while (len--)
247 *--dest = *src++;
248}
249
08ca2aa3 250STATIC U8
9df874cd 251utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 252{
08ca2aa3 253 STRLEN retlen;
df337714
KW
254 UV val;
255
256 if (*s >= end) {
257 goto croak;
258 }
259 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 260 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714
KW
261 if (retlen == (STRLEN) -1)
262 croak:
f337b084
TH
263 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
264 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 265 if (val >= 0x100) {
a2a5de95
NC
266 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
267 "Character in '%c' format wrapped in unpack",
268 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
269 val &= 0xff;
270 }
271 *s += retlen;
fe2774ed 272 return (U8)val;
08ca2aa3
TH
273}
274
f337b084 275#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 276 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
277 *(U8 *)(s)++)
278
279STATIC bool
9df874cd 280S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
281{
282 UV val;
283 STRLEN retlen;
f7fe979e 284 const char *from = *s;
08ca2aa3 285 int bad = 0;
f7fe979e 286 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 287 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
288 const bool needs_swap = NEEDS_SWAP(datumtype);
289
228e69a7 290 if (UNLIKELY(needs_swap))
20aa3a38
NC
291 buf += buf_len;
292
08ca2aa3
TH
293 for (;buf_len > 0; buf_len--) {
294 if (from >= end) return FALSE;
f337b084 295 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
df337714 296 if (retlen == (STRLEN) -1) {
08ca2aa3
TH
297 from += UTF8SKIP(from);
298 bad |= 1;
299 } else from += retlen;
300 if (val >= 0x100) {
301 bad |= 2;
302 val &= 0xff;
303 }
228e69a7 304 if (UNLIKELY(needs_swap))
20aa3a38
NC
305 *(U8 *)--buf = (U8)val;
306 else
307 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
308 }
309 /* We have enough characters for the buffer. Did we have problems ? */
310 if (bad) {
311 if (bad & 1) {
312 /* Rewalk the string fragment while warning */
f7fe979e 313 const char *ptr;
9e27e96a 314 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
315 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
316 if (ptr >= end) break;
c80e42f3 317 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
318 }
319 if (from > end) from = end;
320 }
a2a5de95
NC
321 if ((bad & 2))
322 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 323 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
324 "Character(s) in '%c' format wrapped in %s",
325 (int) TYPE_NO_MODIFIERS(datumtype),
326 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
327 }
328 *s = from;
329 return TRUE;
330}
331
64844641 332STATIC char *
d21d7215
KW
333S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
334 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 335
228e69a7 336 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
337 const U8 *p = start + len;
338 while (p-- > start) {
55d09dc8 339 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
340 }
341 } else {
342 const U8 * const end = start + len;
343 while (start < end) {
55d09dc8 344 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
345 start++;
346 }
f337b084 347 }
64844641 348 return dest;
f337b084
TH
349}
350
3a88beaa 351#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 352STMT_START { \
228e69a7 353 if (UNLIKELY(utf8)) \
d21d7215 354 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 355 else { \
228e69a7 356 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
357 S_reverse_copy((char *)(buf), cur, len); \
358 else \
359 Copy(buf, cur, len, char); \
230e1fce
NC
360 (cur) += (len); \
361 } \
f337b084
TH
362} STMT_END
363
364#define GROWING(utf8, cat, start, cur, in_len) \
365STMT_START { \
366 STRLEN glen = (in_len); \
3473cf63 367 if (utf8) glen *= UTF8_EXPAND; \
f337b084 368 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 369 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
370 (cur) = (start) + SvCUR(cat); \
371 } \
372} STMT_END
373
374#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
375STMT_START { \
f7fe979e 376 const STRLEN glen = (in_len); \
f337b084 377 STRLEN gl = glen; \
3473cf63 378 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
379 if ((cur) + gl >= (start) + SvLEN(cat)) { \
380 *cur = '\0'; \
b162af07 381 SvCUR_set((cat), (cur) - (start)); \
0bd48802 382 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
383 (cur) = (start) + SvCUR(cat); \
384 } \
3a88beaa 385 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
386} STMT_END
387
388#define PUSH_BYTE(utf8, s, byte) \
389STMT_START { \
390 if (utf8) { \
f7fe979e 391 const U8 au8 = (byte); \
d21d7215 392 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
393 } else *(U8 *)(s)++ = (byte); \
394} STMT_END
395
396/* Only to be used inside a loop (see the break) */
397#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
398STMT_START { \
399 STRLEN retlen; \
400 if (str >= end) break; \
401 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
df337714 402 if (retlen == (STRLEN) -1) { \
f337b084
TH
403 *cur = '\0'; \
404 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
405 } \
406 str += retlen; \
407} STMT_END
408
f7fe979e
AL
409static const char *_action( const tempsym_t* symptr )
410{
10edeb5d 411 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
412}
413
206947d2 414/* Returns the sizeof() struct described by pat */
028d1f6d 415STATIC I32
f337b084 416S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 417{
f337b084 418 I32 total = 0;
206947d2 419
7918f24d
NC
420 PERL_ARGS_ASSERT_MEASURE_STRUCT;
421
49704364 422 while (next_symbol(symptr)) {
f337b084 423 I32 len;
f7fe979e 424 int size;
f337b084
TH
425
426 switch (symptr->howlen) {
fc241834 427 case e_star:
49704364 428 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 429 _action( symptr ) );
81d52ecd 430
f337b084
TH
431 default:
432 /* e_no_len and e_number */
433 len = symptr->length;
434 break;
49704364
WL
435 }
436
a7a3cfaa 437 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 438 if (!size) {
f7fe979e 439 int star;
80a13697
NC
440 /* endianness doesn't influence the size of a type */
441 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
442 default:
443 Perl_croak(aTHX_ "Invalid type '%c' in %s",
444 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 445 _action( symptr ) );
28be1210
TH
446 case '.' | TYPE_IS_SHRIEKING:
447 case '@' | TYPE_IS_SHRIEKING:
80a13697 448 case '@':
28be1210 449 case '.':
80a13697
NC
450 case '/':
451 case 'U': /* XXXX Is it correct? */
452 case 'w':
453 case 'u':
454 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 455 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 456 _action( symptr ) );
80a13697
NC
457 case '%':
458 size = 0;
459 break;
460 case '(':
fc241834
RGS
461 {
462 tempsym_t savsym = *symptr;
463 symptr->patptr = savsym.grpbeg;
464 symptr->patend = savsym.grpend;
465 /* XXXX Theoretically, we need to measure many times at
466 different positions, since the subexpression may contain
467 alignment commands, but be not of aligned length.
468 Need to detect this and croak(). */
469 size = measure_struct(symptr);
470 *symptr = savsym;
471 break;
472 }
80a13697
NC
473 case 'X' | TYPE_IS_SHRIEKING:
474 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
475 */
476 if (!len) /* Avoid division by 0 */
477 len = 1;
478 len = total % len; /* Assumed: the start is aligned. */
924ba076 479 /* FALLTHROUGH */
80a13697
NC
480 case 'X':
481 size = -1;
482 if (total < len)
f7fe979e 483 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
484 break;
485 case 'x' | TYPE_IS_SHRIEKING:
486 if (!len) /* Avoid division by 0 */
487 len = 1;
488 star = total % len; /* Assumed: the start is aligned. */
489 if (star) /* Other portable ways? */
490 len = len - star;
491 else
492 len = 0;
924ba076 493 /* FALLTHROUGH */
80a13697
NC
494 case 'x':
495 case 'A':
496 case 'Z':
497 case 'a':
80a13697
NC
498 size = 1;
499 break;
500 case 'B':
501 case 'b':
502 len = (len + 7)/8;
503 size = 1;
504 break;
505 case 'H':
506 case 'h':
507 len = (len + 1)/2;
508 size = 1;
509 break;
78d46eaa 510
80a13697
NC
511 case 'P':
512 len = 1;
513 size = sizeof(char*);
78d46eaa
NC
514 break;
515 }
206947d2
IZ
516 }
517 total += len * size;
518 }
519 return total;
520}
521
49704364
WL
522
523/* locate matching closing parenthesis or bracket
524 * returns char pointer to char after match, or NULL
525 */
f7fe979e 526STATIC const char *
5aaab254 527S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 528{
7918f24d
NC
529 PERL_ARGS_ASSERT_GROUP_END;
530
49704364 531 while (patptr < patend) {
f7fe979e 532 const char c = *patptr++;
49704364
WL
533
534 if (isSPACE(c))
535 continue;
536 else if (c == ender)
537 return patptr-1;
538 else if (c == '#') {
539 while (patptr < patend && *patptr != '\n')
540 patptr++;
541 continue;
542 } else if (c == '(')
543 patptr = group_end(patptr, patend, ')') + 1;
544 else if (c == '[')
545 patptr = group_end(patptr, patend, ']') + 1;
18529408 546 }
49704364
WL
547 Perl_croak(aTHX_ "No group ending character '%c' found in template",
548 ender);
a25b5927 549 NOT_REACHED; /* NOTREACHED */
18529408
IZ
550}
551
49704364
WL
552
553/* Convert unsigned decimal number to binary.
554 * Expects a pointer to the first digit and address of length variable
555 * Advances char pointer to 1st non-digit char and returns number
fc241834 556 */
f7fe979e 557STATIC const char *
5aaab254 558S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
559{
560 I32 len = *patptr++ - '0';
7918f24d
NC
561
562 PERL_ARGS_ASSERT_GET_NUM;
563
49704364
WL
564 while (isDIGIT(*patptr)) {
565 if (len >= 0x7FFFFFFF/10)
566 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
567 len = (len * 10) + (*patptr++ - '0');
568 }
569 *lenptr = len;
570 return patptr;
571}
572
573/* The marvellous template parsing routine: Using state stored in *symptr,
574 * locates next template code and count
575 */
576STATIC bool
f337b084 577S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 578{
f7fe979e 579 const char* patptr = symptr->patptr;
0bcc34c2 580 const char* const patend = symptr->patend;
49704364 581
7918f24d
NC
582 PERL_ARGS_ASSERT_NEXT_SYMBOL;
583
49704364
WL
584 symptr->flags &= ~FLAG_SLASH;
585
586 while (patptr < patend) {
587 if (isSPACE(*patptr))
588 patptr++;
589 else if (*patptr == '#') {
590 patptr++;
591 while (patptr < patend && *patptr != '\n')
592 patptr++;
593 if (patptr < patend)
594 patptr++;
595 } else {
fc241834 596 /* We should have found a template code */
49704364 597 I32 code = *patptr++ & 0xFF;
66c611c5 598 U32 inherited_modifiers = 0;
49704364
WL
599
600 if (code == ','){ /* grandfather in commas but with a warning */
601 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
602 symptr->flags |= FLAG_COMMA;
603 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 604 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
605 }
606 continue;
607 }
fc241834 608
49704364 609 /* for '(', skip to ')' */
fc241834 610 if (code == '(') {
49704364
WL
611 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
612 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 613 _action( symptr ) );
49704364
WL
614 symptr->grpbeg = patptr;
615 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
616 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
617 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 618 _action( symptr ) );
49704364
WL
619 }
620
66c611c5
MHM
621 /* look for group modifiers to inherit */
622 if (TYPE_ENDIANNESS(symptr->flags)) {
623 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
624 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
625 }
626
1109a392
MHM
627 /* look for modifiers */
628 while (patptr < patend) {
b7787f18
AL
629 const char *allowed;
630 I32 modifier;
1109a392
MHM
631 switch (*patptr) {
632 case '!':
633 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 634 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
635 break;
636 case '>':
637 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 638 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
639 break;
640 case '<':
641 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 642 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
643 break;
644 default:
b7787f18
AL
645 allowed = "";
646 modifier = 0;
1109a392
MHM
647 break;
648 }
66c611c5 649
1109a392
MHM
650 if (modifier == 0)
651 break;
66c611c5 652
1109a392
MHM
653 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
654 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 655 allowed, _action( symptr ) );
66c611c5
MHM
656
657 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 658 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 659 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
660 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
661 TYPE_ENDIANNESS_MASK)
662 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 663 *patptr, _action( symptr ) );
66c611c5 664
a2a5de95
NC
665 if ((code & modifier)) {
666 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
667 "Duplicate modifier '%c' after '%c' in %s",
668 *patptr, (int) TYPE_NO_MODIFIERS(code),
669 _action( symptr ) );
1109a392 670 }
66c611c5 671
1109a392
MHM
672 code |= modifier;
673 patptr++;
49704364
WL
674 }
675
66c611c5
MHM
676 /* inherit modifiers */
677 code |= inherited_modifiers;
678
fc241834 679 /* look for count and/or / */
49704364
WL
680 if (patptr < patend) {
681 if (isDIGIT(*patptr)) {
682 patptr = get_num( patptr, &symptr->length );
683 symptr->howlen = e_number;
684
685 } else if (*patptr == '*') {
686 patptr++;
687 symptr->howlen = e_star;
688
689 } else if (*patptr == '[') {
f7fe979e 690 const char* lenptr = ++patptr;
49704364
WL
691 symptr->howlen = e_number;
692 patptr = group_end( patptr, patend, ']' ) + 1;
693 /* what kind of [] is it? */
694 if (isDIGIT(*lenptr)) {
695 lenptr = get_num( lenptr, &symptr->length );
696 if( *lenptr != ']' )
697 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 698 _action( symptr ) );
49704364
WL
699 } else {
700 tempsym_t savsym = *symptr;
701 symptr->patend = patptr-1;
702 symptr->patptr = lenptr;
703 savsym.length = measure_struct(symptr);
704 *symptr = savsym;
705 }
706 } else {
707 symptr->howlen = e_no_len;
708 symptr->length = 1;
709 }
710
711 /* try to find / */
712 while (patptr < patend) {
713 if (isSPACE(*patptr))
714 patptr++;
715 else if (*patptr == '#') {
716 patptr++;
717 while (patptr < patend && *patptr != '\n')
718 patptr++;
719 if (patptr < patend)
720 patptr++;
721 } else {
66c611c5 722 if (*patptr == '/') {
49704364
WL
723 symptr->flags |= FLAG_SLASH;
724 patptr++;
66c611c5
MHM
725 if (patptr < patend &&
726 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 727 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 728 _action( symptr ) );
49704364
WL
729 }
730 break;
731 }
18529408 732 }
49704364
WL
733 } else {
734 /* at end - no count, no / */
735 symptr->howlen = e_no_len;
736 symptr->length = 1;
737 }
738
739 symptr->code = code;
fc241834 740 symptr->patptr = patptr;
49704364 741 return TRUE;
18529408 742 }
49704364 743 }
fc241834 744 symptr->patptr = patptr;
49704364 745 return FALSE;
18529408
IZ
746}
747
18529408 748/*
fc241834 749 There is no way to cleanly handle the case where we should process the
08ca2aa3 750 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
751 (e.g. estimates like strend-s as an upper bound for the number of
752 characters left wouldn't work). So if we foresee the need of this
753 (pattern starts with U or contains U0), we want to work on the encoded
754 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
755 themselves if they need to do a lot of unpacks like this on it
756*/
fc241834 757STATIC bool
08ca2aa3
TH
758need_utf8(const char *pat, const char *patend)
759{
760 bool first = TRUE;
7918f24d
NC
761
762 PERL_ARGS_ASSERT_NEED_UTF8;
763
08ca2aa3
TH
764 while (pat < patend) {
765 if (pat[0] == '#') {
766 pat++;
f7fe979e 767 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
768 if (!pat) return FALSE;
769 } else if (pat[0] == 'U') {
770 if (first || pat[1] == '0') return TRUE;
771 } else first = FALSE;
772 pat++;
773 }
774 return FALSE;
775}
776
777STATIC char
778first_symbol(const char *pat, const char *patend) {
7918f24d
NC
779 PERL_ARGS_ASSERT_FIRST_SYMBOL;
780
08ca2aa3
TH
781 while (pat < patend) {
782 if (pat[0] != '#') return pat[0];
783 pat++;
f7fe979e 784 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
785 if (!pat) return 0;
786 pat++;
787 }
788 return 0;
789}
790
791/*
dcccc8ff
KW
792
793=head1 Pack and Unpack
794
7accc089
JH
795=for apidoc unpackstring
796
796b6530 797The engine implementing the C<unpack()> Perl function.
21ebfc7a 798
796b6530
KW
799Using the template C<pat..patend>, this function unpacks the string
800C<s..strend> into a number of mortal SVs, which it pushes onto the perl
801argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 802C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
803pushed elements.
804
796b6530
KW
805The C<strend> and C<patend> pointers should point to the byte following the
806last character of each string.
21ebfc7a
DM
807
808Although this function returns its values on the perl argument stack, it
809doesn't take any parameters from that stack (and thus in particular
796b6530 810there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 811example).
7accc089
JH
812
813=cut */
814
815I32
f7fe979e 816Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 817{
f7fe979e 818 tempsym_t sym;
08ca2aa3 819
7918f24d
NC
820 PERL_ARGS_ASSERT_UNPACKSTRING;
821
f337b084 822 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
823 else if (need_utf8(pat, patend)) {
824 /* We probably should try to avoid this in case a scalar context call
825 wouldn't get to the "U0" */
826 STRLEN len = strend - s;
230e1fce 827 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
828 SAVEFREEPV(s);
829 strend = s + len;
f337b084 830 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
831 }
832
f337b084
TH
833 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
834 flags |= FLAG_PARSE_UTF8;
08ca2aa3 835
f7fe979e 836 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
837
838 return unpack_rec(&sym, s, s, strend, NULL );
839}
840
4136a0f7 841STATIC I32
f7fe979e 842S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 843{
20b7effb 844 dSP;
3297d27d 845 SV *sv = NULL;
f7fe979e 846 const I32 start_sp_offset = SP - PL_stack_base;
49704364 847 howlen_t howlen;
a6ec74c1 848 I32 checksum = 0;
92d41999 849 UV cuv = 0;
a6ec74c1 850 NV cdouble = 0.0;
f337b084 851 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 852 bool beyond = FALSE;
21c16052 853 bool explicit_length;
9e27e96a 854 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 855 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
856
857 PERL_ARGS_ASSERT_UNPACK_REC;
858
28be1210 859 symptr->strbeg = s - strbeg;
49704364 860
49704364 861 while (next_symbol(symptr)) {
a7a3cfaa 862 packprops_t props;
9e27e96a 863 I32 len;
f337b084 864 I32 datumtype = symptr->code;
a1219b5e 865 bool needs_swap;
206947d2 866 /* do first one only unless in list context
08ca2aa3 867 / is implemented by unpacking the count, then popping it from the
206947d2 868 stack, so must check that we're not in the middle of a / */
49704364 869 if ( unpack_only_one
206947d2 870 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 871 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 872 break;
49704364 873
f337b084 874 switch (howlen = symptr->howlen) {
fc241834
RGS
875 case e_star:
876 len = strend - strbeg; /* long enough */
49704364 877 break;
f337b084
TH
878 default:
879 /* e_no_len and e_number */
880 len = symptr->length;
881 break;
49704364 882 }
18529408 883
21c16052 884 explicit_length = TRUE;
a6ec74c1 885 redo_switch:
49704364 886 beyond = s >= strend;
a7a3cfaa
TH
887
888 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
889 if (props) {
890 /* props nonzero means we can process this letter. */
9e27e96a
AL
891 const long size = props & PACK_SIZE_MASK;
892 const long howmany = (strend - s) / size;
a7a3cfaa
TH
893 if (len > howmany)
894 len = howmany;
895
896 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
897 if (len && unpack_only_one) len = 1;
898 EXTEND(SP, len);
899 EXTEND_MORTAL(len);
78d46eaa
NC
900 }
901 }
a7a3cfaa 902
a1219b5e
NC
903 needs_swap = NEEDS_SWAP(datumtype);
904
1109a392 905 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 906 default:
1109a392 907 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 908
a6ec74c1 909 case '%':
49704364 910 if (howlen == e_no_len)
18529408 911 len = 16; /* len is not specified */
a6ec74c1 912 checksum = len;
92d41999 913 cuv = 0;
a6ec74c1 914 cdouble = 0;
18529408 915 continue;
81d52ecd 916
18529408
IZ
917 case '(':
918 {
49704364 919 tempsym_t savsym = *symptr;
9e27e96a 920 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 921 symptr->flags |= group_modifiers;
49704364 922 symptr->patend = savsym.grpend;
28be1210 923 symptr->previous = &savsym;
49704364 924 symptr->level++;
18529408 925 PUTBACK;
c6f750d1 926 if (len && unpack_only_one) len = 1;
18529408 927 while (len--) {
49704364 928 symptr->patptr = savsym.grpbeg;
f337b084
TH
929 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
930 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
931 unpack_rec(symptr, s, strbeg, strend, &s);
932 if (s == strend && savsym.howlen == e_star)
49704364 933 break; /* No way to continue */
18529408
IZ
934 }
935 SPAGAIN;
28be1210 936 savsym.flags = symptr->flags & ~group_modifiers;
49704364 937 *symptr = savsym;
18529408
IZ
938 break;
939 }
28be1210 940 case '.' | TYPE_IS_SHRIEKING:
28be1210 941 case '.': {
9e27e96a 942 const char *from;
28be1210 943 SV *sv;
9e27e96a 944 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
945 if (howlen == e_star) from = strbeg;
946 else if (len <= 0) from = s;
947 else {
948 tempsym_t *group = symptr;
949
950 while (--len && group) group = group->previous;
951 from = group ? strbeg + group->strbeg : strbeg;
952 }
953 sv = from <= s ?
00646304
CB
954 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
955 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 956 mXPUSHs(sv);
28be1210
TH
957 break;
958 }
28be1210 959 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 960 case '@':
28be1210 961 s = strbeg + symptr->strbeg;
28be1210 962 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 963 {
08ca2aa3
TH
964 while (len > 0) {
965 if (s >= strend)
966 Perl_croak(aTHX_ "'@' outside of string in unpack");
967 s += UTF8SKIP(s);
968 len--;
969 }
970 if (s > strend)
971 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
972 } else {
28be1210 973 if (strend-s < len)
fc241834 974 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 975 s += len;
08ca2aa3 976 }
a6ec74c1 977 break;
62f95557
IZ
978 case 'X' | TYPE_IS_SHRIEKING:
979 if (!len) /* Avoid division by 0 */
980 len = 1;
08ca2aa3 981 if (utf8) {
f7fe979e 982 const char *hop, *last;
f337b084
TH
983 I32 l = len;
984 hop = last = strbeg;
985 while (hop < s) {
986 hop += UTF8SKIP(hop);
987 if (--l == 0) {
08ca2aa3 988 last = hop;
f337b084
TH
989 l = len;
990 }
fc241834 991 }
f337b084
TH
992 if (last > s)
993 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
994 s = last;
995 break;
f337b084
TH
996 }
997 len = (s - strbeg) % len;
924ba076 998 /* FALLTHROUGH */
a6ec74c1 999 case 'X':
08ca2aa3
TH
1000 if (utf8) {
1001 while (len > 0) {
1002 if (s <= strbeg)
1003 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1004 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1005 if (s <= strbeg)
1006 Perl_croak(aTHX_ "'X' outside of string in unpack");
1007 }
1008 len--;
1009 }
1010 } else {
fc241834
RGS
1011 if (len > s - strbeg)
1012 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1013 s -= len;
08ca2aa3 1014 }
a6ec74c1 1015 break;
9e27e96a
AL
1016 case 'x' | TYPE_IS_SHRIEKING: {
1017 I32 ai32;
62f95557
IZ
1018 if (!len) /* Avoid division by 0 */
1019 len = 1;
230e1fce
NC
1020 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1021 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1022 if (ai32 == 0) break;
1023 len -= ai32;
9e27e96a 1024 }
924ba076 1025 /* FALLTHROUGH */
a6ec74c1 1026 case 'x':
08ca2aa3
TH
1027 if (utf8) {
1028 while (len>0) {
1029 if (s >= strend)
1030 Perl_croak(aTHX_ "'x' outside of string in unpack");
1031 s += UTF8SKIP(s);
1032 len--;
1033 }
1034 } else {
fc241834
RGS
1035 if (len > strend - s)
1036 Perl_croak(aTHX_ "'x' outside of string in unpack");
1037 s += len;
f337b084 1038 }
a6ec74c1
JH
1039 break;
1040 case '/':
49704364 1041 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1042
a6ec74c1
JH
1043 case 'A':
1044 case 'Z':
1045 case 'a':
08ca2aa3
TH
1046 if (checksum) {
1047 /* Preliminary length estimate is assumed done in 'W' */
1048 if (len > strend - s) len = strend - s;
1049 goto W_checksum;
1050 }
1051 if (utf8) {
1052 I32 l;
f7fe979e 1053 const char *hop;
08ca2aa3
TH
1054 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1055 if (hop >= strend) {
1056 if (hop > strend)
1057 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1058 break;
fc241834 1059 }
a6ec74c1 1060 }
08ca2aa3
TH
1061 if (hop > strend)
1062 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1063 len = hop - s;
1064 } else if (len > strend - s)
1065 len = strend - s;
1066
1067 if (datumtype == 'Z') {
1068 /* 'Z' strips stuff after first null */
f7fe979e 1069 const char *ptr, *end;
f337b084
TH
1070 end = s + len;
1071 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1072 sv = newSVpvn(s, ptr-s);
1073 if (howlen == e_star) /* exact for 'Z*' */
1074 len = ptr-s + (ptr != strend ? 1 : 0);
1075 } else if (datumtype == 'A') {
1076 /* 'A' strips both nulls and spaces */
f7fe979e 1077 const char *ptr;
18bdf90a 1078 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1079 for (ptr = s+len-1; ptr >= s; ptr--) {
1080 if ( *ptr != 0
1081 && !UTF8_IS_CONTINUATION(*ptr)
1082 && !isSPACE_utf8_safe(ptr, strend))
1083 {
1084 break;
1085 }
1086 }
18bdf90a
TH
1087 if (ptr >= s) ptr += UTF8SKIP(ptr);
1088 else ptr++;
28be1210 1089 if (ptr > s+len)
18bdf90a
TH
1090 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1091 } else {
1092 for (ptr = s+len-1; ptr >= s; ptr--)
1093 if (*ptr != 0 && !isSPACE(*ptr)) break;
1094 ptr++;
1095 }
08ca2aa3
TH
1096 sv = newSVpvn(s, ptr-s);
1097 } else sv = newSVpvn(s, len);
1098
1099 if (utf8) {
1100 SvUTF8_on(sv);
1101 /* Undo any upgrade done due to need_utf8() */
f337b084 1102 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1103 sv_utf8_downgrade(sv, 0);
a6ec74c1 1104 }
6e449a3a 1105 mXPUSHs(sv);
08ca2aa3 1106 s += len;
a6ec74c1
JH
1107 break;
1108 case 'B':
08ca2aa3
TH
1109 case 'b': {
1110 char *str;
49704364 1111 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1112 len = (strend - s) * 8;
1113 if (checksum) {
f337b084 1114 if (utf8)
08ca2aa3 1115 while (len >= 8 && s < strend) {
9df874cd 1116 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1117 len -= 8;
1118 }
f337b084 1119 else
fc241834 1120 while (len >= 8) {
08ca2aa3 1121 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1122 len -= 8;
1123 }
08ca2aa3
TH
1124 if (len && s < strend) {
1125 U8 bits;
f337b084
TH
1126 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1127 if (datumtype == 'b')
a6ec74c1 1128 while (len-- > 0) {
92d41999 1129 if (bits & 1) cuv++;
a6ec74c1
JH
1130 bits >>= 1;
1131 }
f337b084 1132 else
a6ec74c1 1133 while (len-- > 0) {
08ca2aa3 1134 if (bits & 0x80) cuv++;
a6ec74c1
JH
1135 bits <<= 1;
1136 }
fc241834 1137 }
a6ec74c1
JH
1138 break;
1139 }
08ca2aa3 1140
561b68a9 1141 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1142 SvPOK_on(sv);
1143 str = SvPVX(sv);
1144 if (datumtype == 'b') {
f337b084 1145 U8 bits = 0;
f7fe979e 1146 const I32 ai32 = len;
08ca2aa3
TH
1147 for (len = 0; len < ai32; len++) {
1148 if (len & 7) bits >>= 1;
1149 else if (utf8) {
1150 if (s >= strend) break;
9df874cd 1151 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1152 } else bits = *(U8 *) s++;
1153 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1154 }
08ca2aa3 1155 } else {
f337b084 1156 U8 bits = 0;
f7fe979e 1157 const I32 ai32 = len;
08ca2aa3
TH
1158 for (len = 0; len < ai32; len++) {
1159 if (len & 7) bits <<= 1;
1160 else if (utf8) {
1161 if (s >= strend) break;
9df874cd 1162 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1163 } else bits = *(U8 *) s++;
1164 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1165 }
1166 }
1167 *str = '\0';
aa07b2f6 1168 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1169 XPUSHs(sv);
a6ec74c1 1170 break;
08ca2aa3 1171 }
a6ec74c1 1172 case 'H':
08ca2aa3 1173 case 'h': {
3297d27d 1174 char *str = NULL;
fc241834 1175 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1176 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1177 len = (strend - s) * 2;
858fe5e1
TC
1178 if (!checksum) {
1179 sv = sv_2mortal(newSV(len ? len : 1));
1180 SvPOK_on(sv);
1181 str = SvPVX(sv);
1182 }
a6ec74c1 1183 if (datumtype == 'h') {
f337b084 1184 U8 bits = 0;
9e27e96a 1185 I32 ai32 = len;
fc241834
RGS
1186 for (len = 0; len < ai32; len++) {
1187 if (len & 1) bits >>= 4;
1188 else if (utf8) {
1189 if (s >= strend) break;
9df874cd 1190 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1191 } else bits = * (U8 *) s++;
858fe5e1
TC
1192 if (!checksum)
1193 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1194 }
08ca2aa3 1195 } else {
f337b084 1196 U8 bits = 0;
f7fe979e 1197 const I32 ai32 = len;
08ca2aa3
TH
1198 for (len = 0; len < ai32; len++) {
1199 if (len & 1) bits <<= 4;
1200 else if (utf8) {
1201 if (s >= strend) break;
9df874cd 1202 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1203 } else bits = *(U8 *) s++;
858fe5e1
TC
1204 if (!checksum)
1205 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1206 }
1207 }
858fe5e1
TC
1208 if (!checksum) {
1209 *str = '\0';
1210 SvCUR_set(sv, str - SvPVX_const(sv));
1211 XPUSHs(sv);
1212 }
a6ec74c1 1213 break;
08ca2aa3 1214 }
1651fc44
ML
1215 case 'C':
1216 if (len == 0) {
1217 if (explicit_length)
1218 /* Switch to "character" mode */
1219 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1220 break;
1221 }
924ba076 1222 /* FALLTHROUGH */
a6ec74c1 1223 case 'c':
1651fc44
ML
1224 while (len-- > 0 && s < strend) {
1225 int aint;
1226 if (utf8)
1227 {
1228 STRLEN retlen;
1229 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1230 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1231 if (retlen == (STRLEN) -1)
1651fc44
ML
1232 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1233 s += retlen;
1234 }
1235 else
1236 aint = *(U8 *)(s)++;
1237 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1238 aint -= 256;
08ca2aa3 1239 if (!checksum)
6e449a3a 1240 mPUSHi(aint);
73cb7263
NC
1241 else if (checksum > bits_in_uv)
1242 cdouble += (NV)aint;
1243 else
1244 cuv += aint;
a6ec74c1
JH
1245 }
1246 break;
08ca2aa3
TH
1247 case 'W':
1248 W_checksum:
1651fc44 1249 if (utf8) {
08ca2aa3 1250 while (len-- > 0 && s < strend) {
08ca2aa3 1251 STRLEN retlen;
f7fe979e 1252 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1253 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1254 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1255 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1256 s += retlen;
1257 if (!checksum)
6e449a3a 1258 mPUSHu(val);
08ca2aa3
TH
1259 else if (checksum > bits_in_uv)
1260 cdouble += (NV) val;
d6d3e8bd 1261 else
08ca2aa3 1262 cuv += val;
fc241834 1263 }
08ca2aa3 1264 } else if (!checksum)
a6ec74c1 1265 while (len-- > 0) {
f7fe979e 1266 const U8 ch = *(U8 *) s++;
6e449a3a 1267 mPUSHu(ch);
a6ec74c1 1268 }
08ca2aa3
TH
1269 else if (checksum > bits_in_uv)
1270 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1271 else
1272 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1273 break;
1274 case 'U':
35bcd338 1275 if (len == 0) {
c5333953 1276 if (explicit_length && howlen != e_star) {
08ca2aa3 1277 /* Switch to "bytes in UTF-8" mode */
f337b084 1278 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1279 else
1280 /* Should be impossible due to the need_utf8() test */
1281 Perl_croak(aTHX_ "U0 mode on a byte string");
1282 }
35bcd338
JH
1283 break;
1284 }
08ca2aa3 1285 if (len > strend - s) len = strend - s;
fc241834 1286 if (!checksum) {
08ca2aa3
TH
1287 if (len && unpack_only_one) len = 1;
1288 EXTEND(SP, len);
1289 EXTEND_MORTAL(len);
fc241834 1290 }
08ca2aa3
TH
1291 while (len-- > 0 && s < strend) {
1292 STRLEN retlen;
1293 UV auv;
1294 if (utf8) {
1295 U8 result[UTF8_MAXLEN];
f7fe979e 1296 const char *ptr = s;
08ca2aa3 1297 STRLEN len;
08ca2aa3
TH
1298 /* Bug: warns about bad utf8 even if we are short on bytes
1299 and will break out of the loop */
9df874cd 1300 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1301 'U'))
08ca2aa3
TH
1302 break;
1303 len = UTF8SKIP(result);
9df874cd 1304 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1305 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1306 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1307 len,
1308 &retlen,
1309 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1310 s = ptr;
1311 } else {
3ece276e
KW
1312 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1313 strend - s,
1314 &retlen,
1315 UTF8_ALLOW_DEFAULT));
df337714 1316 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1317 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1318 s += retlen;
1319 }
1320 if (!checksum)
6e449a3a 1321 mPUSHu(auv);
73cb7263 1322 else if (checksum > bits_in_uv)
08ca2aa3 1323 cdouble += (NV) auv;
73cb7263 1324 else
08ca2aa3 1325 cuv += auv;
a6ec74c1
JH
1326 }
1327 break;
49704364
WL
1328 case 's' | TYPE_IS_SHRIEKING:
1329#if SHORTSIZE != SIZE16
73cb7263 1330 while (len-- > 0) {
08ca2aa3 1331 short ashort;
aaec8192 1332 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1333 if (!checksum)
6e449a3a 1334 mPUSHi(ashort);
73cb7263
NC
1335 else if (checksum > bits_in_uv)
1336 cdouble += (NV)ashort;
1337 else
1338 cuv += ashort;
49704364
WL
1339 }
1340 break;
1341#else
924ba076 1342 /* FALLTHROUGH */
a6ec74c1 1343#endif
49704364 1344 case 's':
73cb7263 1345 while (len-- > 0) {
08ca2aa3
TH
1346 I16 ai16;
1347
1348#if U16SIZE > SIZE16
1349 ai16 = 0;
1350#endif
aaec8192 1351 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1352#if U16SIZE > SIZE16
73cb7263
NC
1353 if (ai16 > 32767)
1354 ai16 -= 65536;
a6ec74c1 1355#endif
08ca2aa3 1356 if (!checksum)
6e449a3a 1357 mPUSHi(ai16);
73cb7263
NC
1358 else if (checksum > bits_in_uv)
1359 cdouble += (NV)ai16;
1360 else
1361 cuv += ai16;
a6ec74c1
JH
1362 }
1363 break;
49704364
WL
1364 case 'S' | TYPE_IS_SHRIEKING:
1365#if SHORTSIZE != SIZE16
73cb7263 1366 while (len-- > 0) {
08ca2aa3 1367 unsigned short aushort;
aaec8192
NC
1368 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1369 needs_swap);
08ca2aa3 1370 if (!checksum)
6e449a3a 1371 mPUSHu(aushort);
73cb7263
NC
1372 else if (checksum > bits_in_uv)
1373 cdouble += (NV)aushort;
1374 else
1375 cuv += aushort;
49704364
WL
1376 }
1377 break;
1378#else
924ba076 1379 /* FALLTHROUGH */
49704364 1380#endif
a6ec74c1
JH
1381 case 'v':
1382 case 'n':
1383 case 'S':
73cb7263 1384 while (len-- > 0) {
08ca2aa3
TH
1385 U16 au16;
1386#if U16SIZE > SIZE16
1387 au16 = 0;
1388#endif
aaec8192 1389 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1390 if (datumtype == 'n')
1391 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1392 if (datumtype == 'v')
1393 au16 = vtohs(au16);
08ca2aa3 1394 if (!checksum)
6e449a3a 1395 mPUSHu(au16);
73cb7263 1396 else if (checksum > bits_in_uv)
f337b084 1397 cdouble += (NV) au16;
73cb7263
NC
1398 else
1399 cuv += au16;
a6ec74c1
JH
1400 }
1401 break;
068bd2e7
MHM
1402 case 'v' | TYPE_IS_SHRIEKING:
1403 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1404 while (len-- > 0) {
08ca2aa3
TH
1405 I16 ai16;
1406# if U16SIZE > SIZE16
1407 ai16 = 0;
1408# endif
aaec8192 1409 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1410 /* There should never be any byte-swapping here. */
1411 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1412 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1413 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1414 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1415 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1416 if (!checksum)
6e449a3a 1417 mPUSHi(ai16);
73cb7263 1418 else if (checksum > bits_in_uv)
08ca2aa3 1419 cdouble += (NV) ai16;
73cb7263
NC
1420 else
1421 cuv += ai16;
068bd2e7
MHM
1422 }
1423 break;
a6ec74c1 1424 case 'i':
49704364 1425 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1426 while (len-- > 0) {
08ca2aa3 1427 int aint;
aaec8192 1428 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1429 if (!checksum)
6e449a3a 1430 mPUSHi(aint);
73cb7263
NC
1431 else if (checksum > bits_in_uv)
1432 cdouble += (NV)aint;
1433 else
1434 cuv += aint;
a6ec74c1
JH
1435 }
1436 break;
1437 case 'I':
49704364 1438 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1439 while (len-- > 0) {
08ca2aa3 1440 unsigned int auint;
aaec8192 1441 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1442 if (!checksum)
6e449a3a 1443 mPUSHu(auint);
73cb7263
NC
1444 else if (checksum > bits_in_uv)
1445 cdouble += (NV)auint;
1446 else
1447 cuv += auint;
a6ec74c1
JH
1448 }
1449 break;
92d41999 1450 case 'j':
73cb7263 1451 while (len-- > 0) {
08ca2aa3 1452 IV aiv;
aaec8192 1453 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1454 if (!checksum)
6e449a3a 1455 mPUSHi(aiv);
73cb7263
NC
1456 else if (checksum > bits_in_uv)
1457 cdouble += (NV)aiv;
1458 else
1459 cuv += aiv;
92d41999
JH
1460 }
1461 break;
1462 case 'J':
73cb7263 1463 while (len-- > 0) {
08ca2aa3 1464 UV auv;
aaec8192 1465 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1466 if (!checksum)
6e449a3a 1467 mPUSHu(auv);
73cb7263
NC
1468 else if (checksum > bits_in_uv)
1469 cdouble += (NV)auv;
1470 else
1471 cuv += auv;
92d41999
JH
1472 }
1473 break;
49704364
WL
1474 case 'l' | TYPE_IS_SHRIEKING:
1475#if LONGSIZE != SIZE32
73cb7263 1476 while (len-- > 0) {
08ca2aa3 1477 long along;
aaec8192 1478 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1479 if (!checksum)
6e449a3a 1480 mPUSHi(along);
73cb7263
NC
1481 else if (checksum > bits_in_uv)
1482 cdouble += (NV)along;
1483 else
1484 cuv += along;
49704364
WL
1485 }
1486 break;
1487#else
924ba076 1488 /* FALLTHROUGH */
a6ec74c1 1489#endif
49704364 1490 case 'l':
73cb7263 1491 while (len-- > 0) {
08ca2aa3
TH
1492 I32 ai32;
1493#if U32SIZE > SIZE32
1494 ai32 = 0;
1495#endif
aaec8192 1496 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1497#if U32SIZE > SIZE32
08ca2aa3 1498 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1499#endif
08ca2aa3 1500 if (!checksum)
6e449a3a 1501 mPUSHi(ai32);
73cb7263
NC
1502 else if (checksum > bits_in_uv)
1503 cdouble += (NV)ai32;
1504 else
1505 cuv += ai32;
a6ec74c1
JH
1506 }
1507 break;
49704364
WL
1508 case 'L' | TYPE_IS_SHRIEKING:
1509#if LONGSIZE != SIZE32
73cb7263 1510 while (len-- > 0) {
08ca2aa3 1511 unsigned long aulong;
aaec8192 1512 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1513 if (!checksum)
6e449a3a 1514 mPUSHu(aulong);
73cb7263
NC
1515 else if (checksum > bits_in_uv)
1516 cdouble += (NV)aulong;
1517 else
1518 cuv += aulong;
49704364
WL
1519 }
1520 break;
1521#else
924ba076 1522 /* FALLTHROUGH */
49704364 1523#endif
a6ec74c1
JH
1524 case 'V':
1525 case 'N':
1526 case 'L':
73cb7263 1527 while (len-- > 0) {
08ca2aa3
TH
1528 U32 au32;
1529#if U32SIZE > SIZE32
1530 au32 = 0;
1531#endif
aaec8192 1532 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1533 if (datumtype == 'N')
1534 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1535 if (datumtype == 'V')
1536 au32 = vtohl(au32);
08ca2aa3 1537 if (!checksum)
6e449a3a 1538 mPUSHu(au32);
fc241834
RGS
1539 else if (checksum > bits_in_uv)
1540 cdouble += (NV)au32;
1541 else
1542 cuv += au32;
a6ec74c1
JH
1543 }
1544 break;
068bd2e7
MHM
1545 case 'V' | TYPE_IS_SHRIEKING:
1546 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1547 while (len-- > 0) {
08ca2aa3 1548 I32 ai32;
f8e5a5db 1549#if U32SIZE > SIZE32
08ca2aa3 1550 ai32 = 0;
f8e5a5db 1551#endif
aaec8192 1552 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1553 /* There should never be any byte swapping here. */
1554 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1555 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1556 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1557 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1558 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1559 if (!checksum)
6e449a3a 1560 mPUSHi(ai32);
73cb7263
NC
1561 else if (checksum > bits_in_uv)
1562 cdouble += (NV)ai32;
1563 else
1564 cuv += ai32;
068bd2e7
MHM
1565 }
1566 break;
a6ec74c1 1567 case 'p':
a6ec74c1 1568 while (len-- > 0) {
f7fe979e 1569 const char *aptr;
aaec8192 1570 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1571 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1572 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1573 }
1574 break;
1575 case 'w':
a6ec74c1
JH
1576 {
1577 UV auv = 0;
1578 U32 bytes = 0;
fc241834 1579
08ca2aa3
TH
1580 while (len > 0 && s < strend) {
1581 U8 ch;
f337b084 1582 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1583 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1584 /* UTF8_IS_XXXXX not right here because this is a BER, not
1585 * UTF-8 format - using constant 0x80 */
08ca2aa3 1586 if (ch < 0x80) {
a6ec74c1 1587 bytes = 0;
6e449a3a 1588 mPUSHu(auv);
a6ec74c1
JH
1589 len--;
1590 auv = 0;
08ca2aa3 1591 continue;
a6ec74c1 1592 }
08ca2aa3 1593 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1594 const char *t;
a6ec74c1 1595
147e3846
KW
1596 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1597 (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1598 while (s < strend) {
f337b084 1599 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1600 sv = mul128(sv, (U8)(ch & 0x7f));
1601 if (!(ch & 0x80)) {
a6ec74c1
JH
1602 bytes = 0;
1603 break;
1604 }
1605 }
10516c54 1606 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1607 while (*t == '0')
1608 t++;
1609 sv_chop(sv, t);
6e449a3a 1610 mPUSHs(sv);
a6ec74c1
JH
1611 len--;
1612 auv = 0;
1613 }
1614 }
1615 if ((s >= strend) && bytes)
49704364 1616 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1617 }
1618 break;
1619 case 'P':
49704364
WL
1620 if (symptr->howlen == e_star)
1621 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1622 EXTEND(SP, 1);
2d3e0934 1623 if (s + sizeof(char*) <= strend) {
08ca2aa3 1624 char *aptr;
aaec8192 1625 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1626 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1627 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1628 }
a6ec74c1 1629 break;
c174bf3b 1630#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1631 case 'q':
73cb7263 1632 while (len-- > 0) {
08ca2aa3 1633 Quad_t aquad;
aaec8192 1634 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1635 if (!checksum)
c174bf3b 1636 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1637 else if (checksum > bits_in_uv)
1638 cdouble += (NV)aquad;
1639 else
1640 cuv += aquad;
1641 }
a6ec74c1
JH
1642 break;
1643 case 'Q':
73cb7263 1644 while (len-- > 0) {
08ca2aa3 1645 Uquad_t auquad;
aaec8192 1646 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1647 if (!checksum)
c174bf3b 1648 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1649 else if (checksum > bits_in_uv)
1650 cdouble += (NV)auquad;
1651 else
1652 cuv += auquad;
a6ec74c1
JH
1653 }
1654 break;
1640b983 1655#endif
a6ec74c1
JH
1656 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1657 case 'f':
73cb7263 1658 while (len-- > 0) {
08ca2aa3 1659 float afloat;
aaec8192 1660 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1661 if (!checksum)
6e449a3a 1662 mPUSHn(afloat);
08ca2aa3 1663 else
73cb7263 1664 cdouble += afloat;
fc241834 1665 }
a6ec74c1
JH
1666 break;
1667 case 'd':
73cb7263 1668 while (len-- > 0) {
08ca2aa3 1669 double adouble;
aaec8192 1670 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1671 if (!checksum)
6e449a3a 1672 mPUSHn(adouble);
08ca2aa3 1673 else
73cb7263 1674 cdouble += adouble;
fc241834 1675 }
a6ec74c1 1676 break;
92d41999 1677 case 'F':
73cb7263 1678 while (len-- > 0) {
275663fa 1679 NV_bytes anv;
aaec8192
NC
1680 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1681 datumtype, needs_swap);
08ca2aa3 1682 if (!checksum)
275663fa 1683 mPUSHn(anv.nv);
08ca2aa3 1684 else
275663fa 1685 cdouble += anv.nv;
fc241834 1686 }
92d41999
JH
1687 break;
1688#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1689 case 'D':
73cb7263 1690 while (len-- > 0) {
275663fa 1691 ld_bytes aldouble;
aaec8192
NC
1692 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1693 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1694 /* The most common long double format, the x86 80-bit
1695 * extended precision, has either 2 or 6 unused bytes,
1696 * which may contain garbage, which may contain
1697 * unintentional data. While we do zero the bytes of
1698 * the long double data in pack(), here in unpack() we
1699 * don't, because it's really hard to envision that
1700 * reading the long double off aldouble would be
e075ae47 1701 * affected by the unused bytes.
42262fd3
JH
1702 *
1703 * Note that trying to unpack 'long doubles' of 'long
1704 * doubles' packed in another system is in the general
1705 * case doomed without having more detail. */
08ca2aa3 1706 if (!checksum)
275663fa 1707 mPUSHn(aldouble.ld);
08ca2aa3 1708 else
275663fa 1709 cdouble += aldouble.ld;
92d41999
JH
1710 }
1711 break;
1712#endif
a6ec74c1 1713 case 'u':
858fe5e1 1714 if (!checksum) {
f7fe979e 1715 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1716 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1717 if (l) SvPOK_on(sv);
1718 }
08ca2aa3 1719
bee81d36
KW
1720 /* Note that all legal uuencoded strings are ASCII printables, so
1721 * have the same representation under UTF-8 vs not. This means we
1722 * can ignore UTF8ness on legal input. For illegal we stop at the
1723 * first failure, and don't report where/what that is, so again we
1724 * can ignore UTF8ness */
1725
9233b56b
KW
1726 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1727 I32 a, b, c, d;
1728 char hunk[3];
1729
1730 len = PL_uudmap[*(U8*)s++] & 077;
1731 while (len > 0) {
1732 if (s < strend && ISUUCHAR(*s))
1733 a = PL_uudmap[*(U8*)s++] & 077;
1734 else
1735 a = 0;
1736 if (s < strend && ISUUCHAR(*s))
1737 b = PL_uudmap[*(U8*)s++] & 077;
1738 else
1739 b = 0;
1740 if (s < strend && ISUUCHAR(*s))
1741 c = PL_uudmap[*(U8*)s++] & 077;
1742 else
1743 c = 0;
1744 if (s < strend && ISUUCHAR(*s))
1745 d = PL_uudmap[*(U8*)s++] & 077;
1746 else
1747 d = 0;
1748 hunk[0] = (char)((a << 2) | (b >> 4));
1749 hunk[1] = (char)((b << 4) | (c >> 2));
1750 hunk[2] = (char)((c << 6) | d);
1751 if (!checksum)
1752 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1753 len -= 3;
1754 }
1755 if (*s == '\n')
1756 s++;
1757 else /* possible checksum byte */
1758 if (s + 1 < strend && s[1] == '\n')
1759 s += 2;
1760 }
858fe5e1
TC
1761 if (!checksum)
1762 XPUSHs(sv);
a6ec74c1 1763 break;
99f862a1 1764 } /* End of switch */
49704364 1765
a6ec74c1 1766 if (checksum) {
1109a392 1767 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1768 (checksum > bits_in_uv &&
08ca2aa3
TH
1769 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1770 NV trouble, anv;
a6ec74c1 1771
08ca2aa3 1772 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1773 while (checksum >= 16) {
1774 checksum -= 16;
08ca2aa3 1775 anv *= 65536.0;
a6ec74c1 1776 }
a6ec74c1 1777 while (cdouble < 0.0)
08ca2aa3 1778 cdouble += anv;
09b94b1f
S
1779 cdouble = Perl_modf(cdouble / anv, &trouble);
1780#ifdef LONGDOUBLE_DOUBLEDOUBLE
1781 /* Workaround for powerpc doubledouble modfl bug:
1782 * close to 1.0L and -1.0L cdouble is 0, and trouble
1783 * is cdouble / anv. */
1784 if (trouble != Perl_ceil(trouble)) {
1785 cdouble = trouble;
1786 if (cdouble > 1.0L) cdouble -= 1.0L;
1787 if (cdouble < -1.0L) cdouble += 1.0L;
1788 }
1789#endif
1790 cdouble *= anv;
c4c5f44a 1791 sv = newSVnv(cdouble);
a6ec74c1
JH
1792 }
1793 else {
fa8ec7c1
NC
1794 if (checksum < bits_in_uv) {
1795 UV mask = ((UV)1 << checksum) - 1;
92d41999 1796 cuv &= mask;
a6ec74c1 1797 }
c4c5f44a 1798 sv = newSVuv(cuv);
a6ec74c1 1799 }
6e449a3a 1800 mXPUSHs(sv);
a6ec74c1
JH
1801 checksum = 0;
1802 }
fc241834 1803
49704364
WL
1804 if (symptr->flags & FLAG_SLASH){
1805 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1806 break;
49704364
WL
1807 if( next_symbol(symptr) ){
1808 if( symptr->howlen == e_number )
1809 Perl_croak(aTHX_ "Count after length/code in unpack" );
1810 if( beyond ){
1811 /* ...end of char buffer then no decent length available */
1812 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1813 } else {
1814 /* take top of stack (hope it's numeric) */
1815 len = POPi;
1816 if( len < 0 )
1817 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1818 }
1819 } else {
1820 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1821 }
1822 datumtype = symptr->code;
21c16052 1823 explicit_length = FALSE;
49704364
WL
1824 goto redo_switch;
1825 }
a6ec74c1 1826 }
49704364 1827
18529408
IZ
1828 if (new_s)
1829 *new_s = s;
1830 PUTBACK;
1831 return SP - PL_stack_base - start_sp_offset;
1832}
1833
1834PP(pp_unpack)
1835{
1836 dSP;
bab9c0ac 1837 dPOPPOPssrl;
1c23e2bd 1838 U8 gimme = GIMME_V;
18529408
IZ
1839 STRLEN llen;
1840 STRLEN rlen;
5c144d81
NC
1841 const char *pat = SvPV_const(left, llen);
1842 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1843 const char *strend = s + rlen;
1844 const char *patend = pat + llen;
08ca2aa3 1845 I32 cnt;
18529408
IZ
1846
1847 PUTBACK;
7accc089 1848 cnt = unpackstring(pat, patend, s, strend,
49704364 1849 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1850 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1851
18529408
IZ
1852 SPAGAIN;
1853 if ( !cnt && gimme == G_SCALAR )
1854 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1855 RETURN;
1856}
1857
f337b084 1858STATIC U8 *
e68aed92 1859doencodes(U8 *h, const U8 *s, I32 len)
a6ec74c1 1860{
f337b084 1861 *h++ = PL_uuemap[len];
a6ec74c1 1862 while (len > 2) {
f337b084
TH
1863 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1864 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1865 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1866 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1867 s += 3;
1868 len -= 3;
1869 }
1870 if (len > 0) {
e68aed92 1871 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1872 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1873 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1874 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1875 *h++ = PL_uuemap[0];
a6ec74c1 1876 }
f337b084
TH
1877 *h++ = '\n';
1878 return h;
a6ec74c1
JH
1879}
1880
1881STATIC SV *
f7fe979e 1882S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1883{
8b6e33c7
AL
1884 SV *result = newSVpvn(s, l);
1885 char *const result_c = SvPV_nolen(result); /* convenience */
1886 char *out = result_c;
1887 bool skip = 1;
1888 bool ignore = 0;
a6ec74c1 1889
7918f24d
NC
1890 PERL_ARGS_ASSERT_IS_AN_INT;
1891
a6ec74c1
JH
1892 while (*s) {
1893 switch (*s) {
1894 case ' ':
1895 break;
1896 case '+':
1897 if (!skip) {
1898 SvREFCNT_dec(result);
1899 return (NULL);
1900 }
1901 break;
1902 case '0':
1903 case '1':
1904 case '2':
1905 case '3':
1906 case '4':
1907 case '5':
1908 case '6':
1909 case '7':
1910 case '8':
1911 case '9':
1912 skip = 0;
1913 if (!ignore) {
1914 *(out++) = *s;
1915 }
1916 break;
1917 case '.':
1918 ignore = 1;
1919 break;
1920 default:
1921 SvREFCNT_dec(result);
1922 return (NULL);
1923 }
1924 s++;
1925 }
1926 *(out++) = '\0';
1927 SvCUR_set(result, out - result_c);
1928 return (result);
1929}
1930
1931/* pnum must be '\0' terminated */
1932STATIC int
1933S_div128(pTHX_ SV *pnum, bool *done)
1934{
8b6e33c7
AL
1935 STRLEN len;
1936 char * const s = SvPV(pnum, len);
1937 char *t = s;
1938 int m = 0;
1939
7918f24d
NC
1940 PERL_ARGS_ASSERT_DIV128;
1941
8b6e33c7
AL
1942 *done = 1;
1943 while (*t) {
1944 const int i = m * 10 + (*t - '0');
1945 const int r = (i >> 7); /* r < 10 */
1946 m = i & 0x7F;
1947 if (r) {
1948 *done = 0;
1949 }
1950 *(t++) = '0' + r;
a6ec74c1 1951 }
8b6e33c7
AL
1952 *(t++) = '\0';
1953 SvCUR_set(pnum, (STRLEN) (t - s));
1954 return (m);
a6ec74c1
JH
1955}
1956
18529408 1957/*
7accc089
JH
1958=for apidoc packlist
1959
796b6530 1960The engine implementing C<pack()> Perl function.
7accc089 1961
bfce84ec
AL
1962=cut
1963*/
7accc089
JH
1964
1965void
5aaab254 1966Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1967{
aadb217d
JH
1968 tempsym_t sym;
1969
7918f24d
NC
1970 PERL_ARGS_ASSERT_PACKLIST;
1971
f7fe979e 1972 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1973
f337b084
TH
1974 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1975 Also make sure any UTF8 flag is loaded */
56eb0262 1976 SvPV_force_nolen(cat);
bfce84ec
AL
1977 if (DO_UTF8(cat))
1978 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1979
49704364
WL
1980 (void)pack_rec( cat, &sym, beglist, endlist );
1981}
1982
f337b084
TH
1983/* like sv_utf8_upgrade, but also repoint the group start markers */
1984STATIC void
1985marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1986 STRLEN len;
1987 tempsym_t *group;
f7fe979e
AL
1988 const char *from_ptr, *from_start, *from_end, **marks, **m;
1989 char *to_start, *to_ptr;
f337b084
TH
1990
1991 if (SvUTF8(sv)) return;
1992
aa07b2f6 1993 from_start = SvPVX_const(sv);
f337b084
TH
1994 from_end = from_start + SvCUR(sv);
1995 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 1996 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
1997 if (from_ptr == from_end) {
1998 /* Simple case: no character needs to be changed */
1999 SvUTF8_on(sv);
2000 return;
2001 }
2002
3473cf63 2003 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2004 Newx(to_start, len, char);
f337b084
TH
2005 Copy(from_start, to_start, from_ptr-from_start, char);
2006 to_ptr = to_start + (from_ptr-from_start);
2007
a02a5408 2008 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2009 for (group=sym_ptr; group; group = group->previous)
2010 marks[group->level] = from_start + group->strbeg;
2011 marks[sym_ptr->level+1] = from_end+1;
2012 for (m = marks; *m < from_ptr; m++)
2013 *m = to_start + (*m-from_start);
2014
2015 for (;from_ptr < from_end; from_ptr++) {
2016 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2017 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2018 }
2019 *to_ptr = 0;
2020
2021 while (*m == from_ptr) *m++ = to_ptr;
2022 if (m != marks + sym_ptr->level+1) {
2023 Safefree(marks);
2024 Safefree(to_start);
5637ef5b
NC
2025 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2026 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2027 }
2028 for (group=sym_ptr; group; group = group->previous)
2029 group->strbeg = marks[group->level] - to_start;
2030 Safefree(marks);
2031
2032 if (SvOOK(sv)) {
2033 if (SvIVX(sv)) {
b162af07 2034 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2035 from_start -= SvIVX(sv);
2036 SvIV_set(sv, 0);
2037 }
2038 SvFLAGS(sv) &= ~SVf_OOK;
2039 }
2040 if (SvLEN(sv) != 0)
2041 Safefree(from_start);
f880fe2f 2042 SvPV_set(sv, to_start);
b162af07
SP
2043 SvCUR_set(sv, to_ptr - to_start);
2044 SvLEN_set(sv, len);
f337b084
TH
2045 SvUTF8_on(sv);
2046}
2047
2048/* Exponential string grower. Makes string extension effectively O(n)
2049 needed says how many extra bytes we need (not counting the final '\0')
2050 Only grows the string if there is an actual lack of space
2051*/
2052STATIC char *
0bd48802 2053S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2054 const STRLEN cur = SvCUR(sv);
2055 const STRLEN len = SvLEN(sv);
f337b084 2056 STRLEN extend;
7918f24d
NC
2057
2058 PERL_ARGS_ASSERT_SV_EXP_GROW;
2059
f337b084
TH
2060 if (len - cur > needed) return SvPVX(sv);
2061 extend = needed > len ? needed : len;
2062 return SvGROW(sv, len+extend+1);
2063}
49704364 2064
93f6e112 2065static SV *
b197e565 2066S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2067{
2068 SvGETMAGIC(sv);
93f6e112
FC
2069 if (UNLIKELY(SvAMAGIC(sv)))
2070 sv = sv_2num(sv);
99f450cc 2071 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2072 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2073 const NV nv = SvNV_nomg(sv);
2074 if (c == 'w')
147e3846 2075 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
354b74ae 2076 else
147e3846 2077 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2078 }
93f6e112 2079 return sv;
354b74ae
FC
2080}
2081
93f6e112
FC
2082#define SvIV_no_inf(sv,d) \
2083 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2084#define SvUV_no_inf(sv,d) \
2085 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2086
49704364
WL
2087STATIC
2088SV **
f337b084 2089S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2090{
49704364 2091 tempsym_t lookahead;
f337b084
TH
2092 I32 items = endlist - beglist;
2093 bool found = next_symbol(symptr);
2094 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2095 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2096 char* from;
f337b084 2097
7918f24d
NC
2098 PERL_ARGS_ASSERT_PACK_REC;
2099
f337b084
TH
2100 if (symptr->level == 0 && found && symptr->code == 'U') {
2101 marked_upgrade(aTHX_ cat, symptr);
2102 symptr->flags |= FLAG_DO_UTF8;
2103 utf8 = 0;
49704364 2104 }
f337b084 2105 symptr->strbeg = SvCUR(cat);
49704364
WL
2106
2107 while (found) {
f337b084
TH
2108 SV *fromstr;
2109 STRLEN fromlen;
2110 I32 len;
a0714e2c 2111 SV *lengthcode = NULL;
49704364 2112 I32 datumtype = symptr->code;
f337b084
TH
2113 howlen_t howlen = symptr->howlen;
2114 char *start = SvPVX(cat);
2115 char *cur = start + SvCUR(cat);
a1219b5e 2116 bool needs_swap;
49704364 2117
fc1bb3f2 2118#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2119#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2120
2121 switch (howlen) {
fc241834 2122 case e_star:
f337b084
TH
2123 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2124 0 : items;
2125 break;
2126 default:
2127 /* e_no_len and e_number */
2128 len = symptr->length;
49704364
WL
2129 break;
2130 }
2131
f337b084 2132 if (len) {
a7a3cfaa 2133 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2134
a7a3cfaa
TH
2135 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2136 /* We can process this letter. */
2137 STRLEN size = props & PACK_SIZE_MASK;
2138 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2139 }
f337b084
TH
2140 }
2141
49704364
WL
2142 /* Look ahead for next symbol. Do we have code/code? */
2143 lookahead = *symptr;
2144 found = next_symbol(&lookahead);
246f24af
TH
2145 if (symptr->flags & FLAG_SLASH) {
2146 IV count;
f337b084 2147 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2148 if (strchr("aAZ", lookahead.code)) {
2149 if (lookahead.howlen == e_number) count = lookahead.length;
2150 else {
ce399ba6 2151 if (items > 0) {
48a5da33 2152 count = sv_len_utf8(*beglist);
ce399ba6 2153 }
246f24af
TH
2154 else count = 0;
2155 if (lookahead.code == 'Z') count++;
2156 }
2157 } else {
2158 if (lookahead.howlen == e_number && lookahead.length < items)
2159 count = lookahead.length;
2160 else count = items;
2161 }
2162 lookahead.howlen = e_number;
2163 lookahead.length = count;
2164 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2165 }
49704364 2166
a1219b5e
NC
2167 needs_swap = NEEDS_SWAP(datumtype);
2168
fc241834
RGS
2169 /* Code inside the switch must take care to properly update
2170 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2171 doesn't simply leave using break */
0c7df902 2172 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2173 default:
f337b084
TH
2174 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2175 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2176 case '%':
49704364 2177 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2178
28be1210 2179 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2180 case '.':
2181 if (howlen == e_star) from = start;
2182 else if (len == 0) from = cur;
2183 else {
2184 tempsym_t *group = symptr;
2185
2186 while (--len && group) group = group->previous;
2187 from = group ? start + group->strbeg : start;
2188 }
2189 fromstr = NEXTFROM;
354b74ae 2190 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2191 goto resize;
28be1210 2192 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2193 case '@':
28be1210
TH
2194 from = start + symptr->strbeg;
2195 resize:
28be1210 2196 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2197 if (len >= 0) {
2198 while (len && from < cur) {
2199 from += UTF8SKIP(from);
2200 len--;
2201 }
2202 if (from > cur)
2203 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2204 if (len) {
2205 /* Here we know from == cur */
2206 grow:
2207 GROWING(0, cat, start, cur, len);
2208 Zero(cur, len, char);
2209 cur += len;
2210 } else if (from < cur) {
2211 len = cur - from;
2212 goto shrink;
2213 } else goto no_change;
2214 } else {
2215 cur = from;
2216 len = -len;
2217 goto utf8_shrink;
f337b084 2218 }
28be1210
TH
2219 else {
2220 len -= cur - from;
f337b084 2221 if (len > 0) goto grow;
28be1210 2222 if (len == 0) goto no_change;
fc241834 2223 len = -len;
28be1210 2224 goto shrink;
f337b084 2225 }
a6ec74c1 2226 break;
81d52ecd 2227
fc241834 2228 case '(': {
49704364 2229 tempsym_t savsym = *symptr;
66c611c5
MHM
2230 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2231 symptr->flags |= group_modifiers;
49704364
WL
2232 symptr->patend = savsym.grpend;
2233 symptr->level++;
f337b084 2234 symptr->previous = &lookahead;
18529408 2235 while (len--) {
f337b084
TH
2236 U32 was_utf8;
2237 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2238 else symptr->flags &= ~FLAG_PARSE_UTF8;
2239 was_utf8 = SvUTF8(cat);
49704364 2240 symptr->patptr = savsym.grpbeg;
f337b084
TH
2241 beglist = pack_rec(cat, symptr, beglist, endlist);
2242 if (SvUTF8(cat) != was_utf8)
2243 /* This had better be an upgrade while in utf8==0 mode */
2244 utf8 = 1;
2245
49704364 2246 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2247 break; /* No way to continue */
2248 }
ee790063 2249 items = endlist - beglist;
f337b084
TH
2250 lookahead.flags = symptr->flags & ~group_modifiers;
2251 goto no_change;
18529408 2252 }
62f95557
IZ
2253 case 'X' | TYPE_IS_SHRIEKING:
2254 if (!len) /* Avoid division by 0 */
2255 len = 1;
f337b084
TH
2256 if (utf8) {
2257 char *hop, *last;
2258 I32 l = len;
2259 hop = last = start;
2260 while (hop < cur) {
2261 hop += UTF8SKIP(hop);
2262 if (--l == 0) {
2263 last = hop;
2264 l = len;
2265 }
2266 }
2267 if (last > cur)
2268 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2269 cur = last;
2270 break;
2271 }
2272 len = (cur-start) % len;
924ba076 2273 /* FALLTHROUGH */
a6ec74c1 2274 case 'X':
f337b084
TH
2275 if (utf8) {
2276 if (len < 1) goto no_change;
28be1210 2277 utf8_shrink:
f337b084
TH
2278 while (len > 0) {
2279 if (cur <= start)
28be1210
TH
2280 Perl_croak(aTHX_ "'%c' outside of string in pack",
2281 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2282 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2283 if (cur <= start)
28be1210
TH
2284 Perl_croak(aTHX_ "'%c' outside of string in pack",
2285 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2286 }
2287 len--;
2288 }
2289 } else {
fc241834 2290 shrink:
f337b084 2291 if (cur - start < len)
28be1210
TH
2292 Perl_croak(aTHX_ "'%c' outside of string in pack",
2293 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2294 cur -= len;
2295 }
2296 if (cur < start+symptr->strbeg) {
2297 /* Make sure group starts don't point into the void */
2298 tempsym_t *group;
9e27e96a 2299 const STRLEN length = cur-start;
f337b084
TH
2300 for (group = symptr;
2301 group && length < group->strbeg;
2302 group = group->previous) group->strbeg = length;
2303 lookahead.strbeg = length;
2304 }
a6ec74c1 2305 break;
fc241834
RGS
2306 case 'x' | TYPE_IS_SHRIEKING: {
2307 I32 ai32;
62f95557
IZ
2308 if (!len) /* Avoid division by 0 */
2309 len = 1;
230e1fce 2310 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2311 else ai32 = (cur - start) % len;
2312 if (ai32 == 0) goto no_change;
2313 len -= ai32;
2314 }
924ba076 2315 /* FALLTHROUGH */
a6ec74c1 2316 case 'x':
f337b084 2317 goto grow;
a6ec74c1
JH
2318 case 'A':
2319 case 'Z':
f337b084 2320 case 'a': {
f7fe979e 2321 const char *aptr;
f337b084 2322
a6ec74c1 2323 fromstr = NEXTFROM;
e62f0680 2324 aptr = SvPV_const(fromstr, fromlen);
f337b084 2325 if (DO_UTF8(fromstr)) {
f7fe979e 2326 const char *end, *s;
f337b084
TH
2327
2328 if (!utf8 && !SvUTF8(cat)) {
2329 marked_upgrade(aTHX_ cat, symptr);
2330 lookahead.flags |= FLAG_DO_UTF8;
2331 lookahead.strbeg = symptr->strbeg;
2332 utf8 = 1;
2333 start = SvPVX(cat);
2334 cur = start + SvCUR(cat);
2335 }
fc241834 2336 if (howlen == e_star) {
f337b084
TH
2337 if (utf8) goto string_copy;
2338 len = fromlen+1;
2339 }
2340 s = aptr;
2341 end = aptr + fromlen;
2342 fromlen = datumtype == 'Z' ? len-1 : len;
2343 while ((I32) fromlen > 0 && s < end) {
2344 s += UTF8SKIP(s);
2345 fromlen--;
2346 }
2347 if (s > end)
2348 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2349 if (utf8) {
fc241834 2350 len = fromlen;
f337b084
TH
2351 if (datumtype == 'Z') len++;
2352 fromlen = s-aptr;
2353 len += fromlen;
fc241834 2354
f337b084 2355 goto string_copy;
fc241834 2356 }
f337b084
TH
2357 fromlen = len - fromlen;
2358 if (datumtype == 'Z') fromlen--;
2359 if (howlen == e_star) {
2360 len = fromlen;
2361 if (datumtype == 'Z') len++;
fc241834 2362 }
f337b084 2363 GROWING(0, cat, start, cur, len);
9df874cd 2364 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2365 datumtype | TYPE_IS_PACK))
5637ef5b 2366 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
147e3846 2367 "for '%c', aptr=%p end=%p cur=%p, fromlen=%" UVuf,
5637ef5b 2368 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2369 cur += fromlen;
a6ec74c1 2370 len -= fromlen;
f337b084
TH
2371 } else if (utf8) {
2372 if (howlen == e_star) {
2373 len = fromlen;
2374 if (datumtype == 'Z') len++;
a6ec74c1 2375 }
f337b084
TH
2376 if (len <= (I32) fromlen) {
2377 fromlen = len;
2378 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2379 }
fc241834 2380 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2381 upgrade, so:
2382 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2383 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2384 len -= fromlen;
2385 while (fromlen > 0) {
230e1fce 2386 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2387 aptr++;
2388 fromlen--;
fc241834 2389 }
f337b084
TH
2390 } else {
2391 string_copy:
2392 if (howlen == e_star) {
2393 len = fromlen;
2394 if (datumtype == 'Z') len++;
2395 }
2396 if (len <= (I32) fromlen) {
2397 fromlen = len;
2398 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2399 }
f337b084
TH
2400 GROWING(0, cat, start, cur, len);
2401 Copy(aptr, cur, fromlen, char);
2402 cur += fromlen;
2403 len -= fromlen;
a6ec74c1 2404 }
f337b084
TH
2405 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2406 cur += len;
3c4fb04a 2407 SvTAINT(cat);
a6ec74c1 2408 break;
f337b084 2409 }
a6ec74c1 2410 case 'B':
f337b084 2411 case 'b': {
b83604b4 2412 const char *str, *end;
f337b084
TH
2413 I32 l, field_len;
2414 U8 bits;
2415 bool utf8_source;
2416 U32 utf8_flags;
a6ec74c1 2417
fc241834 2418 fromstr = NEXTFROM;
b83604b4 2419 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2420 end = str + fromlen;
2421 if (DO_UTF8(fromstr)) {
2422 utf8_source = TRUE;
041457d9 2423 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2424 } else {
2425 utf8_source = FALSE;
2426 utf8_flags = 0; /* Unused, but keep compilers happy */
2427 }
2428 if (howlen == e_star) len = fromlen;
2429 field_len = (len+7)/8;
2430 GROWING(utf8, cat, start, cur, field_len);
2431 if (len > (I32)fromlen) len = fromlen;
2432 bits = 0;
2433 l = 0;
2434 if (datumtype == 'B')
2435 while (l++ < len) {
2436 if (utf8_source) {
95b63a38 2437 UV val = 0;
f337b084
TH
2438 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2439 bits |= val & 1;
2440 } else bits |= *str++ & 1;
2441 if (l & 7) bits <<= 1;
fc241834 2442 else {
f337b084
TH
2443 PUSH_BYTE(utf8, cur, bits);
2444 bits = 0;
a6ec74c1
JH
2445 }
2446 }
f337b084
TH
2447 else
2448 /* datumtype == 'b' */
2449 while (l++ < len) {
2450 if (utf8_source) {
95b63a38 2451 UV val = 0;
f337b084
TH
2452 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2453 if (val & 1) bits |= 0x80;
2454 } else if (*str++ & 1)
2455 bits |= 0x80;
2456 if (l & 7) bits >>= 1;
fc241834 2457 else {
f337b084
TH
2458 PUSH_BYTE(utf8, cur, bits);
2459 bits = 0;
a6ec74c1
JH
2460 }
2461 }
f337b084
TH
2462 l--;
2463 if (l & 7) {
fc241834 2464 if (datumtype == 'B')
f337b084 2465 bits <<= 7 - (l & 7);
fc241834 2466 else
f337b084
TH
2467 bits >>= 7 - (l & 7);
2468 PUSH_BYTE(utf8, cur, bits);
2469 l += 7;
a6ec74c1 2470 }
f337b084
TH
2471 /* Determine how many chars are left in the requested field */
2472 l /= 8;
2473 if (howlen == e_star) field_len = 0;
2474 else field_len -= l;
2475 Zero(cur, field_len, char);
2476 cur += field_len;
a6ec74c1 2477 break;
f337b084 2478 }
a6ec74c1 2479 case 'H':
f337b084 2480 case 'h': {
10516c54 2481 const char *str, *end;
f337b084
TH
2482 I32 l, field_len;
2483 U8 bits;
2484 bool utf8_source;
2485 U32 utf8_flags;
a6ec74c1 2486
fc241834 2487 fromstr = NEXTFROM;
10516c54 2488 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2489 end = str + fromlen;
2490 if (DO_UTF8(fromstr)) {
2491 utf8_source = TRUE;
041457d9 2492 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2493 } else {
2494 utf8_source = FALSE;
2495 utf8_flags = 0; /* Unused, but keep compilers happy */
2496 }
2497 if (howlen == e_star) len = fromlen;
2498 field_len = (len+1)/2;
2499 GROWING(utf8, cat, start, cur, field_len);
0403a1ad 2500 if (!utf8_source && len > (I32)fromlen) len = fromlen;
f337b084
TH
2501 bits = 0;
2502 l = 0;
2503 if (datumtype == 'H')
2504 while (l++ < len) {
2505 if (utf8_source) {
95b63a38 2506 UV val = 0;
f337b084
TH
2507 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2508 if (val < 256 && isALPHA(val))
2509 bits |= (val + 9) & 0xf;
a6ec74c1 2510 else
f337b084
TH
2511 bits |= val & 0xf;
2512 } else if (isALPHA(*str))
2513 bits |= (*str++ + 9) & 0xf;
2514 else
2515 bits |= *str++ & 0xf;
2516 if (l & 1) bits <<= 4;
fc241834 2517 else {
f337b084
TH
2518 PUSH_BYTE(utf8, cur, bits);
2519 bits = 0;
a6ec74c1
JH
2520 }
2521 }
f337b084
TH
2522 else
2523 while (l++ < len) {
2524 if (utf8_source) {
95b63a38 2525 UV val = 0;
f337b084
TH
2526 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2527 if (val < 256 && isALPHA(val))
2528 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2529 else
f337b084
TH
2530 bits |= (val & 0xf) << 4;
2531 } else if (isALPHA(*str))
2532 bits |= ((*str++ + 9) & 0xf) << 4;
2533 else
2534 bits |= (*str++ & 0xf) << 4;
2535 if (l & 1) bits >>= 4;
fc241834 2536 else {
f337b084
TH
2537 PUSH_BYTE(utf8, cur, bits);
2538 bits = 0;
a6ec74c1 2539 }
fc241834 2540 }
f337b084
TH
2541 l--;
2542 if (l & 1) {
2543 PUSH_BYTE(utf8, cur, bits);
2544 l++;
2545 }
2546 /* Determine how many chars are left in the requested field */
2547 l /= 2;
2548 if (howlen == e_star) field_len = 0;
2549 else field_len -= l;
2550 Zero(cur, field_len, char);
2551 cur += field_len;
2552 break;
fc241834
RGS
2553 }
2554 case 'c':
f337b084
TH
2555 while (len-- > 0) {
2556 IV aiv;
2557 fromstr = NEXTFROM;
354b74ae 2558 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2559 if ((-128 > aiv || aiv > 127))
2560 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2561 "Character in 'c' format wrapped in pack");
585ec06d 2562 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2563 }
2564 break;
2565 case 'C':
f337b084
TH
2566 if (len == 0) {
2567 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2568 break;
2569 }
a6ec74c1 2570 while (len-- > 0) {
f337b084 2571 IV aiv;
a6ec74c1 2572 fromstr = NEXTFROM;
354b74ae 2573 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2574 if ((0 > aiv || aiv > 0xff))
2575 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2576 "Character in 'C' format wrapped in pack");
1651fc44 2577 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2578 }
fc241834
RGS
2579 break;
2580 case 'W': {
2581 char *end;
670f1322 2582 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2583
2584 end = start+SvLEN(cat)-1;
2585 if (utf8) end -= UTF8_MAXLEN-1;
2586 while (len-- > 0) {
2587 UV auv;
2588 fromstr = NEXTFROM;
354b74ae 2589 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2590 if (in_bytes) auv = auv % 0x100;
2591 if (utf8) {
2592 W_utf8:
bf4a926a 2593 if (cur >= end) {
fc241834 2594 *cur = '\0';
b162af07 2595 SvCUR_set(cat, cur - start);
fc241834
RGS
2596
2597 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2598 end = start+SvLEN(cat)-UTF8_MAXLEN;
2599 }
05fefba9 2600 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
fc241834
RGS
2601 } else {
2602 if (auv >= 0x100) {
2603 if (!SvUTF8(cat)) {
2604 *cur = '\0';
b162af07 2605 SvCUR_set(cat, cur - start);
fc241834
RGS
2606 marked_upgrade(aTHX_ cat, symptr);
2607 lookahead.flags |= FLAG_DO_UTF8;
2608 lookahead.strbeg = symptr->strbeg;
2609 utf8 = 1;
2610 start = SvPVX(cat);
2611 cur = start + SvCUR(cat);
2612 end = start+SvLEN(cat)-UTF8_MAXLEN;
2613 goto W_utf8;
2614 }
a2a5de95
NC
2615 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2616 "Character in 'W' format wrapped in pack");
fc241834
RGS
2617 auv &= 0xff;
2618 }
2619 if (cur >= end) {
2620 *cur = '\0';
b162af07 2621 SvCUR_set(cat, cur - start);
fc241834
RGS
2622 GROWING(0, cat, start, cur, len+1);
2623 end = start+SvLEN(cat)-1;
2624 }
fe2774ed 2625 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2626 }
2627 }
2628 break;
fc241834
RGS
2629 }
2630 case 'U': {
2631 char *end;
2632
2633 if (len == 0) {
2634 if (!(symptr->flags & FLAG_DO_UTF8)) {
2635 marked_upgrade(aTHX_ cat, symptr);
2636 lookahead.flags |= FLAG_DO_UTF8;
2637 lookahead.strbeg = symptr->strbeg;
2638 }
2639 utf8 = 0;
2640 goto no_change;
2641 }
2642
2643 end = start+SvLEN(cat);
2644 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2645 while (len-- > 0) {
fc241834 2646 UV auv;
a6ec74c1 2647 fromstr = NEXTFROM;
354b74ae 2648 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2649 if (utf8) {
230e1fce 2650 U8 buffer[UTF8_MAXLEN], *endb;
05fefba9 2651 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
fc241834
RGS
2652 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2653 *cur = '\0';
b162af07 2654 SvCUR_set(cat, cur - start);
fc241834
RGS
2655 GROWING(0, cat, start, cur,
2656 len+(endb-buffer)*UTF8_EXPAND);
2657 end = start+SvLEN(cat);
2658 }
d21d7215 2659 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2660 } else {
2661 if (cur >= end) {
2662 *cur = '\0';
b162af07 2663 SvCUR_set(cat, cur - start);
fc241834
RGS
2664 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2665 end = start+SvLEN(cat)-UTF8_MAXLEN;
2666 }
05fefba9
KW
2667 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2668 UNI_TO_NATIVE(auv),
2669 0);
fc241834 2670 }
a6ec74c1 2671 }
a6ec74c1 2672 break;
fc241834 2673 }
a6ec74c1
JH
2674 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2675 case 'f':
a6ec74c1 2676 while (len-- > 0) {
f337b084
TH
2677 float afloat;
2678 NV anv;
a6ec74c1 2679 fromstr = NEXTFROM;
f337b084 2680 anv = SvNV(fromstr);
a7157111 2681# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2682 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2683 * on Alpha; fake it if we don't have them.
2684 */
f337b084 2685 if (anv > FLT_MAX)
fc241834 2686 afloat = FLT_MAX;
f337b084 2687 else if (anv < -FLT_MAX)
fc241834 2688 afloat = -FLT_MAX;
f337b084 2689 else afloat = (float)anv;
baf3cf9c 2690# else
a7157111 2691# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
d22fb6ca
DD
2692 if(Perl_isnan(anv))
2693 afloat = (float)NV_NAN;
2694 else
a7157111
JH
2695# endif
2696# ifdef NV_INF
919894b7
DM
2697 /* a simple cast to float is undefined if outside
2698 * the range of values that can be represented */
2699 afloat = (float)(anv > FLT_MAX ? NV_INF :
2700 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2701# endif
baf3cf9c 2702# endif
3a88beaa 2703 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2704 }
2705 break;
2706 case 'd':
a6ec74c1 2707 while (len-- > 0) {
f337b084
TH
2708 double adouble;
2709 NV anv;
a6ec74c1 2710 fromstr = NEXTFROM;
f337b084 2711 anv = SvNV(fromstr);
a7157111 2712# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2713 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2714 * on Alpha; fake it if we don't have them.
2715 */
f337b084 2716 if (anv > DBL_MAX)
fc241834 2717 adouble = DBL_MAX;
f337b084 2718 else if (anv < -DBL_MAX)
fc241834 2719 adouble = -DBL_MAX;
f337b084 2720 else adouble = (double)anv;
baf3cf9c 2721# else
f337b084 2722 adouble = (double)anv;
baf3cf9c 2723# endif
3a88beaa 2724 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2725 }
2726 break;
fc241834 2727 case 'F': {
275663fa 2728 NV_bytes anv;
1109a392 2729 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2730 while (len-- > 0) {
2731 fromstr = NEXTFROM;
cd07c537
DM
2732#ifdef __GNUC__
2733 /* to work round a gcc/x86 bug; don't use SvNV */
2734 anv.nv = sv_2nv(fromstr);
070e2677
TC
2735# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2736 && LONG_DOUBLESIZE > 10
2737 /* GCC sometimes overwrites the padding in the
2738 assignment above */
2739 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2740# endif
cd07c537 2741#else
275663fa 2742 anv.nv = SvNV(fromstr);
cd07c537 2743#endif
3a88beaa 2744 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2745 }
2746 break;
fc241834 2747 }
92d41999 2748#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2749 case 'D': {
275663fa 2750 ld_bytes aldouble;
1109a392
MHM
2751 /* long doubles can have unused bits, which may be nonzero */
2752 Zero(&aldouble, 1, long double);
92d41999
JH
2753 while (len-- > 0) {
2754 fromstr = NEXTFROM;
cd07c537
DM
2755# ifdef __GNUC__
2756 /* to work round a gcc/x86 bug; don't use SvNV */
2757 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2758# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2759 /* GCC sometimes overwrites the padding in the
2760 assignment above */
2761 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2762# endif
cd07c537 2763# else
275663fa 2764 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2765# endif
3a88beaa
NC
2766 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2767 needs_swap);
92d41999
JH
2768 }
2769 break;
fc241834 2770 }
92d41999 2771#endif
068bd2e7 2772 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2773 case 'n':
2774 while (len-- > 0) {
f337b084 2775 I16 ai16;
a6ec74c1 2776 fromstr = NEXTFROM;
354b74ae 2777 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2778 ai16 = PerlSock_htons(ai16);
3a88beaa 2779 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2780 }
2781 break;
068bd2e7 2782 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2783 case 'v':
2784 while (len-- > 0) {
f337b084 2785 I16 ai16;
a6ec74c1 2786 fromstr = NEXTFROM;
354b74ae 2787 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2788 ai16 = htovs(ai16);
3a88beaa 2789 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2790 }
2791 break;
49704364 2792 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2793#if SHORTSIZE != SIZE16
fc241834 2794 while (len-- > 0) {
f337b084 2795 unsigned short aushort;
fc241834 2796 fromstr = NEXTFROM;
354b74ae 2797 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2798 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2799 }
49704364
WL
2800 break;
2801#else
924ba076 2802 /* FALLTHROUGH */
a6ec74c1 2803#endif
49704364 2804 case 'S':
fc241834 2805 while (len-- > 0) {
f337b084 2806 U16 au16;
fc241834 2807 fromstr = NEXTFROM;
354b74ae 2808 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2809 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2810 }
2811 break;
49704364 2812 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2813#if SHORTSIZE != SIZE16
fc241834 2814 while (len-- > 0) {
f337b084 2815 short ashort;
fc241834 2816 fromstr = NEXTFROM;
354b74ae 2817 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2818 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2819 }
49704364
WL
2820 break;
2821#else
924ba076 2822 /* FALLTHROUGH */
a6ec74c1 2823#endif
49704364
WL
2824 case 's':
2825 while (len-- > 0) {
f337b084 2826 I16 ai16;
49704364 2827 fromstr = NEXTFROM;
354b74ae 2828 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2829 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2830 }
2831 break;
2832 case 'I':
49704364 2833 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2834 while (len-- > 0) {
f337b084 2835 unsigned int auint;
a6ec74c1 2836 fromstr = NEXTFROM;
354b74ae 2837 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2838 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2839 }
2840 break;
92d41999
JH
2841 case 'j':
2842 while (len-- > 0) {
f337b084 2843 IV aiv;
92d41999 2844 fromstr = NEXTFROM;
354b74ae 2845 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2846 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2847 }
2848 break;
2849 case 'J':
2850 while (len-- > 0) {
f337b084 2851 UV auv;
92d41999 2852 fromstr = NEXTFROM;
354b74ae 2853 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2854 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2855 }
2856 break;
a6ec74c1
JH
2857 case 'w':
2858 while (len-- > 0) {
f337b084 2859 NV anv;
a6ec74c1 2860 fromstr = NEXTFROM;
83388ae2 2861 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2862 anv = SvNV_nomg(fromstr);
a6ec74c1 2863
f337b084
TH
2864 if (anv < 0) {
2865 *cur = '\0';
b162af07 2866 SvCUR_set(cat, cur - start);
49704364 2867 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2868 }
a6ec74c1 2869
196b62db
NC
2870 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2871 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2872 any negative IVs will have already been got by the croak()
2873 above. IOK is untrue for fractions, so we test them
2874 against UV_MAX_P1. */
f337b084
TH
2875 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2876 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2877 char *in = buf + sizeof(buf);
1e9a122e 2878 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2879
2880 do {
eb160463 2881 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2882 auv >>= 7;
2883 } while (auv);
2884 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2885 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2886 in, (buf + sizeof(buf)) - in);
2887 } else if (SvPOKp(fromstr))
2888 goto w_string;
a6ec74c1 2889 else if (SvNOKp(fromstr)) {
0258719b 2890 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2891 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2892 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2893 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2894 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2895 Some C compilers are strict about integral constant
2896 expressions so we conservatively divide by a slightly
2897 smaller integer instead of multiplying by the exact
2898 floating-point value.
0258719b
NC
2899 */
2900#ifdef NV_MAX_10_EXP
f337b084 2901 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2902 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2903#else
f337b084 2904 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2905 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2906#endif
a6ec74c1
JH
2907 char *in = buf + sizeof(buf);
2908
8b6e33c7 2909 anv = Perl_floor(anv);
a6ec74c1 2910 do {
8b6e33c7 2911 const NV next = Perl_floor(anv / 128);
a6ec74c1 2912 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2913 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2914 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2915 anv = next;
2916 } while (anv > 0);
a6ec74c1 2917 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2918 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2919 in, (buf + sizeof(buf)) - in);
2920 } else {
8b6e33c7
AL
2921 const char *from;
2922 char *result, *in;
735b914b
JH
2923 SV *norm;
2924 STRLEN len;
2925 bool done;
2926
f337b084 2927 w_string:
735b914b 2928 /* Copy string and check for compliance */
1e9a122e 2929 from = SvPV_nomg_const(fromstr, len);
735b914b 2930 if ((norm = is_an_int(from, len)) == NULL)
49704364 2931 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2932
a02a5408 2933 Newx(result, len, char);
735b914b
JH
2934 in = result + len;
2935 done = FALSE;
f337b084 2936 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2937 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2938 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2939 in, (result + len) - in);
735b914b
JH
2940 Safefree(result);
2941 SvREFCNT_dec(norm); /* free norm */
fc241834 2942 }
a6ec74c1
JH
2943 }
2944 break;
2945 case 'i':
49704364 2946 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2947 while (len-- > 0) {
f337b084 2948 int aint;
a6ec74c1 2949 fromstr = NEXTFROM;
354b74ae 2950 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2951 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2952 }
2953 break;
068bd2e7 2954 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2955 case 'N':
2956 while (len-- > 0) {
f337b084 2957 U32 au32;
a6ec74c1 2958 fromstr = NEXTFROM;
354b74ae 2959 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2960 au32 = PerlSock_htonl(au32);
3a88beaa 2961 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2962 }
2963 break;
068bd2e7 2964 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2965 case 'V':
2966 while (len-- > 0) {
f337b084 2967 U32 au32;
a6ec74c1 2968 fromstr = NEXTFROM;
354b74ae 2969 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2970 au32 = htovl(au32);
3a88beaa 2971 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2972 }
2973 break;
49704364 2974 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2975#if LONGSIZE != SIZE32
fc241834 2976 while (len-- > 0) {
f337b084 2977 unsigned long aulong;
fc241834 2978 fromstr = NEXTFROM;
354b74ae 2979 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2980 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2981 }
49704364
WL
2982 break;
2983#else
2984 /* Fall though! */
a6ec74c1 2985#endif
49704364 2986 case 'L':
fc241834 2987 while (len-- > 0) {
f337b084 2988 U32 au32;
fc241834 2989 fromstr = NEXTFROM;
354b74ae 2990 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2991 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2992 }
2993 break;
49704364 2994 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2995#if LONGSIZE != SIZE32
fc241834 2996 while (len-- > 0) {
f337b084 2997 long along;
fc241834 2998 fromstr = NEXTFROM;
354b74ae 2999 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3000 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 3001 }
49704364
WL
3002 break;
3003#else
3004 /* Fall though! */
a6ec74c1 3005#endif
49704364
WL
3006 case 'l':
3007 while (len-- > 0) {
f337b084 3008 I32 ai32;
49704364 3009 fromstr = NEXTFROM;
354b74ae 3010 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3011 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3012 }
3013 break;
c174bf3b 3014#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3015 case 'Q':
3016 while (len-- > 0) {
f337b084 3017 Uquad_t auquad;
a6ec74c1 3018 fromstr = NEXTFROM;
354b74ae 3019 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3020 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3021 }
3022 break;
3023 case 'q':
3024 while (len-- > 0) {
f337b084 3025 Quad_t aquad;
a6ec74c1 3026 fromstr = NEXTFROM;
354b74ae 3027 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3028 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3029 }
3030 break;
1640b983 3031#endif
a6ec74c1
JH
3032 case 'P':
3033 len = 1; /* assume SV is correct length */
f337b084 3034 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3035 /* FALLTHROUGH */
a6ec74c1
JH
3036 case 'p':
3037 while (len-- > 0) {
83003860 3038 const char *aptr;
f337b084 3039
a6ec74c1 3040 fromstr = NEXTFROM;
28a4f200
TH
3041 SvGETMAGIC(fromstr);
3042 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3043 else {
a6ec74c1
JH
3044 /* XXX better yet, could spirit away the string to
3045 * a safe spot and hang on to it until the result
3046 * of pack() (and all copies of the result) are
3047 * gone.
3048 */
b199a398
FC
3049 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3050 || (SvPADTMP(fromstr) &&
a2a5de95
NC
3051 !SvREADONLY(fromstr)))) {
3052 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3053 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3054 }
3055 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3056 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3057 else
2596d9fe 3058 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3059 }
3a88beaa 3060 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3061 }
3062 break;
fc241834 3063 case 'u': {
f7fe979e 3064 const char *aptr, *aend;
fc241834 3065 bool from_utf8;
f337b084 3066
a6ec74c1 3067 fromstr = NEXTFROM;
fc241834
RGS
3068 if (len <= 2) len = 45;
3069 else len = len / 3 * 3;
3070 if (len >= 64) {
a2a5de95
NC
3071 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3072 "Field too wide in 'u' format in pack");
fc241834
RGS
3073 len = 63;
3074 }
83003860 3075 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3076 from_utf8 = DO_UTF8(fromstr);
3077 if (from_utf8) {
3078 aend = aptr + fromlen;
3f63b0e5 3079 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3080 } else aend = NULL; /* Unused, but keep compilers happy */
3081 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3082 while (fromlen > 0) {
fc241834 3083 U8 *end;
a6ec74c1 3084 I32 todo;
fc241834 3085 U8 hunk[1+63/3*4+1];
a6ec74c1 3086
eb160463 3087 if ((I32)fromlen > len)
a6ec74c1
JH
3088 todo = len;
3089 else
3090 todo = fromlen;
fc241834
RGS
3091 if (from_utf8) {
3092 char buffer[64];
9df874cd 3093 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3094 'u' | TYPE_IS_PACK)) {
3095 *cur = '\0';
b162af07 3096 SvCUR_set(cat, cur - start);
5637ef5b
NC
3097 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3098 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3099 aptr, aend, buffer, (long) todo);
fc241834 3100 }
e68aed92 3101 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3102 } else {
e68aed92 3103 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3104 aptr += todo;
3105 }
3a88beaa 3106 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3107 fromlen -= todo;
3108 }
a6ec74c1
JH
3109 break;
3110 }
f337b084
TH
3111 }
3112 *cur = '\0';
b162af07 3113 SvCUR_set(cat, cur - start);
f337b084 3114 no_change:
49704364 3115 *symptr = lookahead;
a6ec74c1 3116 }
49704364 3117 return beglist;
18529408
IZ
3118}
3119#undef NEXTFROM
3120
3121
3122PP(pp_pack)
3123{
20b7effb 3124 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3125 SV *cat = TARG;
18529408 3126 STRLEN fromlen;
349d4f2f 3127 SV *pat_sv = *++MARK;
eb578fdb
KW
3128 const char *pat = SvPV_const(pat_sv, fromlen);
3129 const char *patend = pat + fromlen;
18529408
IZ
3130
3131 MARK++;
e8f7c79f 3132 SvPVCLEAR(cat);
f337b084 3133 SvUTF8_off(cat);
18529408 3134
7accc089 3135 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3136
a6ec74c1
JH
3137 SvSETMAGIC(cat);
3138 SP = ORIGMARK;
3139 PUSHs(cat);
3140 RETURN;
3141}
a6ec74c1 3142
73cb7263 3143/*
14d04a33 3144 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3145 */