This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
pp_pack.c: Remove needless branch
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
9df874cd 154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
LW
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
e6006968
KW
219#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
a6ec74c1 221
66c611c5 222/* type modifiers */
62f95557 223#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
224#define TYPE_IS_BIG_ENDIAN 0x200
225#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 226#define TYPE_IS_PACK 0x800
1109a392 227#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 228#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
229#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
230
7212898e
NC
231# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
232# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
233
234# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
235
78d46eaa 236#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 237#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
238#define PACK_SIZE_MASK 0x3F
239
4ad176ef 240#include "packsizetables.inc"
78d46eaa 241
20aa3a38
NC
242static void
243S_reverse_copy(const char *src, char *dest, STRLEN len)
244{
245 dest += len;
246 while (len--)
247 *--dest = *src++;
248}
249
08ca2aa3 250STATIC U8
9df874cd 251utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 252{
08ca2aa3 253 STRLEN retlen;
df337714
KW
254 UV val;
255
256 if (*s >= end) {
257 goto croak;
258 }
259 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 260 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 261 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
262 warnings), but these two mean we make no progress in the string and
263 might enter an infinite loop */
df337714
KW
264 if (retlen == (STRLEN) -1)
265 croak:
f337b084
TH
266 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
267 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 268 if (val >= 0x100) {
a2a5de95
NC
269 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
270 "Character in '%c' format wrapped in unpack",
271 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
272 val &= 0xff;
273 }
274 *s += retlen;
fe2774ed 275 return (U8)val;
08ca2aa3
TH
276}
277
f337b084 278#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 279 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
280 *(U8 *)(s)++)
281
282STATIC bool
9df874cd 283S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
284{
285 UV val;
286 STRLEN retlen;
f7fe979e 287 const char *from = *s;
08ca2aa3 288 int bad = 0;
f7fe979e 289 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 290 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
291 const bool needs_swap = NEEDS_SWAP(datumtype);
292
228e69a7 293 if (UNLIKELY(needs_swap))
20aa3a38
NC
294 buf += buf_len;
295
08ca2aa3
TH
296 for (;buf_len > 0; buf_len--) {
297 if (from >= end) return FALSE;
f337b084 298 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
df337714 299 if (retlen == (STRLEN) -1) {
08ca2aa3
TH
300 from += UTF8SKIP(from);
301 bad |= 1;
302 } else from += retlen;
303 if (val >= 0x100) {
304 bad |= 2;
305 val &= 0xff;
306 }
228e69a7 307 if (UNLIKELY(needs_swap))
20aa3a38
NC
308 *(U8 *)--buf = (U8)val;
309 else
310 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
311 }
312 /* We have enough characters for the buffer. Did we have problems ? */
313 if (bad) {
314 if (bad & 1) {
315 /* Rewalk the string fragment while warning */
f7fe979e 316 const char *ptr;
9e27e96a 317 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
318 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
319 if (ptr >= end) break;
c80e42f3 320 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
321 }
322 if (from > end) from = end;
323 }
a2a5de95
NC
324 if ((bad & 2))
325 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 326 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
327 "Character(s) in '%c' format wrapped in %s",
328 (int) TYPE_NO_MODIFIERS(datumtype),
329 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
330 }
331 *s = from;
332 return TRUE;
333}
334
64844641 335STATIC char *
d21d7215
KW
336S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
337 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 338
228e69a7 339 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
340 const U8 *p = start + len;
341 while (p-- > start) {
55d09dc8 342 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
343 }
344 } else {
345 const U8 * const end = start + len;
346 while (start < end) {
55d09dc8 347 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
348 start++;
349 }
f337b084 350 }
64844641 351 return dest;
f337b084
TH
352}
353
3a88beaa 354#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 355STMT_START { \
228e69a7 356 if (UNLIKELY(utf8)) \
d21d7215 357 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 358 else { \
228e69a7 359 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
360 S_reverse_copy((char *)(buf), cur, len); \
361 else \
362 Copy(buf, cur, len, char); \
230e1fce
NC
363 (cur) += (len); \
364 } \
f337b084
TH
365} STMT_END
366
367#define GROWING(utf8, cat, start, cur, in_len) \
368STMT_START { \
369 STRLEN glen = (in_len); \
3473cf63 370 if (utf8) glen *= UTF8_EXPAND; \
f337b084 371 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 372 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
373 (cur) = (start) + SvCUR(cat); \
374 } \
375} STMT_END
376
377#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
378STMT_START { \
f7fe979e 379 const STRLEN glen = (in_len); \
f337b084 380 STRLEN gl = glen; \
3473cf63 381 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
382 if ((cur) + gl >= (start) + SvLEN(cat)) { \
383 *cur = '\0'; \
b162af07 384 SvCUR_set((cat), (cur) - (start)); \
0bd48802 385 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
386 (cur) = (start) + SvCUR(cat); \
387 } \
3a88beaa 388 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
389} STMT_END
390
391#define PUSH_BYTE(utf8, s, byte) \
392STMT_START { \
393 if (utf8) { \
f7fe979e 394 const U8 au8 = (byte); \
d21d7215 395 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
396 } else *(U8 *)(s)++ = (byte); \
397} STMT_END
398
399/* Only to be used inside a loop (see the break) */
400#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
401STMT_START { \
402 STRLEN retlen; \
403 if (str >= end) break; \
404 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
df337714 405 if (retlen == (STRLEN) -1) { \
f337b084
TH
406 *cur = '\0'; \
407 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
408 } \
409 str += retlen; \
410} STMT_END
411
f7fe979e
AL
412static const char *_action( const tempsym_t* symptr )
413{
10edeb5d 414 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
415}
416
206947d2 417/* Returns the sizeof() struct described by pat */
028d1f6d 418STATIC I32
f337b084 419S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 420{
f337b084 421 I32 total = 0;
206947d2 422
7918f24d
NC
423 PERL_ARGS_ASSERT_MEASURE_STRUCT;
424
49704364 425 while (next_symbol(symptr)) {
f337b084 426 I32 len;
f7fe979e 427 int size;
f337b084
TH
428
429 switch (symptr->howlen) {
fc241834 430 case e_star:
49704364 431 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 432 _action( symptr ) );
81d52ecd 433
f337b084
TH
434 default:
435 /* e_no_len and e_number */
436 len = symptr->length;
437 break;
49704364
LW
438 }
439
a7a3cfaa 440 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 441 if (!size) {
f7fe979e 442 int star;
80a13697
NC
443 /* endianness doesn't influence the size of a type */
444 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
445 default:
446 Perl_croak(aTHX_ "Invalid type '%c' in %s",
447 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 448 _action( symptr ) );
28be1210
TH
449 case '.' | TYPE_IS_SHRIEKING:
450 case '@' | TYPE_IS_SHRIEKING:
80a13697 451 case '@':
28be1210 452 case '.':
80a13697
NC
453 case '/':
454 case 'U': /* XXXX Is it correct? */
455 case 'w':
456 case 'u':
457 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 458 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 459 _action( symptr ) );
80a13697
NC
460 case '%':
461 size = 0;
462 break;
463 case '(':
fc241834
RGS
464 {
465 tempsym_t savsym = *symptr;
466 symptr->patptr = savsym.grpbeg;
467 symptr->patend = savsym.grpend;
468 /* XXXX Theoretically, we need to measure many times at
469 different positions, since the subexpression may contain
470 alignment commands, but be not of aligned length.
471 Need to detect this and croak(). */
472 size = measure_struct(symptr);
473 *symptr = savsym;
474 break;
475 }
80a13697
NC
476 case 'X' | TYPE_IS_SHRIEKING:
477 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
478 */
479 if (!len) /* Avoid division by 0 */
480 len = 1;
481 len = total % len; /* Assumed: the start is aligned. */
924ba076 482 /* FALLTHROUGH */
80a13697
NC
483 case 'X':
484 size = -1;
485 if (total < len)
f7fe979e 486 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
487 break;
488 case 'x' | TYPE_IS_SHRIEKING:
489 if (!len) /* Avoid division by 0 */
490 len = 1;
491 star = total % len; /* Assumed: the start is aligned. */
492 if (star) /* Other portable ways? */
493 len = len - star;
494 else
495 len = 0;
924ba076 496 /* FALLTHROUGH */
80a13697
NC
497 case 'x':
498 case 'A':
499 case 'Z':
500 case 'a':
80a13697
NC
501 size = 1;
502 break;
503 case 'B':
504 case 'b':
505 len = (len + 7)/8;
506 size = 1;
507 break;
508 case 'H':
509 case 'h':
510 len = (len + 1)/2;
511 size = 1;
512 break;
78d46eaa 513
80a13697
NC
514 case 'P':
515 len = 1;
516 size = sizeof(char*);
78d46eaa
NC
517 break;
518 }
206947d2
IZ
519 }
520 total += len * size;
521 }
522 return total;
523}
524
49704364
LW
525
526/* locate matching closing parenthesis or bracket
527 * returns char pointer to char after match, or NULL
528 */
f7fe979e 529STATIC const char *
5aaab254 530S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 531{
7918f24d
NC
532 PERL_ARGS_ASSERT_GROUP_END;
533
49704364 534 while (patptr < patend) {
f7fe979e 535 const char c = *patptr++;
49704364
LW
536
537 if (isSPACE(c))
538 continue;
539 else if (c == ender)
540 return patptr-1;
541 else if (c == '#') {
542 while (patptr < patend && *patptr != '\n')
543 patptr++;
544 continue;
545 } else if (c == '(')
546 patptr = group_end(patptr, patend, ')') + 1;
547 else if (c == '[')
548 patptr = group_end(patptr, patend, ']') + 1;
18529408 549 }
49704364
LW
550 Perl_croak(aTHX_ "No group ending character '%c' found in template",
551 ender);
a25b5927 552 NOT_REACHED; /* NOTREACHED */
18529408
IZ
553}
554
49704364
LW
555
556/* Convert unsigned decimal number to binary.
557 * Expects a pointer to the first digit and address of length variable
558 * Advances char pointer to 1st non-digit char and returns number
fc241834 559 */
f7fe979e 560STATIC const char *
5aaab254 561S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
562{
563 I32 len = *patptr++ - '0';
7918f24d
NC
564
565 PERL_ARGS_ASSERT_GET_NUM;
566
49704364
LW
567 while (isDIGIT(*patptr)) {
568 if (len >= 0x7FFFFFFF/10)
569 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
570 len = (len * 10) + (*patptr++ - '0');
571 }
572 *lenptr = len;
573 return patptr;
574}
575
576/* The marvellous template parsing routine: Using state stored in *symptr,
577 * locates next template code and count
578 */
579STATIC bool
f337b084 580S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 581{
f7fe979e 582 const char* patptr = symptr->patptr;
0bcc34c2 583 const char* const patend = symptr->patend;
49704364 584
7918f24d
NC
585 PERL_ARGS_ASSERT_NEXT_SYMBOL;
586
49704364
LW
587 symptr->flags &= ~FLAG_SLASH;
588
589 while (patptr < patend) {
590 if (isSPACE(*patptr))
591 patptr++;
592 else if (*patptr == '#') {
593 patptr++;
594 while (patptr < patend && *patptr != '\n')
595 patptr++;
596 if (patptr < patend)
597 patptr++;
598 } else {
fc241834 599 /* We should have found a template code */
49704364 600 I32 code = *patptr++ & 0xFF;
66c611c5 601 U32 inherited_modifiers = 0;
49704364
LW
602
603 if (code == ','){ /* grandfather in commas but with a warning */
604 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
605 symptr->flags |= FLAG_COMMA;
606 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 607 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
608 }
609 continue;
610 }
fc241834 611
49704364 612 /* for '(', skip to ')' */
fc241834 613 if (code == '(') {
49704364
LW
614 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
615 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 616 _action( symptr ) );
49704364
LW
617 symptr->grpbeg = patptr;
618 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
619 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
620 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 621 _action( symptr ) );
49704364
LW
622 }
623
66c611c5
MHM
624 /* look for group modifiers to inherit */
625 if (TYPE_ENDIANNESS(symptr->flags)) {
626 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
627 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
628 }
629
1109a392
MHM
630 /* look for modifiers */
631 while (patptr < patend) {
b7787f18
AL
632 const char *allowed;
633 I32 modifier;
1109a392
MHM
634 switch (*patptr) {
635 case '!':
636 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 637 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
638 break;
639 case '>':
640 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 641 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
642 break;
643 case '<':
644 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 645 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
646 break;
647 default:
b7787f18
AL
648 allowed = "";
649 modifier = 0;
1109a392
MHM
650 break;
651 }
66c611c5 652
1109a392
MHM
653 if (modifier == 0)
654 break;
66c611c5 655
1109a392
MHM
656 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
657 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 658 allowed, _action( symptr ) );
66c611c5
MHM
659
660 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 661 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 662 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
663 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
664 TYPE_ENDIANNESS_MASK)
665 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 666 *patptr, _action( symptr ) );
66c611c5 667
a2a5de95
NC
668 if ((code & modifier)) {
669 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
670 "Duplicate modifier '%c' after '%c' in %s",
671 *patptr, (int) TYPE_NO_MODIFIERS(code),
672 _action( symptr ) );
1109a392 673 }
66c611c5 674
1109a392
MHM
675 code |= modifier;
676 patptr++;
49704364
LW
677 }
678
66c611c5
MHM
679 /* inherit modifiers */
680 code |= inherited_modifiers;
681
fc241834 682 /* look for count and/or / */
49704364
LW
683 if (patptr < patend) {
684 if (isDIGIT(*patptr)) {
685 patptr = get_num( patptr, &symptr->length );
686 symptr->howlen = e_number;
687
688 } else if (*patptr == '*') {
689 patptr++;
690 symptr->howlen = e_star;
691
692 } else if (*patptr == '[') {
f7fe979e 693 const char* lenptr = ++patptr;
49704364
LW
694 symptr->howlen = e_number;
695 patptr = group_end( patptr, patend, ']' ) + 1;
696 /* what kind of [] is it? */
697 if (isDIGIT(*lenptr)) {
698 lenptr = get_num( lenptr, &symptr->length );
699 if( *lenptr != ']' )
700 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 701 _action( symptr ) );
49704364
LW
702 } else {
703 tempsym_t savsym = *symptr;
704 symptr->patend = patptr-1;
705 symptr->patptr = lenptr;
706 savsym.length = measure_struct(symptr);
707 *symptr = savsym;
708 }
709 } else {
710 symptr->howlen = e_no_len;
711 symptr->length = 1;
712 }
713
714 /* try to find / */
715 while (patptr < patend) {
716 if (isSPACE(*patptr))
717 patptr++;
718 else if (*patptr == '#') {
719 patptr++;
720 while (patptr < patend && *patptr != '\n')
721 patptr++;
722 if (patptr < patend)
723 patptr++;
724 } else {
66c611c5 725 if (*patptr == '/') {
49704364
LW
726 symptr->flags |= FLAG_SLASH;
727 patptr++;
66c611c5
MHM
728 if (patptr < patend &&
729 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 730 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 731 _action( symptr ) );
49704364
LW
732 }
733 break;
734 }
18529408 735 }
49704364
LW
736 } else {
737 /* at end - no count, no / */
738 symptr->howlen = e_no_len;
739 symptr->length = 1;
740 }
741
742 symptr->code = code;
fc241834 743 symptr->patptr = patptr;
49704364 744 return TRUE;
18529408 745 }
49704364 746 }
fc241834 747 symptr->patptr = patptr;
49704364 748 return FALSE;
18529408
IZ
749}
750
18529408 751/*
fc241834 752 There is no way to cleanly handle the case where we should process the
08ca2aa3 753 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
754 (e.g. estimates like strend-s as an upper bound for the number of
755 characters left wouldn't work). So if we foresee the need of this
756 (pattern starts with U or contains U0), we want to work on the encoded
757 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
758 themselves if they need to do a lot of unpacks like this on it
759*/
fc241834 760STATIC bool
08ca2aa3
TH
761need_utf8(const char *pat, const char *patend)
762{
763 bool first = TRUE;
7918f24d
NC
764
765 PERL_ARGS_ASSERT_NEED_UTF8;
766
08ca2aa3
TH
767 while (pat < patend) {
768 if (pat[0] == '#') {
769 pat++;
f7fe979e 770 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
771 if (!pat) return FALSE;
772 } else if (pat[0] == 'U') {
773 if (first || pat[1] == '0') return TRUE;
774 } else first = FALSE;
775 pat++;
776 }
777 return FALSE;
778}
779
780STATIC char
781first_symbol(const char *pat, const char *patend) {
7918f24d
NC
782 PERL_ARGS_ASSERT_FIRST_SYMBOL;
783
08ca2aa3
TH
784 while (pat < patend) {
785 if (pat[0] != '#') return pat[0];
786 pat++;
f7fe979e 787 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
788 if (!pat) return 0;
789 pat++;
790 }
791 return 0;
792}
793
794/*
dcccc8ff
KW
795
796=head1 Pack and Unpack
797
7accc089
JH
798=for apidoc unpackstring
799
796b6530 800The engine implementing the C<unpack()> Perl function.
21ebfc7a 801
796b6530
KW
802Using the template C<pat..patend>, this function unpacks the string
803C<s..strend> into a number of mortal SVs, which it pushes onto the perl
804argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 805C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
806pushed elements.
807
796b6530
KW
808The C<strend> and C<patend> pointers should point to the byte following the
809last character of each string.
21ebfc7a
DM
810
811Although this function returns its values on the perl argument stack, it
812doesn't take any parameters from that stack (and thus in particular
796b6530 813there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 814example).
7accc089
JH
815
816=cut */
817
818I32
f7fe979e 819Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 820{
f7fe979e 821 tempsym_t sym;
08ca2aa3 822
7918f24d
NC
823 PERL_ARGS_ASSERT_UNPACKSTRING;
824
f337b084 825 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
826 else if (need_utf8(pat, patend)) {
827 /* We probably should try to avoid this in case a scalar context call
828 wouldn't get to the "U0" */
829 STRLEN len = strend - s;
230e1fce 830 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
831 SAVEFREEPV(s);
832 strend = s + len;
f337b084 833 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
834 }
835
f337b084
TH
836 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
837 flags |= FLAG_PARSE_UTF8;
08ca2aa3 838
f7fe979e 839 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
840
841 return unpack_rec(&sym, s, s, strend, NULL );
842}
843
4136a0f7 844STATIC I32
f7fe979e 845S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 846{
20b7effb 847 dSP;
3297d27d 848 SV *sv = NULL;
f7fe979e 849 const I32 start_sp_offset = SP - PL_stack_base;
49704364 850 howlen_t howlen;
a6ec74c1 851 I32 checksum = 0;
92d41999 852 UV cuv = 0;
a6ec74c1 853 NV cdouble = 0.0;
f337b084 854 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 855 bool beyond = FALSE;
21c16052 856 bool explicit_length;
9e27e96a 857 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 858 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
859
860 PERL_ARGS_ASSERT_UNPACK_REC;
861
28be1210 862 symptr->strbeg = s - strbeg;
49704364 863
49704364 864 while (next_symbol(symptr)) {
a7a3cfaa 865 packprops_t props;
9e27e96a 866 I32 len;
f337b084 867 I32 datumtype = symptr->code;
a1219b5e 868 bool needs_swap;
206947d2 869 /* do first one only unless in list context
08ca2aa3 870 / is implemented by unpacking the count, then popping it from the
206947d2 871 stack, so must check that we're not in the middle of a / */
49704364 872 if ( unpack_only_one
206947d2 873 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 874 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 875 break;
49704364 876
f337b084 877 switch (howlen = symptr->howlen) {
fc241834
RGS
878 case e_star:
879 len = strend - strbeg; /* long enough */
49704364 880 break;
f337b084
TH
881 default:
882 /* e_no_len and e_number */
883 len = symptr->length;
884 break;
49704364 885 }
18529408 886
21c16052 887 explicit_length = TRUE;
a6ec74c1 888 redo_switch:
49704364 889 beyond = s >= strend;
a7a3cfaa
TH
890
891 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
892 if (props) {
893 /* props nonzero means we can process this letter. */
9e27e96a
AL
894 const long size = props & PACK_SIZE_MASK;
895 const long howmany = (strend - s) / size;
a7a3cfaa
TH
896 if (len > howmany)
897 len = howmany;
898
899 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
900 if (len && unpack_only_one) len = 1;
901 EXTEND(SP, len);
902 EXTEND_MORTAL(len);
78d46eaa
NC
903 }
904 }
a7a3cfaa 905
a1219b5e
NC
906 needs_swap = NEEDS_SWAP(datumtype);
907
1109a392 908 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 909 default:
1109a392 910 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 911
a6ec74c1 912 case '%':
49704364 913 if (howlen == e_no_len)
18529408 914 len = 16; /* len is not specified */
a6ec74c1 915 checksum = len;
92d41999 916 cuv = 0;
a6ec74c1 917 cdouble = 0;
18529408 918 continue;
81d52ecd 919
18529408
IZ
920 case '(':
921 {
49704364 922 tempsym_t savsym = *symptr;
9e27e96a 923 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 924 symptr->flags |= group_modifiers;
49704364 925 symptr->patend = savsym.grpend;
28be1210 926 symptr->previous = &savsym;
49704364 927 symptr->level++;
18529408 928 PUTBACK;
c6f750d1 929 if (len && unpack_only_one) len = 1;
18529408 930 while (len--) {
49704364 931 symptr->patptr = savsym.grpbeg;
f337b084
TH
932 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
933 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
934 unpack_rec(symptr, s, strbeg, strend, &s);
935 if (s == strend && savsym.howlen == e_star)
49704364 936 break; /* No way to continue */
18529408
IZ
937 }
938 SPAGAIN;
28be1210 939 savsym.flags = symptr->flags & ~group_modifiers;
49704364 940 *symptr = savsym;
18529408
IZ
941 break;
942 }
28be1210 943 case '.' | TYPE_IS_SHRIEKING:
28be1210 944 case '.': {
9e27e96a 945 const char *from;
28be1210 946 SV *sv;
9e27e96a 947 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
948 if (howlen == e_star) from = strbeg;
949 else if (len <= 0) from = s;
950 else {
951 tempsym_t *group = symptr;
952
953 while (--len && group) group = group->previous;
954 from = group ? strbeg + group->strbeg : strbeg;
955 }
956 sv = from <= s ?
00646304
CB
957 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
958 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 959 mXPUSHs(sv);
28be1210
TH
960 break;
961 }
28be1210 962 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 963 case '@':
28be1210 964 s = strbeg + symptr->strbeg;
28be1210 965 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 966 {
08ca2aa3
TH
967 while (len > 0) {
968 if (s >= strend)
969 Perl_croak(aTHX_ "'@' outside of string in unpack");
970 s += UTF8SKIP(s);
971 len--;
972 }
973 if (s > strend)
974 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
975 } else {
28be1210 976 if (strend-s < len)
fc241834 977 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 978 s += len;
08ca2aa3 979 }
a6ec74c1 980 break;
62f95557
IZ
981 case 'X' | TYPE_IS_SHRIEKING:
982 if (!len) /* Avoid division by 0 */
983 len = 1;
08ca2aa3 984 if (utf8) {
f7fe979e 985 const char *hop, *last;
f337b084
TH
986 I32 l = len;
987 hop = last = strbeg;
988 while (hop < s) {
989 hop += UTF8SKIP(hop);
990 if (--l == 0) {
08ca2aa3 991 last = hop;
f337b084
TH
992 l = len;
993 }
fc241834 994 }
f337b084
TH
995 if (last > s)
996 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
997 s = last;
998 break;
f337b084
TH
999 }
1000 len = (s - strbeg) % len;
924ba076 1001 /* FALLTHROUGH */
a6ec74c1 1002 case 'X':
08ca2aa3
TH
1003 if (utf8) {
1004 while (len > 0) {
1005 if (s <= strbeg)
1006 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1007 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1008 if (s <= strbeg)
1009 Perl_croak(aTHX_ "'X' outside of string in unpack");
1010 }
1011 len--;
1012 }
1013 } else {
fc241834
RGS
1014 if (len > s - strbeg)
1015 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1016 s -= len;
08ca2aa3 1017 }
a6ec74c1 1018 break;
9e27e96a
AL
1019 case 'x' | TYPE_IS_SHRIEKING: {
1020 I32 ai32;
62f95557
IZ
1021 if (!len) /* Avoid division by 0 */
1022 len = 1;
230e1fce
NC
1023 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1024 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1025 if (ai32 == 0) break;
1026 len -= ai32;
9e27e96a 1027 }
924ba076 1028 /* FALLTHROUGH */
a6ec74c1 1029 case 'x':
08ca2aa3
TH
1030 if (utf8) {
1031 while (len>0) {
1032 if (s >= strend)
1033 Perl_croak(aTHX_ "'x' outside of string in unpack");
1034 s += UTF8SKIP(s);
1035 len--;
1036 }
1037 } else {
fc241834
RGS
1038 if (len > strend - s)
1039 Perl_croak(aTHX_ "'x' outside of string in unpack");
1040 s += len;
f337b084 1041 }
a6ec74c1
JH
1042 break;
1043 case '/':
49704364 1044 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1045
a6ec74c1
JH
1046 case 'A':
1047 case 'Z':
1048 case 'a':
08ca2aa3
TH
1049 if (checksum) {
1050 /* Preliminary length estimate is assumed done in 'W' */
1051 if (len > strend - s) len = strend - s;
1052 goto W_checksum;
1053 }
1054 if (utf8) {
1055 I32 l;
f7fe979e 1056 const char *hop;
08ca2aa3
TH
1057 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1058 if (hop >= strend) {
1059 if (hop > strend)
1060 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1061 break;
fc241834 1062 }
a6ec74c1 1063 }
08ca2aa3
TH
1064 if (hop > strend)
1065 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1066 len = hop - s;
1067 } else if (len > strend - s)
1068 len = strend - s;
1069
1070 if (datumtype == 'Z') {
1071 /* 'Z' strips stuff after first null */
f7fe979e 1072 const char *ptr, *end;
f337b084
TH
1073 end = s + len;
1074 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1075 sv = newSVpvn(s, ptr-s);
1076 if (howlen == e_star) /* exact for 'Z*' */
1077 len = ptr-s + (ptr != strend ? 1 : 0);
1078 } else if (datumtype == 'A') {
1079 /* 'A' strips both nulls and spaces */
f7fe979e 1080 const char *ptr;
18bdf90a 1081 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1082 for (ptr = s+len-1; ptr >= s; ptr--) {
1083 if ( *ptr != 0
1084 && !UTF8_IS_CONTINUATION(*ptr)
1085 && !isSPACE_utf8_safe(ptr, strend))
1086 {
1087 break;
1088 }
1089 }
18bdf90a
TH
1090 if (ptr >= s) ptr += UTF8SKIP(ptr);
1091 else ptr++;
28be1210 1092 if (ptr > s+len)
18bdf90a
TH
1093 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1094 } else {
1095 for (ptr = s+len-1; ptr >= s; ptr--)
1096 if (*ptr != 0 && !isSPACE(*ptr)) break;
1097 ptr++;
1098 }
08ca2aa3
TH
1099 sv = newSVpvn(s, ptr-s);
1100 } else sv = newSVpvn(s, len);
1101
1102 if (utf8) {
1103 SvUTF8_on(sv);
1104 /* Undo any upgrade done due to need_utf8() */
f337b084 1105 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1106 sv_utf8_downgrade(sv, 0);
a6ec74c1 1107 }
6e449a3a 1108 mXPUSHs(sv);
08ca2aa3 1109 s += len;
a6ec74c1
JH
1110 break;
1111 case 'B':
08ca2aa3
TH
1112 case 'b': {
1113 char *str;
49704364 1114 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1115 len = (strend - s) * 8;
1116 if (checksum) {
f337b084 1117 if (utf8)
08ca2aa3 1118 while (len >= 8 && s < strend) {
9df874cd 1119 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1120 len -= 8;
1121 }
f337b084 1122 else
fc241834 1123 while (len >= 8) {
08ca2aa3 1124 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1125 len -= 8;
1126 }
08ca2aa3
TH
1127 if (len && s < strend) {
1128 U8 bits;
f337b084
TH
1129 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1130 if (datumtype == 'b')
a6ec74c1 1131 while (len-- > 0) {
92d41999 1132 if (bits & 1) cuv++;
a6ec74c1
JH
1133 bits >>= 1;
1134 }
f337b084 1135 else
a6ec74c1 1136 while (len-- > 0) {
08ca2aa3 1137 if (bits & 0x80) cuv++;
a6ec74c1
JH
1138 bits <<= 1;
1139 }
fc241834 1140 }
a6ec74c1
JH
1141 break;
1142 }
08ca2aa3 1143
561b68a9 1144 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1145 SvPOK_on(sv);
1146 str = SvPVX(sv);
1147 if (datumtype == 'b') {
f337b084 1148 U8 bits = 0;
f7fe979e 1149 const I32 ai32 = len;
08ca2aa3
TH
1150 for (len = 0; len < ai32; len++) {
1151 if (len & 7) bits >>= 1;
1152 else if (utf8) {
1153 if (s >= strend) break;
9df874cd 1154 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1155 } else bits = *(U8 *) s++;
1156 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1157 }
08ca2aa3 1158 } else {
f337b084 1159 U8 bits = 0;
f7fe979e 1160 const I32 ai32 = len;
08ca2aa3
TH
1161 for (len = 0; len < ai32; len++) {
1162 if (len & 7) bits <<= 1;
1163 else if (utf8) {
1164 if (s >= strend) break;
9df874cd 1165 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1166 } else bits = *(U8 *) s++;
1167 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1168 }
1169 }
1170 *str = '\0';
aa07b2f6 1171 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1172 XPUSHs(sv);
a6ec74c1 1173 break;
08ca2aa3 1174 }
a6ec74c1 1175 case 'H':
08ca2aa3 1176 case 'h': {
3297d27d 1177 char *str = NULL;
fc241834 1178 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1179 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1180 len = (strend - s) * 2;
858fe5e1
TC
1181 if (!checksum) {
1182 sv = sv_2mortal(newSV(len ? len : 1));
1183 SvPOK_on(sv);
1184 str = SvPVX(sv);
1185 }
a6ec74c1 1186 if (datumtype == 'h') {
f337b084 1187 U8 bits = 0;
9e27e96a 1188 I32 ai32 = len;
fc241834
RGS
1189 for (len = 0; len < ai32; len++) {
1190 if (len & 1) bits >>= 4;
1191 else if (utf8) {
1192 if (s >= strend) break;
9df874cd 1193 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1194 } else bits = * (U8 *) s++;
858fe5e1
TC
1195 if (!checksum)
1196 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1197 }
08ca2aa3 1198 } else {
f337b084 1199 U8 bits = 0;
f7fe979e 1200 const I32 ai32 = len;
08ca2aa3
TH
1201 for (len = 0; len < ai32; len++) {
1202 if (len & 1) bits <<= 4;
1203 else if (utf8) {
1204 if (s >= strend) break;
9df874cd 1205 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1206 } else bits = *(U8 *) s++;
858fe5e1
TC
1207 if (!checksum)
1208 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1209 }
1210 }
858fe5e1
TC
1211 if (!checksum) {
1212 *str = '\0';
1213 SvCUR_set(sv, str - SvPVX_const(sv));
1214 XPUSHs(sv);
1215 }
a6ec74c1 1216 break;
08ca2aa3 1217 }
1651fc44
ML
1218 case 'C':
1219 if (len == 0) {
1220 if (explicit_length)
1221 /* Switch to "character" mode */
1222 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1223 break;
1224 }
924ba076 1225 /* FALLTHROUGH */
a6ec74c1 1226 case 'c':
1651fc44
ML
1227 while (len-- > 0 && s < strend) {
1228 int aint;
1229 if (utf8)
1230 {
1231 STRLEN retlen;
1232 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1233 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1234 if (retlen == (STRLEN) -1)
1651fc44
ML
1235 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1236 s += retlen;
1237 }
1238 else
1239 aint = *(U8 *)(s)++;
1240 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1241 aint -= 256;
08ca2aa3 1242 if (!checksum)
6e449a3a 1243 mPUSHi(aint);
73cb7263
NC
1244 else if (checksum > bits_in_uv)
1245 cdouble += (NV)aint;
1246 else
1247 cuv += aint;
a6ec74c1
JH
1248 }
1249 break;
08ca2aa3
TH
1250 case 'W':
1251 W_checksum:
1651fc44 1252 if (utf8) {
08ca2aa3 1253 while (len-- > 0 && s < strend) {
08ca2aa3 1254 STRLEN retlen;
f7fe979e 1255 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1256 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1257 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1258 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1259 s += retlen;
1260 if (!checksum)
6e449a3a 1261 mPUSHu(val);
08ca2aa3
TH
1262 else if (checksum > bits_in_uv)
1263 cdouble += (NV) val;
d6d3e8bd 1264 else
08ca2aa3 1265 cuv += val;
fc241834 1266 }
08ca2aa3 1267 } else if (!checksum)
a6ec74c1 1268 while (len-- > 0) {
f7fe979e 1269 const U8 ch = *(U8 *) s++;
6e449a3a 1270 mPUSHu(ch);
a6ec74c1 1271 }
08ca2aa3
TH
1272 else if (checksum > bits_in_uv)
1273 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1274 else
1275 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1276 break;
1277 case 'U':
35bcd338 1278 if (len == 0) {
c5333953 1279 if (explicit_length && howlen != e_star) {
08ca2aa3 1280 /* Switch to "bytes in UTF-8" mode */
f337b084 1281 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1282 else
1283 /* Should be impossible due to the need_utf8() test */
1284 Perl_croak(aTHX_ "U0 mode on a byte string");
1285 }
35bcd338
JH
1286 break;
1287 }
08ca2aa3 1288 if (len > strend - s) len = strend - s;
fc241834 1289 if (!checksum) {
08ca2aa3
TH
1290 if (len && unpack_only_one) len = 1;
1291 EXTEND(SP, len);
1292 EXTEND_MORTAL(len);
fc241834 1293 }
08ca2aa3
TH
1294 while (len-- > 0 && s < strend) {
1295 STRLEN retlen;
1296 UV auv;
1297 if (utf8) {
1298 U8 result[UTF8_MAXLEN];
f7fe979e 1299 const char *ptr = s;
08ca2aa3 1300 STRLEN len;
08ca2aa3
TH
1301 /* Bug: warns about bad utf8 even if we are short on bytes
1302 and will break out of the loop */
9df874cd 1303 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1304 'U'))
08ca2aa3
TH
1305 break;
1306 len = UTF8SKIP(result);
9df874cd 1307 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1308 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1309 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1310 len,
1311 &retlen,
1312 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1313 s = ptr;
1314 } else {
3ece276e
KW
1315 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1316 strend - s,
1317 &retlen,
1318 UTF8_ALLOW_DEFAULT));
df337714 1319 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1320 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1321 s += retlen;
1322 }
1323 if (!checksum)
6e449a3a 1324 mPUSHu(auv);
73cb7263 1325 else if (checksum > bits_in_uv)
08ca2aa3 1326 cdouble += (NV) auv;
73cb7263 1327 else
08ca2aa3 1328 cuv += auv;
a6ec74c1
JH
1329 }
1330 break;
49704364
LW
1331 case 's' | TYPE_IS_SHRIEKING:
1332#if SHORTSIZE != SIZE16
73cb7263 1333 while (len-- > 0) {
08ca2aa3 1334 short ashort;
aaec8192 1335 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1336 if (!checksum)
6e449a3a 1337 mPUSHi(ashort);
73cb7263
NC
1338 else if (checksum > bits_in_uv)
1339 cdouble += (NV)ashort;
1340 else
1341 cuv += ashort;
49704364
LW
1342 }
1343 break;
1344#else
924ba076 1345 /* FALLTHROUGH */
a6ec74c1 1346#endif
49704364 1347 case 's':
73cb7263 1348 while (len-- > 0) {
08ca2aa3
TH
1349 I16 ai16;
1350
1351#if U16SIZE > SIZE16
1352 ai16 = 0;
1353#endif
aaec8192 1354 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1355#if U16SIZE > SIZE16
73cb7263
NC
1356 if (ai16 > 32767)
1357 ai16 -= 65536;
a6ec74c1 1358#endif
08ca2aa3 1359 if (!checksum)
6e449a3a 1360 mPUSHi(ai16);
73cb7263
NC
1361 else if (checksum > bits_in_uv)
1362 cdouble += (NV)ai16;
1363 else
1364 cuv += ai16;
a6ec74c1
JH
1365 }
1366 break;
49704364
LW
1367 case 'S' | TYPE_IS_SHRIEKING:
1368#if SHORTSIZE != SIZE16
73cb7263 1369 while (len-- > 0) {
08ca2aa3 1370 unsigned short aushort;
aaec8192
NC
1371 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1372 needs_swap);
08ca2aa3 1373 if (!checksum)
6e449a3a 1374 mPUSHu(aushort);
73cb7263
NC
1375 else if (checksum > bits_in_uv)
1376 cdouble += (NV)aushort;
1377 else
1378 cuv += aushort;
49704364
LW
1379 }
1380 break;
1381#else
924ba076 1382 /* FALLTHROUGH */
49704364 1383#endif
a6ec74c1
JH
1384 case 'v':
1385 case 'n':
1386 case 'S':
73cb7263 1387 while (len-- > 0) {
08ca2aa3
TH
1388 U16 au16;
1389#if U16SIZE > SIZE16
1390 au16 = 0;
1391#endif
aaec8192 1392 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1393 if (datumtype == 'n')
1394 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1395 if (datumtype == 'v')
1396 au16 = vtohs(au16);
08ca2aa3 1397 if (!checksum)
6e449a3a 1398 mPUSHu(au16);
73cb7263 1399 else if (checksum > bits_in_uv)
f337b084 1400 cdouble += (NV) au16;
73cb7263
NC
1401 else
1402 cuv += au16;
a6ec74c1
JH
1403 }
1404 break;
068bd2e7
MHM
1405 case 'v' | TYPE_IS_SHRIEKING:
1406 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1407 while (len-- > 0) {
08ca2aa3
TH
1408 I16 ai16;
1409# if U16SIZE > SIZE16
1410 ai16 = 0;
1411# endif
aaec8192 1412 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1413 /* There should never be any byte-swapping here. */
1414 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1415 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1416 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1417 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1418 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1419 if (!checksum)
6e449a3a 1420 mPUSHi(ai16);
73cb7263 1421 else if (checksum > bits_in_uv)
08ca2aa3 1422 cdouble += (NV) ai16;
73cb7263
NC
1423 else
1424 cuv += ai16;
068bd2e7
MHM
1425 }
1426 break;
a6ec74c1 1427 case 'i':
49704364 1428 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1429 while (len-- > 0) {
08ca2aa3 1430 int aint;
aaec8192 1431 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1432 if (!checksum)
6e449a3a 1433 mPUSHi(aint);
73cb7263
NC
1434 else if (checksum > bits_in_uv)
1435 cdouble += (NV)aint;
1436 else
1437 cuv += aint;
a6ec74c1
JH
1438 }
1439 break;
1440 case 'I':
49704364 1441 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1442 while (len-- > 0) {
08ca2aa3 1443 unsigned int auint;
aaec8192 1444 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1445 if (!checksum)
6e449a3a 1446 mPUSHu(auint);
73cb7263
NC
1447 else if (checksum > bits_in_uv)
1448 cdouble += (NV)auint;
1449 else
1450 cuv += auint;
a6ec74c1
JH
1451 }
1452 break;
92d41999 1453 case 'j':
73cb7263 1454 while (len-- > 0) {
08ca2aa3 1455 IV aiv;
aaec8192 1456 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1457 if (!checksum)
6e449a3a 1458 mPUSHi(aiv);
73cb7263
NC
1459 else if (checksum > bits_in_uv)
1460 cdouble += (NV)aiv;
1461 else
1462 cuv += aiv;
92d41999
JH
1463 }
1464 break;
1465 case 'J':
73cb7263 1466 while (len-- > 0) {
08ca2aa3 1467 UV auv;
aaec8192 1468 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1469 if (!checksum)
6e449a3a 1470 mPUSHu(auv);
73cb7263
NC
1471 else if (checksum > bits_in_uv)
1472 cdouble += (NV)auv;
1473 else
1474 cuv += auv;
92d41999
JH
1475 }
1476 break;
49704364
LW
1477 case 'l' | TYPE_IS_SHRIEKING:
1478#if LONGSIZE != SIZE32
73cb7263 1479 while (len-- > 0) {
08ca2aa3 1480 long along;
aaec8192 1481 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1482 if (!checksum)
6e449a3a 1483 mPUSHi(along);
73cb7263
NC
1484 else if (checksum > bits_in_uv)
1485 cdouble += (NV)along;
1486 else
1487 cuv += along;
49704364
LW
1488 }
1489 break;
1490#else
924ba076 1491 /* FALLTHROUGH */
a6ec74c1 1492#endif
49704364 1493 case 'l':
73cb7263 1494 while (len-- > 0) {
08ca2aa3
TH
1495 I32 ai32;
1496#if U32SIZE > SIZE32
1497 ai32 = 0;
1498#endif
aaec8192 1499 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1500#if U32SIZE > SIZE32
08ca2aa3 1501 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1502#endif
08ca2aa3 1503 if (!checksum)
6e449a3a 1504 mPUSHi(ai32);
73cb7263
NC
1505 else if (checksum > bits_in_uv)
1506 cdouble += (NV)ai32;
1507 else
1508 cuv += ai32;
a6ec74c1
JH
1509 }
1510 break;
49704364
LW
1511 case 'L' | TYPE_IS_SHRIEKING:
1512#if LONGSIZE != SIZE32
73cb7263 1513 while (len-- > 0) {
08ca2aa3 1514 unsigned long aulong;
aaec8192 1515 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1516 if (!checksum)
6e449a3a 1517 mPUSHu(aulong);
73cb7263
NC
1518 else if (checksum > bits_in_uv)
1519 cdouble += (NV)aulong;
1520 else
1521 cuv += aulong;
49704364
LW
1522 }
1523 break;
1524#else
924ba076 1525 /* FALLTHROUGH */
49704364 1526#endif
a6ec74c1
JH
1527 case 'V':
1528 case 'N':
1529 case 'L':
73cb7263 1530 while (len-- > 0) {
08ca2aa3
TH
1531 U32 au32;
1532#if U32SIZE > SIZE32
1533 au32 = 0;
1534#endif
aaec8192 1535 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1536 if (datumtype == 'N')
1537 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1538 if (datumtype == 'V')
1539 au32 = vtohl(au32);
08ca2aa3 1540 if (!checksum)
6e449a3a 1541 mPUSHu(au32);
fc241834
RGS
1542 else if (checksum > bits_in_uv)
1543 cdouble += (NV)au32;
1544 else
1545 cuv += au32;
a6ec74c1
JH
1546 }
1547 break;
068bd2e7
MHM
1548 case 'V' | TYPE_IS_SHRIEKING:
1549 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1550 while (len-- > 0) {
08ca2aa3 1551 I32 ai32;
f8e5a5db 1552#if U32SIZE > SIZE32
08ca2aa3 1553 ai32 = 0;
f8e5a5db 1554#endif
aaec8192 1555 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1556 /* There should never be any byte swapping here. */
1557 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1558 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1559 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1560 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1561 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1562 if (!checksum)
6e449a3a 1563 mPUSHi(ai32);
73cb7263
NC
1564 else if (checksum > bits_in_uv)
1565 cdouble += (NV)ai32;
1566 else
1567 cuv += ai32;
068bd2e7
MHM
1568 }
1569 break;
a6ec74c1 1570 case 'p':
a6ec74c1 1571 while (len-- > 0) {
f7fe979e 1572 const char *aptr;
aaec8192 1573 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1574 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1575 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1576 }
1577 break;
1578 case 'w':
a6ec74c1
JH
1579 {
1580 UV auv = 0;
1581 U32 bytes = 0;
fc241834 1582
08ca2aa3
TH
1583 while (len > 0 && s < strend) {
1584 U8 ch;
f337b084 1585 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1586 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1587 /* UTF8_IS_XXXXX not right here because this is a BER, not
1588 * UTF-8 format - using constant 0x80 */
08ca2aa3 1589 if (ch < 0x80) {
a6ec74c1 1590 bytes = 0;
6e449a3a 1591 mPUSHu(auv);
a6ec74c1
JH
1592 len--;
1593 auv = 0;
08ca2aa3 1594 continue;
a6ec74c1 1595 }
08ca2aa3 1596 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1597 const char *t;
a6ec74c1 1598
147e3846
KW
1599 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1600 (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1601 while (s < strend) {
f337b084 1602 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1603 sv = mul128(sv, (U8)(ch & 0x7f));
1604 if (!(ch & 0x80)) {
a6ec74c1
JH
1605 bytes = 0;
1606 break;
1607 }
1608 }
10516c54 1609 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1610 while (*t == '0')
1611 t++;
1612 sv_chop(sv, t);
6e449a3a 1613 mPUSHs(sv);
a6ec74c1
JH
1614 len--;
1615 auv = 0;
1616 }
1617 }
1618 if ((s >= strend) && bytes)
49704364 1619 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1620 }
1621 break;
1622 case 'P':
49704364
LW
1623 if (symptr->howlen == e_star)
1624 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1625 EXTEND(SP, 1);
2d3e0934 1626 if (s + sizeof(char*) <= strend) {
08ca2aa3 1627 char *aptr;
aaec8192 1628 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1629 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1630 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1631 }
a6ec74c1 1632 break;
c174bf3b 1633#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1634 case 'q':
73cb7263 1635 while (len-- > 0) {
08ca2aa3 1636 Quad_t aquad;
aaec8192 1637 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1638 if (!checksum)
c174bf3b 1639 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1640 else if (checksum > bits_in_uv)
1641 cdouble += (NV)aquad;
1642 else
1643 cuv += aquad;
1644 }
a6ec74c1
JH
1645 break;
1646 case 'Q':
73cb7263 1647 while (len-- > 0) {
08ca2aa3 1648 Uquad_t auquad;
aaec8192 1649 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1650 if (!checksum)
c174bf3b 1651 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1652 else if (checksum > bits_in_uv)
1653 cdouble += (NV)auquad;
1654 else
1655 cuv += auquad;
a6ec74c1
JH
1656 }
1657 break;
1640b983 1658#endif
a6ec74c1
JH
1659 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1660 case 'f':
73cb7263 1661 while (len-- > 0) {
08ca2aa3 1662 float afloat;
aaec8192 1663 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1664 if (!checksum)
6e449a3a 1665 mPUSHn(afloat);
08ca2aa3 1666 else
73cb7263 1667 cdouble += afloat;
fc241834 1668 }
a6ec74c1
JH
1669 break;
1670 case 'd':
73cb7263 1671 while (len-- > 0) {
08ca2aa3 1672 double adouble;
aaec8192 1673 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1674 if (!checksum)
6e449a3a 1675 mPUSHn(adouble);
08ca2aa3 1676 else
73cb7263 1677 cdouble += adouble;
fc241834 1678 }
a6ec74c1 1679 break;
92d41999 1680 case 'F':
73cb7263 1681 while (len-- > 0) {
275663fa 1682 NV_bytes anv;
aaec8192
NC
1683 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1684 datumtype, needs_swap);
08ca2aa3 1685 if (!checksum)
275663fa 1686 mPUSHn(anv.nv);
08ca2aa3 1687 else
275663fa 1688 cdouble += anv.nv;
fc241834 1689 }
92d41999
JH
1690 break;
1691#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1692 case 'D':
73cb7263 1693 while (len-- > 0) {
275663fa 1694 ld_bytes aldouble;
aaec8192
NC
1695 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1696 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1697 /* The most common long double format, the x86 80-bit
1698 * extended precision, has either 2 or 6 unused bytes,
1699 * which may contain garbage, which may contain
1700 * unintentional data. While we do zero the bytes of
1701 * the long double data in pack(), here in unpack() we
1702 * don't, because it's really hard to envision that
1703 * reading the long double off aldouble would be
e075ae47 1704 * affected by the unused bytes.
42262fd3
JH
1705 *
1706 * Note that trying to unpack 'long doubles' of 'long
1707 * doubles' packed in another system is in the general
1708 * case doomed without having more detail. */
08ca2aa3 1709 if (!checksum)
275663fa 1710 mPUSHn(aldouble.ld);
08ca2aa3 1711 else
275663fa 1712 cdouble += aldouble.ld;
92d41999
JH
1713 }
1714 break;
1715#endif
a6ec74c1 1716 case 'u':
858fe5e1 1717 if (!checksum) {
f7fe979e 1718 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1719 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1720 if (l) SvPOK_on(sv);
1721 }
08ca2aa3 1722
bee81d36
KW
1723 /* Note that all legal uuencoded strings are ASCII printables, so
1724 * have the same representation under UTF-8 vs not. This means we
1725 * can ignore UTF8ness on legal input. For illegal we stop at the
1726 * first failure, and don't report where/what that is, so again we
1727 * can ignore UTF8ness */
1728
9233b56b
KW
1729 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1730 I32 a, b, c, d;
1731 char hunk[3];
1732
1733 len = PL_uudmap[*(U8*)s++] & 077;
1734 while (len > 0) {
1735 if (s < strend && ISUUCHAR(*s))
1736 a = PL_uudmap[*(U8*)s++] & 077;
1737 else
1738 a = 0;
1739 if (s < strend && ISUUCHAR(*s))
1740 b = PL_uudmap[*(U8*)s++] & 077;
1741 else
1742 b = 0;
1743 if (s < strend && ISUUCHAR(*s))
1744 c = PL_uudmap[*(U8*)s++] & 077;
1745 else
1746 c = 0;
1747 if (s < strend && ISUUCHAR(*s))
1748 d = PL_uudmap[*(U8*)s++] & 077;
1749 else
1750 d = 0;
1751 hunk[0] = (char)((a << 2) | (b >> 4));
1752 hunk[1] = (char)((b << 4) | (c >> 2));
1753 hunk[2] = (char)((c << 6) | d);
1754 if (!checksum)
1755 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1756 len -= 3;
1757 }
1758 if (*s == '\n')
1759 s++;
1760 else /* possible checksum byte */
1761 if (s + 1 < strend && s[1] == '\n')
1762 s += 2;
1763 }
858fe5e1
TC
1764 if (!checksum)
1765 XPUSHs(sv);
a6ec74c1 1766 break;
99f862a1 1767 } /* End of switch */
49704364 1768
a6ec74c1 1769 if (checksum) {
1109a392 1770 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1771 (checksum > bits_in_uv &&
08ca2aa3
TH
1772 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1773 NV trouble, anv;
a6ec74c1 1774
08ca2aa3 1775 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1776 while (checksum >= 16) {
1777 checksum -= 16;
08ca2aa3 1778 anv *= 65536.0;
a6ec74c1 1779 }
a6ec74c1 1780 while (cdouble < 0.0)
08ca2aa3 1781 cdouble += anv;
09b94b1f
S
1782 cdouble = Perl_modf(cdouble / anv, &trouble);
1783#ifdef LONGDOUBLE_DOUBLEDOUBLE
1784 /* Workaround for powerpc doubledouble modfl bug:
1785 * close to 1.0L and -1.0L cdouble is 0, and trouble
1786 * is cdouble / anv. */
1787 if (trouble != Perl_ceil(trouble)) {
1788 cdouble = trouble;
1789 if (cdouble > 1.0L) cdouble -= 1.0L;
1790 if (cdouble < -1.0L) cdouble += 1.0L;
1791 }
1792#endif
1793 cdouble *= anv;
c4c5f44a 1794 sv = newSVnv(cdouble);
a6ec74c1
JH
1795 }
1796 else {
fa8ec7c1
NC
1797 if (checksum < bits_in_uv) {
1798 UV mask = ((UV)1 << checksum) - 1;
92d41999 1799 cuv &= mask;
a6ec74c1 1800 }
c4c5f44a 1801 sv = newSVuv(cuv);
a6ec74c1 1802 }
6e449a3a 1803 mXPUSHs(sv);
a6ec74c1
JH
1804 checksum = 0;
1805 }
fc241834 1806
49704364
LW
1807 if (symptr->flags & FLAG_SLASH){
1808 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1809 break;
49704364
LW
1810 if( next_symbol(symptr) ){
1811 if( symptr->howlen == e_number )
1812 Perl_croak(aTHX_ "Count after length/code in unpack" );
1813 if( beyond ){
1814 /* ...end of char buffer then no decent length available */
1815 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1816 } else {
1817 /* take top of stack (hope it's numeric) */
1818 len = POPi;
1819 if( len < 0 )
1820 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1821 }
1822 } else {
1823 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1824 }
1825 datumtype = symptr->code;
21c16052 1826 explicit_length = FALSE;
49704364
LW
1827 goto redo_switch;
1828 }
a6ec74c1 1829 }
49704364 1830
18529408
IZ
1831 if (new_s)
1832 *new_s = s;
1833 PUTBACK;
1834 return SP - PL_stack_base - start_sp_offset;
1835}
1836
1837PP(pp_unpack)
1838{
1839 dSP;
bab9c0ac 1840 dPOPPOPssrl;
1c23e2bd 1841 U8 gimme = GIMME_V;
18529408
IZ
1842 STRLEN llen;
1843 STRLEN rlen;
5c144d81
NC
1844 const char *pat = SvPV_const(left, llen);
1845 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1846 const char *strend = s + rlen;
1847 const char *patend = pat + llen;
08ca2aa3 1848 I32 cnt;
18529408
IZ
1849
1850 PUTBACK;
7accc089 1851 cnt = unpackstring(pat, patend, s, strend,
49704364 1852 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1853 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1854
18529408
IZ
1855 SPAGAIN;
1856 if ( !cnt && gimme == G_SCALAR )
1857 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1858 RETURN;
1859}
1860
f337b084 1861STATIC U8 *
e68aed92 1862doencodes(U8 *h, const U8 *s, I32 len)
a6ec74c1 1863{
f337b084 1864 *h++ = PL_uuemap[len];
a6ec74c1 1865 while (len > 2) {
f337b084
TH
1866 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1867 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1868 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1869 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1870 s += 3;
1871 len -= 3;
1872 }
1873 if (len > 0) {
e68aed92 1874 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1875 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1876 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1877 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1878 *h++ = PL_uuemap[0];
a6ec74c1 1879 }
f337b084
TH
1880 *h++ = '\n';
1881 return h;
a6ec74c1
JH
1882}
1883
1884STATIC SV *
f7fe979e 1885S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1886{
8b6e33c7
AL
1887 SV *result = newSVpvn(s, l);
1888 char *const result_c = SvPV_nolen(result); /* convenience */
1889 char *out = result_c;
1890 bool skip = 1;
1891 bool ignore = 0;
a6ec74c1 1892
7918f24d
NC
1893 PERL_ARGS_ASSERT_IS_AN_INT;
1894
a6ec74c1
JH
1895 while (*s) {
1896 switch (*s) {
1897 case ' ':
1898 break;
1899 case '+':
1900 if (!skip) {
1901 SvREFCNT_dec(result);
1902 return (NULL);
1903 }
1904 break;
1905 case '0':
1906 case '1':
1907 case '2':
1908 case '3':
1909 case '4':
1910 case '5':
1911 case '6':
1912 case '7':
1913 case '8':
1914 case '9':
1915 skip = 0;
1916 if (!ignore) {
1917 *(out++) = *s;
1918 }
1919 break;
1920 case '.':
1921 ignore = 1;
1922 break;
1923 default:
1924 SvREFCNT_dec(result);
1925 return (NULL);
1926 }
1927 s++;
1928 }
1929 *(out++) = '\0';
1930 SvCUR_set(result, out - result_c);
1931 return (result);
1932}
1933
1934/* pnum must be '\0' terminated */
1935STATIC int
1936S_div128(pTHX_ SV *pnum, bool *done)
1937{
8b6e33c7
AL
1938 STRLEN len;
1939 char * const s = SvPV(pnum, len);
1940 char *t = s;
1941 int m = 0;
1942
7918f24d
NC
1943 PERL_ARGS_ASSERT_DIV128;
1944
8b6e33c7
AL
1945 *done = 1;
1946 while (*t) {
1947 const int i = m * 10 + (*t - '0');
1948 const int r = (i >> 7); /* r < 10 */
1949 m = i & 0x7F;
1950 if (r) {
1951 *done = 0;
1952 }
1953 *(t++) = '0' + r;
a6ec74c1 1954 }
8b6e33c7
AL
1955 *(t++) = '\0';
1956 SvCUR_set(pnum, (STRLEN) (t - s));
1957 return (m);
a6ec74c1
JH
1958}
1959
18529408 1960/*
7accc089
JH
1961=for apidoc packlist
1962
796b6530 1963The engine implementing C<pack()> Perl function.
7accc089 1964
bfce84ec
AL
1965=cut
1966*/
7accc089
JH
1967
1968void
5aaab254 1969Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1970{
aadb217d
JH
1971 tempsym_t sym;
1972
7918f24d
NC
1973 PERL_ARGS_ASSERT_PACKLIST;
1974
f7fe979e 1975 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1976
f337b084
TH
1977 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1978 Also make sure any UTF8 flag is loaded */
56eb0262 1979 SvPV_force_nolen(cat);
bfce84ec
AL
1980 if (DO_UTF8(cat))
1981 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1982
49704364
LW
1983 (void)pack_rec( cat, &sym, beglist, endlist );
1984}
1985
f337b084
TH
1986/* like sv_utf8_upgrade, but also repoint the group start markers */
1987STATIC void
1988marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1989 STRLEN len;
1990 tempsym_t *group;
f7fe979e
AL
1991 const char *from_ptr, *from_start, *from_end, **marks, **m;
1992 char *to_start, *to_ptr;
f337b084
TH
1993
1994 if (SvUTF8(sv)) return;
1995
aa07b2f6 1996 from_start = SvPVX_const(sv);
f337b084
TH
1997 from_end = from_start + SvCUR(sv);
1998 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 1999 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2000 if (from_ptr == from_end) {
2001 /* Simple case: no character needs to be changed */
2002 SvUTF8_on(sv);
2003 return;
2004 }
2005
3473cf63 2006 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2007 Newx(to_start, len, char);
f337b084
TH
2008 Copy(from_start, to_start, from_ptr-from_start, char);
2009 to_ptr = to_start + (from_ptr-from_start);
2010
a02a5408 2011 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2012 for (group=sym_ptr; group; group = group->previous)
2013 marks[group->level] = from_start + group->strbeg;
2014 marks[sym_ptr->level+1] = from_end+1;
2015 for (m = marks; *m < from_ptr; m++)
2016 *m = to_start + (*m-from_start);
2017
2018 for (;from_ptr < from_end; from_ptr++) {
2019 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2020 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2021 }
2022 *to_ptr = 0;
2023
2024 while (*m == from_ptr) *m++ = to_ptr;
2025 if (m != marks + sym_ptr->level+1) {
2026 Safefree(marks);
2027 Safefree(to_start);
5637ef5b
NC
2028 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2029 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2030 }
2031 for (group=sym_ptr; group; group = group->previous)
2032 group->strbeg = marks[group->level] - to_start;
2033 Safefree(marks);
2034
2035 if (SvOOK(sv)) {
2036 if (SvIVX(sv)) {
b162af07 2037 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2038 from_start -= SvIVX(sv);
2039 SvIV_set(sv, 0);
2040 }
2041 SvFLAGS(sv) &= ~SVf_OOK;
2042 }
2043 if (SvLEN(sv) != 0)
2044 Safefree(from_start);
f880fe2f 2045 SvPV_set(sv, to_start);
b162af07
SP
2046 SvCUR_set(sv, to_ptr - to_start);
2047 SvLEN_set(sv, len);
f337b084
TH
2048 SvUTF8_on(sv);
2049}
2050
2051/* Exponential string grower. Makes string extension effectively O(n)
2052 needed says how many extra bytes we need (not counting the final '\0')
2053 Only grows the string if there is an actual lack of space
2054*/
2055STATIC char *
0bd48802 2056S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2057 const STRLEN cur = SvCUR(sv);
2058 const STRLEN len = SvLEN(sv);
f337b084 2059 STRLEN extend;
7918f24d
NC
2060
2061 PERL_ARGS_ASSERT_SV_EXP_GROW;
2062
f337b084
TH
2063 if (len - cur > needed) return SvPVX(sv);
2064 extend = needed > len ? needed : len;
2065 return SvGROW(sv, len+extend+1);
2066}
49704364 2067
93f6e112 2068static SV *
b197e565 2069S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2070{
2071 SvGETMAGIC(sv);
93f6e112
FC
2072 if (UNLIKELY(SvAMAGIC(sv)))
2073 sv = sv_2num(sv);
99f450cc 2074 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2075 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2076 const NV nv = SvNV_nomg(sv);
2077 if (c == 'w')
147e3846 2078 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
354b74ae 2079 else
147e3846 2080 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2081 }
93f6e112 2082 return sv;
354b74ae
FC
2083}
2084
93f6e112
FC
2085#define SvIV_no_inf(sv,d) \
2086 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2087#define SvUV_no_inf(sv,d) \
2088 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2089
49704364
LW
2090STATIC
2091SV **
f337b084 2092S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2093{
49704364 2094 tempsym_t lookahead;
f337b084
TH
2095 I32 items = endlist - beglist;
2096 bool found = next_symbol(symptr);
2097 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2098 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2099 char* from;
f337b084 2100
7918f24d
NC
2101 PERL_ARGS_ASSERT_PACK_REC;
2102
f337b084
TH
2103 if (symptr->level == 0 && found && symptr->code == 'U') {
2104 marked_upgrade(aTHX_ cat, symptr);
2105 symptr->flags |= FLAG_DO_UTF8;
2106 utf8 = 0;
49704364 2107 }
f337b084 2108 symptr->strbeg = SvCUR(cat);
49704364
LW
2109
2110 while (found) {
f337b084
TH
2111 SV *fromstr;
2112 STRLEN fromlen;
2113 I32 len;
a0714e2c 2114 SV *lengthcode = NULL;
49704364 2115 I32 datumtype = symptr->code;
f337b084
TH
2116 howlen_t howlen = symptr->howlen;
2117 char *start = SvPVX(cat);
2118 char *cur = start + SvCUR(cat);
a1219b5e 2119 bool needs_swap;
49704364 2120
fc1bb3f2 2121#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2122#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2123
2124 switch (howlen) {
fc241834 2125 case e_star:
f337b084
TH
2126 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2127 0 : items;
2128 break;
2129 default:
2130 /* e_no_len and e_number */
2131 len = symptr->length;
49704364
LW
2132 break;
2133 }
2134
f337b084 2135 if (len) {
a7a3cfaa 2136 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2137
a7a3cfaa
TH
2138 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2139 /* We can process this letter. */
2140 STRLEN size = props & PACK_SIZE_MASK;
2141 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2142 }
f337b084
TH
2143 }
2144
49704364
LW
2145 /* Look ahead for next symbol. Do we have code/code? */
2146 lookahead = *symptr;
2147 found = next_symbol(&lookahead);
246f24af
TH
2148 if (symptr->flags & FLAG_SLASH) {
2149 IV count;
f337b084 2150 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2151 if (strchr("aAZ", lookahead.code)) {
2152 if (lookahead.howlen == e_number) count = lookahead.length;
2153 else {
ce399ba6 2154 if (items > 0) {
48a5da33 2155 count = sv_len_utf8(*beglist);
ce399ba6 2156 }
246f24af
TH
2157 else count = 0;
2158 if (lookahead.code == 'Z') count++;
2159 }
2160 } else {
2161 if (lookahead.howlen == e_number && lookahead.length < items)
2162 count = lookahead.length;
2163 else count = items;
2164 }
2165 lookahead.howlen = e_number;
2166 lookahead.length = count;
2167 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2168 }
49704364 2169
a1219b5e
NC
2170 needs_swap = NEEDS_SWAP(datumtype);
2171
fc241834
RGS
2172 /* Code inside the switch must take care to properly update
2173 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2174 doesn't simply leave using break */
0c7df902 2175 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2176 default:
f337b084
TH
2177 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2178 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2179 case '%':
49704364 2180 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2181
28be1210 2182 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2183 case '.':
2184 if (howlen == e_star) from = start;
2185 else if (len == 0) from = cur;
2186 else {
2187 tempsym_t *group = symptr;
2188
2189 while (--len && group) group = group->previous;
2190 from = group ? start + group->strbeg : start;
2191 }
2192 fromstr = NEXTFROM;
354b74ae 2193 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2194 goto resize;
28be1210 2195 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2196 case '@':
28be1210
TH
2197 from = start + symptr->strbeg;
2198 resize:
28be1210 2199 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2200 if (len >= 0) {
2201 while (len && from < cur) {
2202 from += UTF8SKIP(from);
2203 len--;
2204 }
2205 if (from > cur)
2206 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2207 if (len) {
2208 /* Here we know from == cur */
2209 grow:
2210 GROWING(0, cat, start, cur, len);
2211 Zero(cur, len, char);
2212 cur += len;
2213 } else if (from < cur) {
2214 len = cur - from;
2215 goto shrink;
2216 } else goto no_change;
2217 } else {
2218 cur = from;
2219 len = -len;
2220 goto utf8_shrink;
f337b084 2221 }
28be1210
TH
2222 else {
2223 len -= cur - from;
f337b084 2224 if (len > 0) goto grow;
28be1210 2225 if (len == 0) goto no_change;
fc241834 2226 len = -len;
28be1210 2227 goto shrink;
f337b084 2228 }
a6ec74c1 2229 break;
81d52ecd 2230
fc241834 2231 case '(': {
49704364 2232 tempsym_t savsym = *symptr;
66c611c5
MHM
2233 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2234 symptr->flags |= group_modifiers;
49704364
LW
2235 symptr->patend = savsym.grpend;
2236 symptr->level++;
f337b084 2237 symptr->previous = &lookahead;
18529408 2238 while (len--) {
f337b084
TH
2239 U32 was_utf8;
2240 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2241 else symptr->flags &= ~FLAG_PARSE_UTF8;
2242 was_utf8 = SvUTF8(cat);
49704364 2243 symptr->patptr = savsym.grpbeg;
f337b084
TH
2244 beglist = pack_rec(cat, symptr, beglist, endlist);
2245 if (SvUTF8(cat) != was_utf8)
2246 /* This had better be an upgrade while in utf8==0 mode */
2247 utf8 = 1;
2248
49704364 2249 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2250 break; /* No way to continue */
2251 }
ee790063 2252 items = endlist - beglist;
f337b084
TH
2253 lookahead.flags = symptr->flags & ~group_modifiers;
2254 goto no_change;
18529408 2255 }
62f95557
IZ
2256 case 'X' | TYPE_IS_SHRIEKING:
2257 if (!len) /* Avoid division by 0 */
2258 len = 1;
f337b084
TH
2259 if (utf8) {
2260 char *hop, *last;
2261 I32 l = len;
2262 hop = last = start;
2263 while (hop < cur) {
2264 hop += UTF8SKIP(hop);
2265 if (--l == 0) {
2266 last = hop;
2267 l = len;
2268 }
2269 }
2270 if (last > cur)
2271 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2272 cur = last;
2273 break;
2274 }
2275 len = (cur-start) % len;
924ba076 2276 /* FALLTHROUGH */
a6ec74c1 2277 case 'X':
f337b084
TH
2278 if (utf8) {
2279 if (len < 1) goto no_change;
28be1210 2280 utf8_shrink:
f337b084
TH
2281 while (len > 0) {
2282 if (cur <= start)
28be1210
TH
2283 Perl_croak(aTHX_ "'%c' outside of string in pack",
2284 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2285 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2286 if (cur <= start)
28be1210
TH
2287 Perl_croak(aTHX_ "'%c' outside of string in pack",
2288 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2289 }
2290 len--;
2291 }
2292 } else {
fc241834 2293 shrink:
f337b084 2294 if (cur - start < len)
28be1210
TH
2295 Perl_croak(aTHX_ "'%c' outside of string in pack",
2296 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2297 cur -= len;
2298 }
2299 if (cur < start+symptr->strbeg) {
2300 /* Make sure group starts don't point into the void */
2301 tempsym_t *group;
9e27e96a 2302 const STRLEN length = cur-start;
f337b084
TH
2303 for (group = symptr;
2304 group && length < group->strbeg;
2305 group = group->previous) group->strbeg = length;
2306 lookahead.strbeg = length;
2307 }
a6ec74c1 2308 break;
fc241834
RGS
2309 case 'x' | TYPE_IS_SHRIEKING: {
2310 I32 ai32;
62f95557
IZ
2311 if (!len) /* Avoid division by 0 */
2312 len = 1;
230e1fce 2313 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2314 else ai32 = (cur - start) % len;
2315 if (ai32 == 0) goto no_change;
2316 len -= ai32;
2317 }
924ba076 2318 /* FALLTHROUGH */
a6ec74c1 2319 case 'x':
f337b084 2320 goto grow;
a6ec74c1
JH
2321 case 'A':
2322 case 'Z':
f337b084 2323 case 'a': {
f7fe979e 2324 const char *aptr;
f337b084 2325
a6ec74c1 2326 fromstr = NEXTFROM;
e62f0680 2327 aptr = SvPV_const(fromstr, fromlen);
f337b084 2328 if (DO_UTF8(fromstr)) {
f7fe979e 2329 const char *end, *s;
f337b084
TH
2330
2331 if (!utf8 && !SvUTF8(cat)) {
2332 marked_upgrade(aTHX_ cat, symptr);
2333 lookahead.flags |= FLAG_DO_UTF8;
2334 lookahead.strbeg = symptr->strbeg;
2335 utf8 = 1;
2336 start = SvPVX(cat);
2337 cur = start + SvCUR(cat);
2338 }
fc241834 2339 if (howlen == e_star) {
f337b084
TH
2340 if (utf8) goto string_copy;
2341 len = fromlen+1;
2342 }
2343 s = aptr;
2344 end = aptr + fromlen;
2345 fromlen = datumtype == 'Z' ? len-1 : len;
2346 while ((I32) fromlen > 0 && s < end) {
2347 s += UTF8SKIP(s);
2348 fromlen--;
2349 }
2350 if (s > end)
2351 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2352 if (utf8) {
fc241834 2353 len = fromlen;
f337b084
TH
2354 if (datumtype == 'Z') len++;
2355 fromlen = s-aptr;
2356 len += fromlen;
fc241834 2357
f337b084 2358 goto string_copy;
fc241834 2359 }
f337b084
TH
2360 fromlen = len - fromlen;
2361 if (datumtype == 'Z') fromlen--;
2362 if (howlen == e_star) {
2363 len = fromlen;
2364 if (datumtype == 'Z') len++;
fc241834 2365 }
f337b084 2366 GROWING(0, cat, start, cur, len);
9df874cd 2367 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2368 datumtype | TYPE_IS_PACK))
5637ef5b 2369 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
147e3846 2370 "for '%c', aptr=%p end=%p cur=%p, fromlen=%" UVuf,
5637ef5b 2371 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2372 cur += fromlen;
a6ec74c1 2373 len -= fromlen;
f337b084
TH
2374 } else if (utf8) {
2375 if (howlen == e_star) {
2376 len = fromlen;
2377 if (datumtype == 'Z') len++;
a6ec74c1 2378 }
f337b084
TH
2379 if (len <= (I32) fromlen) {
2380 fromlen = len;
2381 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2382 }
fc241834 2383 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2384 upgrade, so:
2385 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2386 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2387 len -= fromlen;
2388 while (fromlen > 0) {
230e1fce 2389 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2390 aptr++;
2391 fromlen--;
fc241834 2392 }
f337b084
TH
2393 } else {
2394 string_copy:
2395 if (howlen == e_star) {
2396 len = fromlen;
2397 if (datumtype == 'Z') len++;
2398 }
2399 if (len <= (I32) fromlen) {
2400 fromlen = len;
2401 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2402 }
f337b084
TH
2403 GROWING(0, cat, start, cur, len);
2404 Copy(aptr, cur, fromlen, char);
2405 cur += fromlen;
2406 len -= fromlen;
a6ec74c1 2407 }
f337b084
TH
2408 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2409 cur += len;
3c4fb04a 2410 SvTAINT(cat);
a6ec74c1 2411 break;
f337b084 2412 }
a6ec74c1 2413 case 'B':
f337b084 2414 case 'b': {
b83604b4 2415 const char *str, *end;
f337b084
TH
2416 I32 l, field_len;
2417 U8 bits;
2418 bool utf8_source;
2419 U32 utf8_flags;
a6ec74c1 2420
fc241834 2421 fromstr = NEXTFROM;
b83604b4 2422 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2423 end = str + fromlen;
2424 if (DO_UTF8(fromstr)) {
2425 utf8_source = TRUE;
041457d9 2426 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2427 } else {
2428 utf8_source = FALSE;
2429 utf8_flags = 0; /* Unused, but keep compilers happy */
2430 }
2431 if (howlen == e_star) len = fromlen;
2432 field_len = (len+7)/8;
2433 GROWING(utf8, cat, start, cur, field_len);
2434 if (len > (I32)fromlen) len = fromlen;
2435 bits = 0;
2436 l = 0;
2437 if (datumtype == 'B')
2438 while (l++ < len) {
2439 if (utf8_source) {
95b63a38 2440 UV val = 0;
f337b084
TH
2441 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2442 bits |= val & 1;
2443 } else bits |= *str++ & 1;
2444 if (l & 7) bits <<= 1;
fc241834 2445 else {
f337b084
TH
2446 PUSH_BYTE(utf8, cur, bits);
2447 bits = 0;
a6ec74c1
JH
2448 }
2449 }
f337b084
TH
2450 else
2451 /* datumtype == 'b' */
2452 while (l++ < len) {
2453 if (utf8_source) {
95b63a38 2454 UV val = 0;
f337b084
TH
2455 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2456 if (val & 1) bits |= 0x80;
2457 } else if (*str++ & 1)
2458 bits |= 0x80;
2459 if (l & 7) bits >>= 1;
fc241834 2460 else {
f337b084
TH
2461 PUSH_BYTE(utf8, cur, bits);
2462 bits = 0;
a6ec74c1
JH
2463 }
2464 }
f337b084
TH
2465 l--;
2466 if (l & 7) {
fc241834 2467 if (datumtype == 'B')
f337b084 2468 bits <<= 7 - (l & 7);
fc241834 2469 else
f337b084
TH
2470 bits >>= 7 - (l & 7);
2471 PUSH_BYTE(utf8, cur, bits);
2472 l += 7;
a6ec74c1 2473 }
f337b084
TH
2474 /* Determine how many chars are left in the requested field */
2475 l /= 8;
2476 if (howlen == e_star) field_len = 0;
2477 else field_len -= l;
2478 Zero(cur, field_len, char);
2479 cur += field_len;
a6ec74c1 2480 break;
f337b084 2481 }
a6ec74c1 2482 case 'H':
f337b084 2483 case 'h': {
10516c54 2484 const char *str, *end;
f337b084
TH
2485 I32 l, field_len;
2486 U8 bits;
2487 bool utf8_source;
2488 U32 utf8_flags;
a6ec74c1 2489
fc241834 2490 fromstr = NEXTFROM;
10516c54 2491 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2492 end = str + fromlen;
2493 if (DO_UTF8(fromstr)) {
2494 utf8_source = TRUE;
041457d9 2495 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2496 } else {
2497 utf8_source = FALSE;
2498 utf8_flags = 0; /* Unused, but keep compilers happy */
2499 }
2500 if (howlen == e_star) len = fromlen;
2501 field_len = (len+1)/2;
2502 GROWING(utf8, cat, start, cur, field_len);
0403a1ad 2503 if (!utf8_source && len > (I32)fromlen) len = fromlen;
f337b084
TH
2504 bits = 0;
2505 l = 0;
2506 if (datumtype == 'H')
2507 while (l++ < len) {
2508 if (utf8_source) {
95b63a38 2509 UV val = 0;
f337b084
TH
2510 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2511 if (val < 256 && isALPHA(val))
2512 bits |= (val + 9) & 0xf;
a6ec74c1 2513 else
f337b084
TH
2514 bits |= val & 0xf;
2515 } else if (isALPHA(*str))
2516 bits |= (*str++ + 9) & 0xf;
2517 else
2518 bits |= *str++ & 0xf;
2519 if (l & 1) bits <<= 4;
fc241834 2520 else {
f337b084
TH
2521 PUSH_BYTE(utf8, cur, bits);
2522 bits = 0;
a6ec74c1
JH
2523 }
2524 }
f337b084
TH
2525 else
2526 while (l++ < len) {
2527 if (utf8_source) {
95b63a38 2528 UV val = 0;
f337b084
TH
2529 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2530 if (val < 256 && isALPHA(val))
2531 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2532 else
f337b084
TH
2533 bits |= (val & 0xf) << 4;
2534 } else if (isALPHA(*str))
2535 bits |= ((*str++ + 9) & 0xf) << 4;
2536 else
2537 bits |= (*str++ & 0xf) << 4;
2538 if (l & 1) bits >>= 4;
fc241834 2539 else {
f337b084
TH
2540 PUSH_BYTE(utf8, cur, bits);
2541 bits = 0;
a6ec74c1 2542 }
fc241834 2543 }
f337b084
TH
2544 l--;
2545 if (l & 1) {
2546 PUSH_BYTE(utf8, cur, bits);
2547 l++;
2548 }
2549 /* Determine how many chars are left in the requested field */
2550 l /= 2;
2551 if (howlen == e_star) field_len = 0;
2552 else field_len -= l;
2553 Zero(cur, field_len, char);
2554 cur += field_len;
2555 break;
fc241834
RGS
2556 }
2557 case 'c':
f337b084
TH
2558 while (len-- > 0) {
2559 IV aiv;
2560 fromstr = NEXTFROM;
354b74ae 2561 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2562 if ((-128 > aiv || aiv > 127))
2563 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2564 "Character in 'c' format wrapped in pack");
585ec06d 2565 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2566 }
2567 break;
2568 case 'C':
f337b084
TH
2569 if (len == 0) {
2570 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2571 break;
2572 }
a6ec74c1 2573 while (len-- > 0) {
f337b084 2574 IV aiv;
a6ec74c1 2575 fromstr = NEXTFROM;
354b74ae 2576 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2577 if ((0 > aiv || aiv > 0xff))
2578 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2579 "Character in 'C' format wrapped in pack");
1651fc44 2580 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2581 }
fc241834
RGS
2582 break;
2583 case 'W': {
2584 char *end;
670f1322 2585 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2586
2587 end = start+SvLEN(cat)-1;
2588 if (utf8) end -= UTF8_MAXLEN-1;
2589 while (len-- > 0) {
2590 UV auv;
2591 fromstr = NEXTFROM;
354b74ae 2592 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2593 if (in_bytes) auv = auv % 0x100;
2594 if (utf8) {
2595 W_utf8:
bf4a926a 2596 if (cur >= end) {
fc241834 2597 *cur = '\0';
b162af07 2598 SvCUR_set(cat, cur - start);
fc241834
RGS
2599
2600 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2601 end = start+SvLEN(cat)-UTF8_MAXLEN;
2602 }
05fefba9 2603 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
fc241834
RGS
2604 } else {
2605 if (auv >= 0x100) {
2606 if (!SvUTF8(cat)) {
2607 *cur = '\0';
b162af07 2608 SvCUR_set(cat, cur - start);
fc241834
RGS
2609 marked_upgrade(aTHX_ cat, symptr);
2610 lookahead.flags |= FLAG_DO_UTF8;
2611 lookahead.strbeg = symptr->strbeg;
2612 utf8 = 1;
2613 start = SvPVX(cat);
2614 cur = start + SvCUR(cat);
2615 end = start+SvLEN(cat)-UTF8_MAXLEN;
2616 goto W_utf8;
2617 }
a2a5de95
NC
2618 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2619 "Character in 'W' format wrapped in pack");
fc241834
RGS
2620 auv &= 0xff;
2621 }
2622 if (cur >= end) {
2623 *cur = '\0';
b162af07 2624 SvCUR_set(cat, cur - start);
fc241834
RGS
2625 GROWING(0, cat, start, cur, len+1);
2626 end = start+SvLEN(cat)-1;
2627 }
fe2774ed 2628 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2629 }
2630 }
2631 break;
fc241834
RGS
2632 }
2633 case 'U': {
2634 char *end;
2635
2636 if (len == 0) {
2637 if (!(symptr->flags & FLAG_DO_UTF8)) {
2638 marked_upgrade(aTHX_ cat, symptr);
2639 lookahead.flags |= FLAG_DO_UTF8;
2640 lookahead.strbeg = symptr->strbeg;
2641 }
2642 utf8 = 0;
2643 goto no_change;
2644 }
2645
2646 end = start+SvLEN(cat);
2647 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2648 while (len-- > 0) {
fc241834 2649 UV auv;
a6ec74c1 2650 fromstr = NEXTFROM;
354b74ae 2651 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2652 if (utf8) {
230e1fce 2653 U8 buffer[UTF8_MAXLEN], *endb;
05fefba9 2654 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
fc241834
RGS
2655 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2656 *cur = '\0';
b162af07 2657 SvCUR_set(cat, cur - start);
fc241834
RGS
2658 GROWING(0, cat, start, cur,
2659 len+(endb-buffer)*UTF8_EXPAND);
2660 end = start+SvLEN(cat);
2661 }
d21d7215 2662 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2663 } else {
2664 if (cur >= end) {
2665 *cur = '\0';
b162af07 2666 SvCUR_set(cat, cur - start);
fc241834
RGS
2667 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2668 end = start+SvLEN(cat)-UTF8_MAXLEN;
2669 }
05fefba9
KW
2670 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2671 UNI_TO_NATIVE(auv),
2672 0);
fc241834 2673 }
a6ec74c1 2674 }
a6ec74c1 2675 break;
fc241834 2676 }
a6ec74c1
JH
2677 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2678 case 'f':
a6ec74c1 2679 while (len-- > 0) {
f337b084
TH
2680 float afloat;
2681 NV anv;
a6ec74c1 2682 fromstr = NEXTFROM;
f337b084 2683 anv = SvNV(fromstr);
a7157111 2684# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2685 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2686 * on Alpha; fake it if we don't have them.
2687 */
f337b084 2688 if (anv > FLT_MAX)
fc241834 2689 afloat = FLT_MAX;
f337b084 2690 else if (anv < -FLT_MAX)
fc241834 2691 afloat = -FLT_MAX;
f337b084 2692 else afloat = (float)anv;
baf3cf9c 2693# else
a7157111 2694# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
d22fb6ca
DD
2695 if(Perl_isnan(anv))
2696 afloat = (float)NV_NAN;
2697 else
a7157111
JH
2698# endif
2699# ifdef NV_INF
919894b7
DM
2700 /* a simple cast to float is undefined if outside
2701 * the range of values that can be represented */
2702 afloat = (float)(anv > FLT_MAX ? NV_INF :
2703 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2704# endif
baf3cf9c 2705# endif
3a88beaa 2706 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2707 }
2708 break;
2709 case 'd':
a6ec74c1 2710 while (len-- > 0) {
f337b084
TH
2711 double adouble;
2712 NV anv;
a6ec74c1 2713 fromstr = NEXTFROM;
f337b084 2714 anv = SvNV(fromstr);
a7157111 2715# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2716 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2717 * on Alpha; fake it if we don't have them.
2718 */
f337b084 2719 if (anv > DBL_MAX)
fc241834 2720 adouble = DBL_MAX;
f337b084 2721 else if (anv < -DBL_MAX)
fc241834 2722 adouble = -DBL_MAX;
f337b084 2723 else adouble = (double)anv;
baf3cf9c 2724# else
f337b084 2725 adouble = (double)anv;
baf3cf9c 2726# endif
3a88beaa 2727 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2728 }
2729 break;
fc241834 2730 case 'F': {
275663fa 2731 NV_bytes anv;
1109a392 2732 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2733 while (len-- > 0) {
2734 fromstr = NEXTFROM;
cd07c537
DM
2735#ifdef __GNUC__
2736 /* to work round a gcc/x86 bug; don't use SvNV */
2737 anv.nv = sv_2nv(fromstr);
070e2677
TC
2738# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2739 && LONG_DOUBLESIZE > 10
2740 /* GCC sometimes overwrites the padding in the
2741 assignment above */
2742 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2743# endif
cd07c537 2744#else
275663fa 2745 anv.nv = SvNV(fromstr);
cd07c537 2746#endif
3a88beaa 2747 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2748 }
2749 break;
fc241834 2750 }
92d41999 2751#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2752 case 'D': {
275663fa 2753 ld_bytes aldouble;
1109a392
MHM
2754 /* long doubles can have unused bits, which may be nonzero */
2755 Zero(&aldouble, 1, long double);
92d41999
JH
2756 while (len-- > 0) {
2757 fromstr = NEXTFROM;
cd07c537
DM
2758# ifdef __GNUC__
2759 /* to work round a gcc/x86 bug; don't use SvNV */
2760 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2761# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2762 /* GCC sometimes overwrites the padding in the
2763 assignment above */
2764 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2765# endif
cd07c537 2766# else
275663fa 2767 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2768# endif
3a88beaa
NC
2769 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2770 needs_swap);
92d41999
JH
2771 }
2772 break;
fc241834 2773 }
92d41999 2774#endif
068bd2e7 2775 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2776 case 'n':
2777 while (len-- > 0) {
f337b084 2778 I16 ai16;
a6ec74c1 2779 fromstr = NEXTFROM;
354b74ae 2780 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2781 ai16 = PerlSock_htons(ai16);
3a88beaa 2782 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2783 }
2784 break;
068bd2e7 2785 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2786 case 'v':
2787 while (len-- > 0) {
f337b084 2788 I16 ai16;
a6ec74c1 2789 fromstr = NEXTFROM;
354b74ae 2790 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2791 ai16 = htovs(ai16);
3a88beaa 2792 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2793 }
2794 break;
49704364 2795 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2796#if SHORTSIZE != SIZE16
fc241834 2797 while (len-- > 0) {
f337b084 2798 unsigned short aushort;
fc241834 2799 fromstr = NEXTFROM;
354b74ae 2800 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2801 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2802 }
49704364
LW
2803 break;
2804#else
924ba076 2805 /* FALLTHROUGH */
a6ec74c1 2806#endif
49704364 2807 case 'S':
fc241834 2808 while (len-- > 0) {
f337b084 2809 U16 au16;
fc241834 2810 fromstr = NEXTFROM;
354b74ae 2811 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2812 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2813 }
2814 break;
49704364 2815 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2816#if SHORTSIZE != SIZE16
fc241834 2817 while (len-- > 0) {
f337b084 2818 short ashort;
fc241834 2819 fromstr = NEXTFROM;
354b74ae 2820 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2821 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2822 }
49704364
LW
2823 break;
2824#else
924ba076 2825 /* FALLTHROUGH */
a6ec74c1 2826#endif
49704364
LW
2827 case 's':
2828 while (len-- > 0) {
f337b084 2829 I16 ai16;
49704364 2830 fromstr = NEXTFROM;
354b74ae 2831 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2832 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2833 }
2834 break;
2835 case 'I':
49704364 2836 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2837 while (len-- > 0) {
f337b084 2838 unsigned int auint;
a6ec74c1 2839 fromstr = NEXTFROM;
354b74ae 2840 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2841 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2842 }
2843 break;
92d41999
JH
2844 case 'j':
2845 while (len-- > 0) {
f337b084 2846 IV aiv;
92d41999 2847 fromstr = NEXTFROM;
354b74ae 2848 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2849 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2850 }
2851 break;
2852 case 'J':
2853 while (len-- > 0) {
f337b084 2854 UV auv;
92d41999 2855 fromstr = NEXTFROM;
354b74ae 2856 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2857 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2858 }
2859 break;
a6ec74c1
JH
2860 case 'w':
2861 while (len-- > 0) {
f337b084 2862 NV anv;
a6ec74c1 2863 fromstr = NEXTFROM;
83388ae2 2864 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2865 anv = SvNV_nomg(fromstr);
a6ec74c1 2866
f337b084
TH
2867 if (anv < 0) {
2868 *cur = '\0';
b162af07 2869 SvCUR_set(cat, cur - start);
49704364 2870 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2871 }
a6ec74c1 2872
196b62db
NC
2873 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2874 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2875 any negative IVs will have already been got by the croak()
2876 above. IOK is untrue for fractions, so we test them
2877 against UV_MAX_P1. */
f337b084
TH
2878 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2879 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2880 char *in = buf + sizeof(buf);
1e9a122e 2881 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2882
2883 do {
eb160463 2884 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2885 auv >>= 7;
2886 } while (auv);
2887 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2888 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2889 in, (buf + sizeof(buf)) - in);
2890 } else if (SvPOKp(fromstr))
2891 goto w_string;
a6ec74c1 2892 else if (SvNOKp(fromstr)) {
0258719b 2893 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2894 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2895 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2896 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2897 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2898 Some C compilers are strict about integral constant
2899 expressions so we conservatively divide by a slightly
2900 smaller integer instead of multiplying by the exact
2901 floating-point value.
0258719b
NC
2902 */
2903#ifdef NV_MAX_10_EXP
f337b084 2904 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2905 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2906#else
f337b084 2907 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2908 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2909#endif
a6ec74c1
JH
2910 char *in = buf + sizeof(buf);
2911
8b6e33c7 2912 anv = Perl_floor(anv);
a6ec74c1 2913 do {
8b6e33c7 2914 const NV next = Perl_floor(anv / 128);
a6ec74c1 2915 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2916 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2917 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2918 anv = next;
2919 } while (anv > 0);
a6ec74c1 2920 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2921 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2922 in, (buf + sizeof(buf)) - in);
2923 } else {
8b6e33c7
AL
2924 const char *from;
2925 char *result, *in;
735b914b
JH
2926 SV *norm;
2927 STRLEN len;
2928 bool done;
2929
f337b084 2930 w_string:
735b914b 2931 /* Copy string and check for compliance */
1e9a122e 2932 from = SvPV_nomg_const(fromstr, len);
735b914b 2933 if ((norm = is_an_int(from, len)) == NULL)
49704364 2934 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2935
a02a5408 2936 Newx(result, len, char);
735b914b
JH
2937 in = result + len;
2938 done = FALSE;
f337b084 2939 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2940 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2941 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2942 in, (result + len) - in);
735b914b
JH
2943 Safefree(result);
2944 SvREFCNT_dec(norm); /* free norm */
fc241834 2945 }
a6ec74c1
JH
2946 }
2947 break;
2948 case 'i':
49704364 2949 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2950 while (len-- > 0) {
f337b084 2951 int aint;
a6ec74c1 2952 fromstr = NEXTFROM;
354b74ae 2953 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2954 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2955 }
2956 break;
068bd2e7 2957 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2958 case 'N':
2959 while (len-- > 0) {
f337b084 2960 U32 au32;
a6ec74c1 2961 fromstr = NEXTFROM;
354b74ae 2962 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2963 au32 = PerlSock_htonl(au32);
3a88beaa 2964 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2965 }
2966 break;
068bd2e7 2967 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2968 case 'V':
2969 while (len-- > 0) {
f337b084 2970 U32 au32;
a6ec74c1 2971 fromstr = NEXTFROM;
354b74ae 2972 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2973 au32 = htovl(au32);
3a88beaa 2974 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2975 }
2976 break;
49704364 2977 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2978#if LONGSIZE != SIZE32
fc241834 2979 while (len-- > 0) {
f337b084 2980 unsigned long aulong;
fc241834 2981 fromstr = NEXTFROM;
354b74ae 2982 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2983 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2984 }
49704364
LW
2985 break;
2986#else
2987 /* Fall though! */
a6ec74c1 2988#endif
49704364 2989 case 'L':
fc241834 2990 while (len-- > 0) {
f337b084 2991 U32 au32;
fc241834 2992 fromstr = NEXTFROM;
354b74ae 2993 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2994 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2995 }
2996 break;
49704364 2997 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2998#if LONGSIZE != SIZE32
fc241834 2999 while (len-- > 0) {
f337b084 3000 long along;
fc241834 3001 fromstr = NEXTFROM;
354b74ae 3002 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3003 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 3004 }
49704364
LW
3005 break;
3006#else
3007 /* Fall though! */
a6ec74c1 3008#endif
49704364
LW
3009 case 'l':
3010 while (len-- > 0) {
f337b084 3011 I32 ai32;
49704364 3012 fromstr = NEXTFROM;
354b74ae 3013 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3014 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3015 }
3016 break;
c174bf3b 3017#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3018 case 'Q':
3019 while (len-- > 0) {
f337b084 3020 Uquad_t auquad;
a6ec74c1 3021 fromstr = NEXTFROM;
354b74ae 3022 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3023 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3024 }
3025 break;
3026 case 'q':
3027 while (len-- > 0) {
f337b084 3028 Quad_t aquad;
a6ec74c1 3029 fromstr = NEXTFROM;
354b74ae 3030 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3031 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3032 }
3033 break;
1640b983 3034#endif
a6ec74c1
JH
3035 case 'P':
3036 len = 1; /* assume SV is correct length */
f337b084 3037 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3038 /* FALLTHROUGH */
a6ec74c1
JH
3039 case 'p':
3040 while (len-- > 0) {
83003860 3041 const char *aptr;
f337b084 3042
a6ec74c1 3043 fromstr = NEXTFROM;
28a4f200 3044 SvGETMAGIC(fromstr);
3045 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3046 else {
a6ec74c1
JH
3047 /* XXX better yet, could spirit away the string to
3048 * a safe spot and hang on to it until the result
3049 * of pack() (and all copies of the result) are
3050 * gone.
3051 */
b199a398
FC
3052 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3053 || (SvPADTMP(fromstr) &&
a2a5de95
NC
3054 !SvREADONLY(fromstr)))) {
3055 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3056 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3057 }
3058 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3059 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3060 else
2596d9fe 3061 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3062 }
3a88beaa 3063 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3064 }
3065 break;
fc241834 3066 case 'u': {
f7fe979e 3067 const char *aptr, *aend;
fc241834 3068 bool from_utf8;
f337b084 3069
a6ec74c1 3070 fromstr = NEXTFROM;
fc241834
RGS
3071 if (len <= 2) len = 45;
3072 else len = len / 3 * 3;
3073 if (len >= 64) {
a2a5de95
NC
3074 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3075 "Field too wide in 'u' format in pack");
fc241834
RGS
3076 len = 63;
3077 }
83003860 3078 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3079 from_utf8 = DO_UTF8(fromstr);
3080 if (from_utf8) {
3081 aend = aptr + fromlen;
3f63b0e5 3082 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3083 } else aend = NULL; /* Unused, but keep compilers happy */
3084 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3085 while (fromlen > 0) {
fc241834 3086 U8 *end;
a6ec74c1 3087 I32 todo;
fc241834 3088 U8 hunk[1+63/3*4+1];
a6ec74c1 3089
eb160463 3090 if ((I32)fromlen > len)
a6ec74c1
JH
3091 todo = len;
3092 else
3093 todo = fromlen;
fc241834
RGS
3094 if (from_utf8) {
3095 char buffer[64];
9df874cd 3096 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3097 'u' | TYPE_IS_PACK)) {
3098 *cur = '\0';
b162af07 3099 SvCUR_set(cat, cur - start);
5637ef5b
NC
3100 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3101 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3102 aptr, aend, buffer, (long) todo);
fc241834 3103 }
e68aed92 3104 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3105 } else {
e68aed92 3106 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3107 aptr += todo;
3108 }
3a88beaa 3109 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3110 fromlen -= todo;
3111 }
a6ec74c1
JH
3112 break;
3113 }
f337b084
TH
3114 }
3115 *cur = '\0';
b162af07 3116 SvCUR_set(cat, cur - start);
f337b084 3117 no_change:
49704364 3118 *symptr = lookahead;
a6ec74c1 3119 }
49704364 3120 return beglist;
18529408
IZ
3121}
3122#undef NEXTFROM
3123
3124
3125PP(pp_pack)
3126{
20b7effb 3127 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3128 SV *cat = TARG;
18529408 3129 STRLEN fromlen;
349d4f2f 3130 SV *pat_sv = *++MARK;
eb578fdb
KW
3131 const char *pat = SvPV_const(pat_sv, fromlen);
3132 const char *patend = pat + fromlen;
18529408
IZ
3133
3134 MARK++;
e8f7c79f 3135 SvPVCLEAR(cat);
f337b084 3136 SvUTF8_off(cat);
18529408 3137
7accc089 3138 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3139
a6ec74c1
JH
3140 SvSETMAGIC(cat);
3141 SP = ORIGMARK;
3142 PUSHs(cat);
3143 RETURN;
3144}
a6ec74c1 3145
73cb7263 3146/*
14d04a33 3147 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3148 */