This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Allow space for NUL is UTF-8 array decls
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
f7fe979e
AL
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
e1b825c1
Z
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
f7fe979e
AL
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
41f35ffd 115#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
08ca2aa3
TH
116# define OFF16(p) ((char *) (p))
117# define OFF32(p) ((char *) (p))
41f35ffd
AC
118#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119# define OFF16(p) ((char*)(p))
120# define OFF32(p) ((char*)(p))
121#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124#else
125# error "bad cray byte order"
a6ec74c1
JH
126#endif
127
3a88beaa
NC
128#define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130#define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 132
a1219b5e
NC
133#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137#else
138# error "Unsupported byteorder"
20aa3a38
NC
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
142 logic adding to deal with any mixed-endian transformations needed.
143 */
a1219b5e
NC
144#endif
145
08ca2aa3 146/* Only to be used inside a loop (see the break) */
aaec8192 147#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 148STMT_START { \
228e69a7 149 if (UNLIKELY(utf8)) { \
9df874cd 150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 151 (char *) (buf), len, datumtype)) break; \
08ca2aa3 152 } else { \
228e69a7 153 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
154 S_reverse_copy(s, (char *) (buf), len); \
155 else \
156 Copy(s, (char *) (buf), len, char); \
275663fa 157 s += len; \
08ca2aa3 158 } \
08ca2aa3
TH
159} STMT_END
160
aaec8192
NC
161#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 163
aaec8192
NC
164#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 166
aaec8192
NC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 169
3a88beaa
NC
170#define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 172
49704364
LW
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
0f12654f 194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
e6006968
KW
215#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
216 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
a6ec74c1 217
66c611c5 218/* type modifiers */
62f95557 219#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
220#define TYPE_IS_BIG_ENDIAN 0x200
221#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 222#define TYPE_IS_PACK 0x800
1109a392 223#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 224#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
225#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
226
7212898e
NC
227# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
228# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
229
230# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
231
78d46eaa 232#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 233#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
234#define PACK_SIZE_MASK 0x3F
235
4ad176ef 236#include "packsizetables.inc"
78d46eaa 237
20aa3a38
NC
238static void
239S_reverse_copy(const char *src, char *dest, STRLEN len)
240{
241 dest += len;
242 while (len--)
243 *--dest = *src++;
244}
245
08ca2aa3 246STATIC U8
9df874cd 247utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 248{
08ca2aa3 249 STRLEN retlen;
df337714
KW
250 UV val;
251
252 if (*s >= end) {
253 goto croak;
254 }
255 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 256 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714
KW
257 if (retlen == (STRLEN) -1)
258 croak:
f337b084
TH
259 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
260 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 261 if (val >= 0x100) {
a2a5de95
NC
262 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
263 "Character in '%c' format wrapped in unpack",
264 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
265 val &= 0xff;
266 }
267 *s += retlen;
fe2774ed 268 return (U8)val;
08ca2aa3
TH
269}
270
f337b084 271#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 272 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
273 *(U8 *)(s)++)
274
275STATIC bool
e1b825c1 276S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
08ca2aa3
TH
277{
278 UV val;
279 STRLEN retlen;
f7fe979e 280 const char *from = *s;
08ca2aa3 281 int bad = 0;
f7fe979e 282 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 283 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
284 const bool needs_swap = NEEDS_SWAP(datumtype);
285
228e69a7 286 if (UNLIKELY(needs_swap))
20aa3a38
NC
287 buf += buf_len;
288
08ca2aa3
TH
289 for (;buf_len > 0; buf_len--) {
290 if (from >= end) return FALSE;
f337b084 291 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
df337714 292 if (retlen == (STRLEN) -1) {
08ca2aa3
TH
293 from += UTF8SKIP(from);
294 bad |= 1;
295 } else from += retlen;
296 if (val >= 0x100) {
297 bad |= 2;
298 val &= 0xff;
299 }
228e69a7 300 if (UNLIKELY(needs_swap))
20aa3a38
NC
301 *(U8 *)--buf = (U8)val;
302 else
303 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
304 }
305 /* We have enough characters for the buffer. Did we have problems ? */
306 if (bad) {
307 if (bad & 1) {
308 /* Rewalk the string fragment while warning */
f7fe979e 309 const char *ptr;
e1b825c1 310 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
311 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
312 if (ptr >= end) break;
c80e42f3 313 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
314 }
315 if (from > end) from = end;
316 }
a2a5de95
NC
317 if ((bad & 2))
318 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 319 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
320 "Character(s) in '%c' format wrapped in %s",
321 (int) TYPE_NO_MODIFIERS(datumtype),
322 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
323 }
324 *s = from;
325 return TRUE;
326}
327
64844641 328STATIC char *
d21d7215
KW
329S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
330 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 331
228e69a7 332 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
333 const U8 *p = start + len;
334 while (p-- > start) {
55d09dc8 335 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
336 }
337 } else {
338 const U8 * const end = start + len;
339 while (start < end) {
55d09dc8 340 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
341 start++;
342 }
f337b084 343 }
64844641 344 return dest;
f337b084
TH
345}
346
3a88beaa 347#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 348STMT_START { \
228e69a7 349 if (UNLIKELY(utf8)) \
d21d7215 350 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 351 else { \
228e69a7 352 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
353 S_reverse_copy((char *)(buf), cur, len); \
354 else \
355 Copy(buf, cur, len, char); \
230e1fce
NC
356 (cur) += (len); \
357 } \
f337b084
TH
358} STMT_END
359
360#define GROWING(utf8, cat, start, cur, in_len) \
361STMT_START { \
362 STRLEN glen = (in_len); \
3473cf63 363 if (utf8) glen *= UTF8_EXPAND; \
f337b084 364 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 365 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
366 (cur) = (start) + SvCUR(cat); \
367 } \
368} STMT_END
369
370#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
371STMT_START { \
f7fe979e 372 const STRLEN glen = (in_len); \
f337b084 373 STRLEN gl = glen; \
3473cf63 374 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
375 if ((cur) + gl >= (start) + SvLEN(cat)) { \
376 *cur = '\0'; \
b162af07 377 SvCUR_set((cat), (cur) - (start)); \
0bd48802 378 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
379 (cur) = (start) + SvCUR(cat); \
380 } \
3a88beaa 381 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
382} STMT_END
383
384#define PUSH_BYTE(utf8, s, byte) \
385STMT_START { \
386 if (utf8) { \
f7fe979e 387 const U8 au8 = (byte); \
d21d7215 388 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
389 } else *(U8 *)(s)++ = (byte); \
390} STMT_END
391
392/* Only to be used inside a loop (see the break) */
393#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
394STMT_START { \
395 STRLEN retlen; \
396 if (str >= end) break; \
397 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
df337714 398 if (retlen == (STRLEN) -1) { \
f337b084
TH
399 *cur = '\0'; \
400 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
401 } \
402 str += retlen; \
403} STMT_END
404
f7fe979e
AL
405static const char *_action( const tempsym_t* symptr )
406{
10edeb5d 407 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
408}
409
206947d2 410/* Returns the sizeof() struct described by pat */
e1b825c1 411STATIC SSize_t
f337b084 412S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 413{
e1b825c1 414 SSize_t total = 0;
206947d2 415
7918f24d
NC
416 PERL_ARGS_ASSERT_MEASURE_STRUCT;
417
49704364 418 while (next_symbol(symptr)) {
e1b825c1 419 SSize_t len, size;
f337b084
TH
420
421 switch (symptr->howlen) {
fc241834 422 case e_star:
49704364 423 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 424 _action( symptr ) );
81d52ecd 425
f337b084
TH
426 default:
427 /* e_no_len and e_number */
428 len = symptr->length;
429 break;
49704364
LW
430 }
431
a7a3cfaa 432 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 433 if (!size) {
e1b825c1 434 SSize_t star;
80a13697
NC
435 /* endianness doesn't influence the size of a type */
436 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
437 default:
438 Perl_croak(aTHX_ "Invalid type '%c' in %s",
439 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 440 _action( symptr ) );
28be1210
TH
441 case '.' | TYPE_IS_SHRIEKING:
442 case '@' | TYPE_IS_SHRIEKING:
80a13697 443 case '@':
28be1210 444 case '.':
80a13697
NC
445 case '/':
446 case 'U': /* XXXX Is it correct? */
447 case 'w':
448 case 'u':
449 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 450 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 451 _action( symptr ) );
80a13697
NC
452 case '%':
453 size = 0;
454 break;
455 case '(':
fc241834
RGS
456 {
457 tempsym_t savsym = *symptr;
458 symptr->patptr = savsym.grpbeg;
459 symptr->patend = savsym.grpend;
460 /* XXXX Theoretically, we need to measure many times at
461 different positions, since the subexpression may contain
462 alignment commands, but be not of aligned length.
463 Need to detect this and croak(). */
464 size = measure_struct(symptr);
465 *symptr = savsym;
466 break;
467 }
80a13697
NC
468 case 'X' | TYPE_IS_SHRIEKING:
469 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
470 */
471 if (!len) /* Avoid division by 0 */
472 len = 1;
473 len = total % len; /* Assumed: the start is aligned. */
924ba076 474 /* FALLTHROUGH */
80a13697
NC
475 case 'X':
476 size = -1;
477 if (total < len)
f7fe979e 478 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
479 break;
480 case 'x' | TYPE_IS_SHRIEKING:
481 if (!len) /* Avoid division by 0 */
482 len = 1;
483 star = total % len; /* Assumed: the start is aligned. */
484 if (star) /* Other portable ways? */
485 len = len - star;
486 else
487 len = 0;
924ba076 488 /* FALLTHROUGH */
80a13697
NC
489 case 'x':
490 case 'A':
491 case 'Z':
492 case 'a':
80a13697
NC
493 size = 1;
494 break;
495 case 'B':
496 case 'b':
497 len = (len + 7)/8;
498 size = 1;
499 break;
500 case 'H':
501 case 'h':
502 len = (len + 1)/2;
503 size = 1;
504 break;
78d46eaa 505
80a13697
NC
506 case 'P':
507 len = 1;
508 size = sizeof(char*);
78d46eaa
NC
509 break;
510 }
206947d2
IZ
511 }
512 total += len * size;
513 }
514 return total;
515}
516
49704364
LW
517
518/* locate matching closing parenthesis or bracket
519 * returns char pointer to char after match, or NULL
520 */
f7fe979e 521STATIC const char *
5aaab254 522S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 523{
7918f24d
NC
524 PERL_ARGS_ASSERT_GROUP_END;
525
49704364 526 while (patptr < patend) {
f7fe979e 527 const char c = *patptr++;
49704364
LW
528
529 if (isSPACE(c))
530 continue;
531 else if (c == ender)
532 return patptr-1;
533 else if (c == '#') {
534 while (patptr < patend && *patptr != '\n')
535 patptr++;
536 continue;
537 } else if (c == '(')
538 patptr = group_end(patptr, patend, ')') + 1;
539 else if (c == '[')
540 patptr = group_end(patptr, patend, ']') + 1;
18529408 541 }
49704364
LW
542 Perl_croak(aTHX_ "No group ending character '%c' found in template",
543 ender);
a25b5927 544 NOT_REACHED; /* NOTREACHED */
18529408
IZ
545}
546
49704364
LW
547
548/* Convert unsigned decimal number to binary.
549 * Expects a pointer to the first digit and address of length variable
550 * Advances char pointer to 1st non-digit char and returns number
fc241834 551 */
f7fe979e 552STATIC const char *
e1b825c1 553S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
49704364 554{
e1b825c1 555 SSize_t len = *patptr++ - '0';
7918f24d
NC
556
557 PERL_ARGS_ASSERT_GET_NUM;
558
49704364 559 while (isDIGIT(*patptr)) {
e1b825c1
Z
560 SSize_t nlen = (len * 10) + (*patptr++ - '0');
561 if (nlen < 0 || nlen/10 != len)
49704364 562 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
e1b825c1 563 len = nlen;
49704364
LW
564 }
565 *lenptr = len;
566 return patptr;
567}
568
569/* The marvellous template parsing routine: Using state stored in *symptr,
570 * locates next template code and count
571 */
572STATIC bool
f337b084 573S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 574{
f7fe979e 575 const char* patptr = symptr->patptr;
0bcc34c2 576 const char* const patend = symptr->patend;
49704364 577
7918f24d
NC
578 PERL_ARGS_ASSERT_NEXT_SYMBOL;
579
49704364
LW
580 symptr->flags &= ~FLAG_SLASH;
581
582 while (patptr < patend) {
583 if (isSPACE(*patptr))
584 patptr++;
585 else if (*patptr == '#') {
586 patptr++;
587 while (patptr < patend && *patptr != '\n')
588 patptr++;
589 if (patptr < patend)
590 patptr++;
591 } else {
fc241834 592 /* We should have found a template code */
49704364 593 I32 code = *patptr++ & 0xFF;
66c611c5 594 U32 inherited_modifiers = 0;
49704364
LW
595
596 if (code == ','){ /* grandfather in commas but with a warning */
597 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
598 symptr->flags |= FLAG_COMMA;
599 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 600 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
601 }
602 continue;
603 }
fc241834 604
49704364 605 /* for '(', skip to ')' */
fc241834 606 if (code == '(') {
49704364
LW
607 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
608 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 609 _action( symptr ) );
49704364
LW
610 symptr->grpbeg = patptr;
611 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
612 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
613 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 614 _action( symptr ) );
49704364
LW
615 }
616
66c611c5
MHM
617 /* look for group modifiers to inherit */
618 if (TYPE_ENDIANNESS(symptr->flags)) {
619 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
620 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
621 }
622
1109a392
MHM
623 /* look for modifiers */
624 while (patptr < patend) {
b7787f18
AL
625 const char *allowed;
626 I32 modifier;
1109a392
MHM
627 switch (*patptr) {
628 case '!':
629 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 630 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
631 break;
632 case '>':
633 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 634 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
635 break;
636 case '<':
637 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 638 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
639 break;
640 default:
b7787f18
AL
641 allowed = "";
642 modifier = 0;
1109a392
MHM
643 break;
644 }
66c611c5 645
1109a392
MHM
646 if (modifier == 0)
647 break;
66c611c5 648
1109a392
MHM
649 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
650 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 651 allowed, _action( symptr ) );
66c611c5
MHM
652
653 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 654 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 655 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
656 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
657 TYPE_ENDIANNESS_MASK)
658 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 659 *patptr, _action( symptr ) );
66c611c5 660
a2a5de95
NC
661 if ((code & modifier)) {
662 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
663 "Duplicate modifier '%c' after '%c' in %s",
664 *patptr, (int) TYPE_NO_MODIFIERS(code),
665 _action( symptr ) );
1109a392 666 }
66c611c5 667
1109a392
MHM
668 code |= modifier;
669 patptr++;
49704364
LW
670 }
671
66c611c5
MHM
672 /* inherit modifiers */
673 code |= inherited_modifiers;
674
fc241834 675 /* look for count and/or / */
49704364
LW
676 if (patptr < patend) {
677 if (isDIGIT(*patptr)) {
678 patptr = get_num( patptr, &symptr->length );
679 symptr->howlen = e_number;
680
681 } else if (*patptr == '*') {
682 patptr++;
683 symptr->howlen = e_star;
684
685 } else if (*patptr == '[') {
f7fe979e 686 const char* lenptr = ++patptr;
49704364
LW
687 symptr->howlen = e_number;
688 patptr = group_end( patptr, patend, ']' ) + 1;
689 /* what kind of [] is it? */
690 if (isDIGIT(*lenptr)) {
691 lenptr = get_num( lenptr, &symptr->length );
692 if( *lenptr != ']' )
693 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 694 _action( symptr ) );
49704364
LW
695 } else {
696 tempsym_t savsym = *symptr;
697 symptr->patend = patptr-1;
698 symptr->patptr = lenptr;
699 savsym.length = measure_struct(symptr);
700 *symptr = savsym;
701 }
702 } else {
703 symptr->howlen = e_no_len;
704 symptr->length = 1;
705 }
706
707 /* try to find / */
708 while (patptr < patend) {
709 if (isSPACE(*patptr))
710 patptr++;
711 else if (*patptr == '#') {
712 patptr++;
713 while (patptr < patend && *patptr != '\n')
714 patptr++;
715 if (patptr < patend)
716 patptr++;
717 } else {
66c611c5 718 if (*patptr == '/') {
49704364
LW
719 symptr->flags |= FLAG_SLASH;
720 patptr++;
66c611c5
MHM
721 if (patptr < patend &&
722 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 723 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 724 _action( symptr ) );
49704364
LW
725 }
726 break;
727 }
18529408 728 }
49704364
LW
729 } else {
730 /* at end - no count, no / */
731 symptr->howlen = e_no_len;
732 symptr->length = 1;
733 }
734
735 symptr->code = code;
fc241834 736 symptr->patptr = patptr;
49704364 737 return TRUE;
18529408 738 }
49704364 739 }
fc241834 740 symptr->patptr = patptr;
49704364 741 return FALSE;
18529408
IZ
742}
743
18529408 744/*
fc241834 745 There is no way to cleanly handle the case where we should process the
08ca2aa3 746 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
747 (e.g. estimates like strend-s as an upper bound for the number of
748 characters left wouldn't work). So if we foresee the need of this
749 (pattern starts with U or contains U0), we want to work on the encoded
750 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
751 themselves if they need to do a lot of unpacks like this on it
752*/
fc241834 753STATIC bool
08ca2aa3
TH
754need_utf8(const char *pat, const char *patend)
755{
756 bool first = TRUE;
7918f24d
NC
757
758 PERL_ARGS_ASSERT_NEED_UTF8;
759
08ca2aa3
TH
760 while (pat < patend) {
761 if (pat[0] == '#') {
762 pat++;
f7fe979e 763 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
764 if (!pat) return FALSE;
765 } else if (pat[0] == 'U') {
766 if (first || pat[1] == '0') return TRUE;
767 } else first = FALSE;
768 pat++;
769 }
770 return FALSE;
771}
772
773STATIC char
774first_symbol(const char *pat, const char *patend) {
7918f24d
NC
775 PERL_ARGS_ASSERT_FIRST_SYMBOL;
776
08ca2aa3
TH
777 while (pat < patend) {
778 if (pat[0] != '#') return pat[0];
779 pat++;
f7fe979e 780 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
781 if (!pat) return 0;
782 pat++;
783 }
784 return 0;
785}
786
787/*
dcccc8ff
KW
788
789=head1 Pack and Unpack
790
7accc089
JH
791=for apidoc unpackstring
792
796b6530 793The engine implementing the C<unpack()> Perl function.
21ebfc7a 794
796b6530
KW
795Using the template C<pat..patend>, this function unpacks the string
796C<s..strend> into a number of mortal SVs, which it pushes onto the perl
797argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 798C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
799pushed elements.
800
796b6530
KW
801The C<strend> and C<patend> pointers should point to the byte following the
802last character of each string.
21ebfc7a
DM
803
804Although this function returns its values on the perl argument stack, it
805doesn't take any parameters from that stack (and thus in particular
796b6530 806there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 807example).
7accc089
JH
808
809=cut */
810
e1b825c1 811SSize_t
f7fe979e 812Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 813{
f7fe979e 814 tempsym_t sym;
08ca2aa3 815
7918f24d
NC
816 PERL_ARGS_ASSERT_UNPACKSTRING;
817
f337b084 818 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
819 else if (need_utf8(pat, patend)) {
820 /* We probably should try to avoid this in case a scalar context call
821 wouldn't get to the "U0" */
822 STRLEN len = strend - s;
230e1fce 823 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
824 SAVEFREEPV(s);
825 strend = s + len;
f337b084 826 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
827 }
828
f337b084
TH
829 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
830 flags |= FLAG_PARSE_UTF8;
08ca2aa3 831
f7fe979e 832 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
833
834 return unpack_rec(&sym, s, s, strend, NULL );
835}
836
e1b825c1 837STATIC SSize_t
f7fe979e 838S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 839{
20b7effb 840 dSP;
3297d27d 841 SV *sv = NULL;
e1b825c1 842 const SSize_t start_sp_offset = SP - PL_stack_base;
49704364 843 howlen_t howlen;
e1b825c1 844 SSize_t checksum = 0;
92d41999 845 UV cuv = 0;
a6ec74c1 846 NV cdouble = 0.0;
e1b825c1 847 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 848 bool beyond = FALSE;
21c16052 849 bool explicit_length;
9e27e96a 850 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 851 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
852
853 PERL_ARGS_ASSERT_UNPACK_REC;
854
28be1210 855 symptr->strbeg = s - strbeg;
49704364 856
49704364 857 while (next_symbol(symptr)) {
a7a3cfaa 858 packprops_t props;
e1b825c1 859 SSize_t len;
f337b084 860 I32 datumtype = symptr->code;
a1219b5e 861 bool needs_swap;
206947d2 862 /* do first one only unless in list context
08ca2aa3 863 / is implemented by unpacking the count, then popping it from the
206947d2 864 stack, so must check that we're not in the middle of a / */
49704364 865 if ( unpack_only_one
206947d2 866 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 867 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 868 break;
49704364 869
f337b084 870 switch (howlen = symptr->howlen) {
fc241834
RGS
871 case e_star:
872 len = strend - strbeg; /* long enough */
49704364 873 break;
f337b084
TH
874 default:
875 /* e_no_len and e_number */
876 len = symptr->length;
877 break;
49704364 878 }
18529408 879
21c16052 880 explicit_length = TRUE;
a6ec74c1 881 redo_switch:
49704364 882 beyond = s >= strend;
a7a3cfaa
TH
883
884 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
885 if (props) {
886 /* props nonzero means we can process this letter. */
e1b825c1
Z
887 const SSize_t size = props & PACK_SIZE_MASK;
888 const SSize_t howmany = (strend - s) / size;
a7a3cfaa
TH
889 if (len > howmany)
890 len = howmany;
891
892 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
893 if (len && unpack_only_one) len = 1;
894 EXTEND(SP, len);
895 EXTEND_MORTAL(len);
78d46eaa
NC
896 }
897 }
a7a3cfaa 898
a1219b5e
NC
899 needs_swap = NEEDS_SWAP(datumtype);
900
1109a392 901 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 902 default:
1109a392 903 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 904
a6ec74c1 905 case '%':
49704364 906 if (howlen == e_no_len)
18529408 907 len = 16; /* len is not specified */
a6ec74c1 908 checksum = len;
92d41999 909 cuv = 0;
a6ec74c1 910 cdouble = 0;
18529408 911 continue;
81d52ecd 912
18529408
IZ
913 case '(':
914 {
49704364 915 tempsym_t savsym = *symptr;
9e27e96a 916 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 917 symptr->flags |= group_modifiers;
49704364 918 symptr->patend = savsym.grpend;
28be1210 919 symptr->previous = &savsym;
49704364 920 symptr->level++;
18529408 921 PUTBACK;
c6f750d1 922 if (len && unpack_only_one) len = 1;
18529408 923 while (len--) {
49704364 924 symptr->patptr = savsym.grpbeg;
f337b084
TH
925 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
926 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
927 unpack_rec(symptr, s, strbeg, strend, &s);
928 if (s == strend && savsym.howlen == e_star)
49704364 929 break; /* No way to continue */
18529408
IZ
930 }
931 SPAGAIN;
28be1210 932 savsym.flags = symptr->flags & ~group_modifiers;
49704364 933 *symptr = savsym;
18529408
IZ
934 break;
935 }
28be1210 936 case '.' | TYPE_IS_SHRIEKING:
28be1210 937 case '.': {
9e27e96a 938 const char *from;
28be1210 939 SV *sv;
9e27e96a 940 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
941 if (howlen == e_star) from = strbeg;
942 else if (len <= 0) from = s;
943 else {
944 tempsym_t *group = symptr;
945
946 while (--len && group) group = group->previous;
947 from = group ? strbeg + group->strbeg : strbeg;
948 }
949 sv = from <= s ?
00646304
CB
950 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
951 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 952 mXPUSHs(sv);
28be1210
TH
953 break;
954 }
28be1210 955 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 956 case '@':
28be1210 957 s = strbeg + symptr->strbeg;
28be1210 958 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 959 {
08ca2aa3
TH
960 while (len > 0) {
961 if (s >= strend)
962 Perl_croak(aTHX_ "'@' outside of string in unpack");
963 s += UTF8SKIP(s);
964 len--;
965 }
966 if (s > strend)
967 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
968 } else {
28be1210 969 if (strend-s < len)
fc241834 970 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 971 s += len;
08ca2aa3 972 }
a6ec74c1 973 break;
62f95557
IZ
974 case 'X' | TYPE_IS_SHRIEKING:
975 if (!len) /* Avoid division by 0 */
976 len = 1;
08ca2aa3 977 if (utf8) {
f7fe979e 978 const char *hop, *last;
e1b825c1 979 SSize_t l = len;
f337b084
TH
980 hop = last = strbeg;
981 while (hop < s) {
982 hop += UTF8SKIP(hop);
983 if (--l == 0) {
08ca2aa3 984 last = hop;
f337b084
TH
985 l = len;
986 }
fc241834 987 }
f337b084
TH
988 if (last > s)
989 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
990 s = last;
991 break;
f337b084
TH
992 }
993 len = (s - strbeg) % len;
924ba076 994 /* FALLTHROUGH */
a6ec74c1 995 case 'X':
08ca2aa3
TH
996 if (utf8) {
997 while (len > 0) {
998 if (s <= strbeg)
999 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1000 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1001 if (s <= strbeg)
1002 Perl_croak(aTHX_ "'X' outside of string in unpack");
1003 }
1004 len--;
1005 }
1006 } else {
fc241834
RGS
1007 if (len > s - strbeg)
1008 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1009 s -= len;
08ca2aa3 1010 }
a6ec74c1 1011 break;
9e27e96a 1012 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 1013 SSize_t ai32;
62f95557
IZ
1014 if (!len) /* Avoid division by 0 */
1015 len = 1;
230e1fce
NC
1016 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1017 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1018 if (ai32 == 0) break;
1019 len -= ai32;
9e27e96a 1020 }
924ba076 1021 /* FALLTHROUGH */
a6ec74c1 1022 case 'x':
08ca2aa3
TH
1023 if (utf8) {
1024 while (len>0) {
1025 if (s >= strend)
1026 Perl_croak(aTHX_ "'x' outside of string in unpack");
1027 s += UTF8SKIP(s);
1028 len--;
1029 }
1030 } else {
fc241834
RGS
1031 if (len > strend - s)
1032 Perl_croak(aTHX_ "'x' outside of string in unpack");
1033 s += len;
f337b084 1034 }
a6ec74c1
JH
1035 break;
1036 case '/':
49704364 1037 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1038
a6ec74c1
JH
1039 case 'A':
1040 case 'Z':
1041 case 'a':
08ca2aa3
TH
1042 if (checksum) {
1043 /* Preliminary length estimate is assumed done in 'W' */
1044 if (len > strend - s) len = strend - s;
1045 goto W_checksum;
1046 }
1047 if (utf8) {
e1b825c1 1048 SSize_t l;
f7fe979e 1049 const char *hop;
08ca2aa3
TH
1050 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1051 if (hop >= strend) {
1052 if (hop > strend)
1053 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1054 break;
fc241834 1055 }
a6ec74c1 1056 }
08ca2aa3
TH
1057 if (hop > strend)
1058 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1059 len = hop - s;
1060 } else if (len > strend - s)
1061 len = strend - s;
1062
1063 if (datumtype == 'Z') {
1064 /* 'Z' strips stuff after first null */
f7fe979e 1065 const char *ptr, *end;
f337b084
TH
1066 end = s + len;
1067 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1068 sv = newSVpvn(s, ptr-s);
1069 if (howlen == e_star) /* exact for 'Z*' */
1070 len = ptr-s + (ptr != strend ? 1 : 0);
1071 } else if (datumtype == 'A') {
1072 /* 'A' strips both nulls and spaces */
f7fe979e 1073 const char *ptr;
18bdf90a 1074 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1075 for (ptr = s+len-1; ptr >= s; ptr--) {
1076 if ( *ptr != 0
1077 && !UTF8_IS_CONTINUATION(*ptr)
1078 && !isSPACE_utf8_safe(ptr, strend))
1079 {
1080 break;
1081 }
1082 }
18bdf90a
TH
1083 if (ptr >= s) ptr += UTF8SKIP(ptr);
1084 else ptr++;
28be1210 1085 if (ptr > s+len)
18bdf90a
TH
1086 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1087 } else {
1088 for (ptr = s+len-1; ptr >= s; ptr--)
1089 if (*ptr != 0 && !isSPACE(*ptr)) break;
1090 ptr++;
1091 }
08ca2aa3
TH
1092 sv = newSVpvn(s, ptr-s);
1093 } else sv = newSVpvn(s, len);
1094
1095 if (utf8) {
1096 SvUTF8_on(sv);
1097 /* Undo any upgrade done due to need_utf8() */
f337b084 1098 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1099 sv_utf8_downgrade(sv, 0);
a6ec74c1 1100 }
6e449a3a 1101 mXPUSHs(sv);
08ca2aa3 1102 s += len;
a6ec74c1
JH
1103 break;
1104 case 'B':
08ca2aa3
TH
1105 case 'b': {
1106 char *str;
49704364 1107 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1108 len = (strend - s) * 8;
1109 if (checksum) {
f337b084 1110 if (utf8)
08ca2aa3 1111 while (len >= 8 && s < strend) {
9df874cd 1112 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1113 len -= 8;
1114 }
f337b084 1115 else
fc241834 1116 while (len >= 8) {
08ca2aa3 1117 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1118 len -= 8;
1119 }
08ca2aa3
TH
1120 if (len && s < strend) {
1121 U8 bits;
f337b084
TH
1122 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1123 if (datumtype == 'b')
a6ec74c1 1124 while (len-- > 0) {
92d41999 1125 if (bits & 1) cuv++;
a6ec74c1
JH
1126 bits >>= 1;
1127 }
f337b084 1128 else
a6ec74c1 1129 while (len-- > 0) {
08ca2aa3 1130 if (bits & 0x80) cuv++;
a6ec74c1
JH
1131 bits <<= 1;
1132 }
fc241834 1133 }
a6ec74c1
JH
1134 break;
1135 }
08ca2aa3 1136
561b68a9 1137 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1138 SvPOK_on(sv);
1139 str = SvPVX(sv);
1140 if (datumtype == 'b') {
f337b084 1141 U8 bits = 0;
e1b825c1 1142 const SSize_t ai32 = len;
08ca2aa3
TH
1143 for (len = 0; len < ai32; len++) {
1144 if (len & 7) bits >>= 1;
1145 else if (utf8) {
1146 if (s >= strend) break;
9df874cd 1147 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1148 } else bits = *(U8 *) s++;
1149 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1150 }
08ca2aa3 1151 } else {
f337b084 1152 U8 bits = 0;
e1b825c1 1153 const SSize_t ai32 = len;
08ca2aa3
TH
1154 for (len = 0; len < ai32; len++) {
1155 if (len & 7) bits <<= 1;
1156 else if (utf8) {
1157 if (s >= strend) break;
9df874cd 1158 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1159 } else bits = *(U8 *) s++;
1160 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1161 }
1162 }
1163 *str = '\0';
aa07b2f6 1164 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1165 XPUSHs(sv);
a6ec74c1 1166 break;
08ca2aa3 1167 }
a6ec74c1 1168 case 'H':
08ca2aa3 1169 case 'h': {
3297d27d 1170 char *str = NULL;
fc241834 1171 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1172 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1173 len = (strend - s) * 2;
858fe5e1
TC
1174 if (!checksum) {
1175 sv = sv_2mortal(newSV(len ? len : 1));
1176 SvPOK_on(sv);
1177 str = SvPVX(sv);
1178 }
a6ec74c1 1179 if (datumtype == 'h') {
f337b084 1180 U8 bits = 0;
e1b825c1 1181 SSize_t ai32 = len;
fc241834
RGS
1182 for (len = 0; len < ai32; len++) {
1183 if (len & 1) bits >>= 4;
1184 else if (utf8) {
1185 if (s >= strend) break;
9df874cd 1186 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1187 } else bits = * (U8 *) s++;
858fe5e1
TC
1188 if (!checksum)
1189 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1190 }
08ca2aa3 1191 } else {
f337b084 1192 U8 bits = 0;
e1b825c1 1193 const SSize_t ai32 = len;
08ca2aa3
TH
1194 for (len = 0; len < ai32; len++) {
1195 if (len & 1) bits <<= 4;
1196 else if (utf8) {
1197 if (s >= strend) break;
9df874cd 1198 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1199 } else bits = *(U8 *) s++;
858fe5e1
TC
1200 if (!checksum)
1201 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1202 }
1203 }
858fe5e1
TC
1204 if (!checksum) {
1205 *str = '\0';
1206 SvCUR_set(sv, str - SvPVX_const(sv));
1207 XPUSHs(sv);
1208 }
a6ec74c1 1209 break;
08ca2aa3 1210 }
1651fc44
ML
1211 case 'C':
1212 if (len == 0) {
1213 if (explicit_length)
1214 /* Switch to "character" mode */
1215 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1216 break;
1217 }
924ba076 1218 /* FALLTHROUGH */
a6ec74c1 1219 case 'c':
1651fc44
ML
1220 while (len-- > 0 && s < strend) {
1221 int aint;
1222 if (utf8)
1223 {
1224 STRLEN retlen;
1225 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1226 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1227 if (retlen == (STRLEN) -1)
1651fc44
ML
1228 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1229 s += retlen;
1230 }
1231 else
1232 aint = *(U8 *)(s)++;
1233 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1234 aint -= 256;
08ca2aa3 1235 if (!checksum)
6e449a3a 1236 mPUSHi(aint);
73cb7263
NC
1237 else if (checksum > bits_in_uv)
1238 cdouble += (NV)aint;
1239 else
1240 cuv += aint;
a6ec74c1
JH
1241 }
1242 break;
08ca2aa3
TH
1243 case 'W':
1244 W_checksum:
1651fc44 1245 if (utf8) {
08ca2aa3 1246 while (len-- > 0 && s < strend) {
08ca2aa3 1247 STRLEN retlen;
f7fe979e 1248 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1249 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1250 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1251 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1252 s += retlen;
1253 if (!checksum)
6e449a3a 1254 mPUSHu(val);
08ca2aa3
TH
1255 else if (checksum > bits_in_uv)
1256 cdouble += (NV) val;
d6d3e8bd 1257 else
08ca2aa3 1258 cuv += val;
fc241834 1259 }
08ca2aa3 1260 } else if (!checksum)
a6ec74c1 1261 while (len-- > 0) {
f7fe979e 1262 const U8 ch = *(U8 *) s++;
6e449a3a 1263 mPUSHu(ch);
a6ec74c1 1264 }
08ca2aa3
TH
1265 else if (checksum > bits_in_uv)
1266 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1267 else
1268 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1269 break;
1270 case 'U':
35bcd338 1271 if (len == 0) {
c5333953 1272 if (explicit_length && howlen != e_star) {
08ca2aa3 1273 /* Switch to "bytes in UTF-8" mode */
f337b084 1274 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1275 else
1276 /* Should be impossible due to the need_utf8() test */
1277 Perl_croak(aTHX_ "U0 mode on a byte string");
1278 }
35bcd338
JH
1279 break;
1280 }
08ca2aa3 1281 if (len > strend - s) len = strend - s;
fc241834 1282 if (!checksum) {
08ca2aa3
TH
1283 if (len && unpack_only_one) len = 1;
1284 EXTEND(SP, len);
1285 EXTEND_MORTAL(len);
fc241834 1286 }
08ca2aa3
TH
1287 while (len-- > 0 && s < strend) {
1288 STRLEN retlen;
1289 UV auv;
1290 if (utf8) {
81b3f640 1291 U8 result[UTF8_MAXLEN+1];
f7fe979e 1292 const char *ptr = s;
08ca2aa3 1293 STRLEN len;
08ca2aa3
TH
1294 /* Bug: warns about bad utf8 even if we are short on bytes
1295 and will break out of the loop */
9df874cd 1296 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1297 'U'))
08ca2aa3
TH
1298 break;
1299 len = UTF8SKIP(result);
9df874cd 1300 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1301 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1302 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1303 len,
1304 &retlen,
1305 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1306 s = ptr;
1307 } else {
3ece276e
KW
1308 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1309 strend - s,
1310 &retlen,
1311 UTF8_ALLOW_DEFAULT));
df337714 1312 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1313 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1314 s += retlen;
1315 }
1316 if (!checksum)
6e449a3a 1317 mPUSHu(auv);
73cb7263 1318 else if (checksum > bits_in_uv)
08ca2aa3 1319 cdouble += (NV) auv;
73cb7263 1320 else
08ca2aa3 1321 cuv += auv;
a6ec74c1
JH
1322 }
1323 break;
49704364
LW
1324 case 's' | TYPE_IS_SHRIEKING:
1325#if SHORTSIZE != SIZE16
73cb7263 1326 while (len-- > 0) {
08ca2aa3 1327 short ashort;
aaec8192 1328 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1329 if (!checksum)
6e449a3a 1330 mPUSHi(ashort);
73cb7263
NC
1331 else if (checksum > bits_in_uv)
1332 cdouble += (NV)ashort;
1333 else
1334 cuv += ashort;
49704364
LW
1335 }
1336 break;
1337#else
924ba076 1338 /* FALLTHROUGH */
a6ec74c1 1339#endif
49704364 1340 case 's':
73cb7263 1341 while (len-- > 0) {
08ca2aa3
TH
1342 I16 ai16;
1343
1344#if U16SIZE > SIZE16
1345 ai16 = 0;
1346#endif
aaec8192 1347 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1348#if U16SIZE > SIZE16
73cb7263
NC
1349 if (ai16 > 32767)
1350 ai16 -= 65536;
a6ec74c1 1351#endif
08ca2aa3 1352 if (!checksum)
6e449a3a 1353 mPUSHi(ai16);
73cb7263
NC
1354 else if (checksum > bits_in_uv)
1355 cdouble += (NV)ai16;
1356 else
1357 cuv += ai16;
a6ec74c1
JH
1358 }
1359 break;
49704364
LW
1360 case 'S' | TYPE_IS_SHRIEKING:
1361#if SHORTSIZE != SIZE16
73cb7263 1362 while (len-- > 0) {
08ca2aa3 1363 unsigned short aushort;
aaec8192
NC
1364 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1365 needs_swap);
08ca2aa3 1366 if (!checksum)
6e449a3a 1367 mPUSHu(aushort);
73cb7263
NC
1368 else if (checksum > bits_in_uv)
1369 cdouble += (NV)aushort;
1370 else
1371 cuv += aushort;
49704364
LW
1372 }
1373 break;
1374#else
924ba076 1375 /* FALLTHROUGH */
49704364 1376#endif
a6ec74c1
JH
1377 case 'v':
1378 case 'n':
1379 case 'S':
73cb7263 1380 while (len-- > 0) {
08ca2aa3
TH
1381 U16 au16;
1382#if U16SIZE > SIZE16
1383 au16 = 0;
1384#endif
aaec8192 1385 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1386 if (datumtype == 'n')
1387 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1388 if (datumtype == 'v')
1389 au16 = vtohs(au16);
08ca2aa3 1390 if (!checksum)
6e449a3a 1391 mPUSHu(au16);
73cb7263 1392 else if (checksum > bits_in_uv)
f337b084 1393 cdouble += (NV) au16;
73cb7263
NC
1394 else
1395 cuv += au16;
a6ec74c1
JH
1396 }
1397 break;
068bd2e7
MHM
1398 case 'v' | TYPE_IS_SHRIEKING:
1399 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1400 while (len-- > 0) {
08ca2aa3
TH
1401 I16 ai16;
1402# if U16SIZE > SIZE16
1403 ai16 = 0;
1404# endif
aaec8192 1405 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1406 /* There should never be any byte-swapping here. */
1407 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1408 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1409 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1410 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1411 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1412 if (!checksum)
6e449a3a 1413 mPUSHi(ai16);
73cb7263 1414 else if (checksum > bits_in_uv)
08ca2aa3 1415 cdouble += (NV) ai16;
73cb7263
NC
1416 else
1417 cuv += ai16;
068bd2e7
MHM
1418 }
1419 break;
a6ec74c1 1420 case 'i':
49704364 1421 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1422 while (len-- > 0) {
08ca2aa3 1423 int aint;
aaec8192 1424 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1425 if (!checksum)
6e449a3a 1426 mPUSHi(aint);
73cb7263
NC
1427 else if (checksum > bits_in_uv)
1428 cdouble += (NV)aint;
1429 else
1430 cuv += aint;
a6ec74c1
JH
1431 }
1432 break;
1433 case 'I':
49704364 1434 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1435 while (len-- > 0) {
08ca2aa3 1436 unsigned int auint;
aaec8192 1437 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1438 if (!checksum)
6e449a3a 1439 mPUSHu(auint);
73cb7263
NC
1440 else if (checksum > bits_in_uv)
1441 cdouble += (NV)auint;
1442 else
1443 cuv += auint;
a6ec74c1
JH
1444 }
1445 break;
92d41999 1446 case 'j':
73cb7263 1447 while (len-- > 0) {
08ca2aa3 1448 IV aiv;
aaec8192 1449 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1450 if (!checksum)
6e449a3a 1451 mPUSHi(aiv);
73cb7263
NC
1452 else if (checksum > bits_in_uv)
1453 cdouble += (NV)aiv;
1454 else
1455 cuv += aiv;
92d41999
JH
1456 }
1457 break;
1458 case 'J':
73cb7263 1459 while (len-- > 0) {
08ca2aa3 1460 UV auv;
aaec8192 1461 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1462 if (!checksum)
6e449a3a 1463 mPUSHu(auv);
73cb7263
NC
1464 else if (checksum > bits_in_uv)
1465 cdouble += (NV)auv;
1466 else
1467 cuv += auv;
92d41999
JH
1468 }
1469 break;
49704364
LW
1470 case 'l' | TYPE_IS_SHRIEKING:
1471#if LONGSIZE != SIZE32
73cb7263 1472 while (len-- > 0) {
08ca2aa3 1473 long along;
aaec8192 1474 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1475 if (!checksum)
6e449a3a 1476 mPUSHi(along);
73cb7263
NC
1477 else if (checksum > bits_in_uv)
1478 cdouble += (NV)along;
1479 else
1480 cuv += along;
49704364
LW
1481 }
1482 break;
1483#else
924ba076 1484 /* FALLTHROUGH */
a6ec74c1 1485#endif
49704364 1486 case 'l':
73cb7263 1487 while (len-- > 0) {
08ca2aa3
TH
1488 I32 ai32;
1489#if U32SIZE > SIZE32
1490 ai32 = 0;
1491#endif
aaec8192 1492 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1493#if U32SIZE > SIZE32
08ca2aa3 1494 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1495#endif
08ca2aa3 1496 if (!checksum)
6e449a3a 1497 mPUSHi(ai32);
73cb7263
NC
1498 else if (checksum > bits_in_uv)
1499 cdouble += (NV)ai32;
1500 else
1501 cuv += ai32;
a6ec74c1
JH
1502 }
1503 break;
49704364
LW
1504 case 'L' | TYPE_IS_SHRIEKING:
1505#if LONGSIZE != SIZE32
73cb7263 1506 while (len-- > 0) {
08ca2aa3 1507 unsigned long aulong;
aaec8192 1508 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1509 if (!checksum)
6e449a3a 1510 mPUSHu(aulong);
73cb7263
NC
1511 else if (checksum > bits_in_uv)
1512 cdouble += (NV)aulong;
1513 else
1514 cuv += aulong;
49704364
LW
1515 }
1516 break;
1517#else
924ba076 1518 /* FALLTHROUGH */
49704364 1519#endif
a6ec74c1
JH
1520 case 'V':
1521 case 'N':
1522 case 'L':
73cb7263 1523 while (len-- > 0) {
08ca2aa3
TH
1524 U32 au32;
1525#if U32SIZE > SIZE32
1526 au32 = 0;
1527#endif
aaec8192 1528 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1529 if (datumtype == 'N')
1530 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1531 if (datumtype == 'V')
1532 au32 = vtohl(au32);
08ca2aa3 1533 if (!checksum)
6e449a3a 1534 mPUSHu(au32);
fc241834
RGS
1535 else if (checksum > bits_in_uv)
1536 cdouble += (NV)au32;
1537 else
1538 cuv += au32;
a6ec74c1
JH
1539 }
1540 break;
068bd2e7
MHM
1541 case 'V' | TYPE_IS_SHRIEKING:
1542 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1543 while (len-- > 0) {
08ca2aa3 1544 I32 ai32;
f8e5a5db 1545#if U32SIZE > SIZE32
08ca2aa3 1546 ai32 = 0;
f8e5a5db 1547#endif
aaec8192 1548 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1549 /* There should never be any byte swapping here. */
1550 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1551 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1552 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1553 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1554 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1555 if (!checksum)
6e449a3a 1556 mPUSHi(ai32);
73cb7263
NC
1557 else if (checksum > bits_in_uv)
1558 cdouble += (NV)ai32;
1559 else
1560 cuv += ai32;
068bd2e7
MHM
1561 }
1562 break;
a6ec74c1 1563 case 'p':
a6ec74c1 1564 while (len-- > 0) {
f7fe979e 1565 const char *aptr;
aaec8192 1566 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1567 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1568 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1569 }
1570 break;
1571 case 'w':
a6ec74c1
JH
1572 {
1573 UV auv = 0;
e1b825c1 1574 size_t bytes = 0;
fc241834 1575
08ca2aa3
TH
1576 while (len > 0 && s < strend) {
1577 U8 ch;
f337b084 1578 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1579 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1580 /* UTF8_IS_XXXXX not right here because this is a BER, not
1581 * UTF-8 format - using constant 0x80 */
08ca2aa3 1582 if (ch < 0x80) {
a6ec74c1 1583 bytes = 0;
6e449a3a 1584 mPUSHu(auv);
a6ec74c1
JH
1585 len--;
1586 auv = 0;
08ca2aa3 1587 continue;
a6ec74c1 1588 }
08ca2aa3 1589 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1590 const char *t;
a6ec74c1 1591
147e3846
KW
1592 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1593 (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1594 while (s < strend) {
f337b084 1595 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1596 sv = mul128(sv, (U8)(ch & 0x7f));
1597 if (!(ch & 0x80)) {
a6ec74c1
JH
1598 bytes = 0;
1599 break;
1600 }
1601 }
10516c54 1602 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1603 while (*t == '0')
1604 t++;
1605 sv_chop(sv, t);
6e449a3a 1606 mPUSHs(sv);
a6ec74c1
JH
1607 len--;
1608 auv = 0;
1609 }
1610 }
1611 if ((s >= strend) && bytes)
49704364 1612 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1613 }
1614 break;
1615 case 'P':
49704364
LW
1616 if (symptr->howlen == e_star)
1617 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1618 EXTEND(SP, 1);
2d3e0934 1619 if (s + sizeof(char*) <= strend) {
08ca2aa3 1620 char *aptr;
aaec8192 1621 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1622 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1623 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1624 }
a6ec74c1 1625 break;
c174bf3b 1626#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1627 case 'q':
73cb7263 1628 while (len-- > 0) {
08ca2aa3 1629 Quad_t aquad;
aaec8192 1630 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1631 if (!checksum)
c174bf3b 1632 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1633 else if (checksum > bits_in_uv)
1634 cdouble += (NV)aquad;
1635 else
1636 cuv += aquad;
1637 }
a6ec74c1
JH
1638 break;
1639 case 'Q':
73cb7263 1640 while (len-- > 0) {
08ca2aa3 1641 Uquad_t auquad;
aaec8192 1642 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1643 if (!checksum)
c174bf3b 1644 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1645 else if (checksum > bits_in_uv)
1646 cdouble += (NV)auquad;
1647 else
1648 cuv += auquad;
a6ec74c1
JH
1649 }
1650 break;
1640b983 1651#endif
a6ec74c1
JH
1652 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1653 case 'f':
73cb7263 1654 while (len-- > 0) {
08ca2aa3 1655 float afloat;
aaec8192 1656 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1657 if (!checksum)
6e449a3a 1658 mPUSHn(afloat);
08ca2aa3 1659 else
73cb7263 1660 cdouble += afloat;
fc241834 1661 }
a6ec74c1
JH
1662 break;
1663 case 'd':
73cb7263 1664 while (len-- > 0) {
08ca2aa3 1665 double adouble;
aaec8192 1666 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1667 if (!checksum)
6e449a3a 1668 mPUSHn(adouble);
08ca2aa3 1669 else
73cb7263 1670 cdouble += adouble;
fc241834 1671 }
a6ec74c1 1672 break;
92d41999 1673 case 'F':
73cb7263 1674 while (len-- > 0) {
275663fa 1675 NV_bytes anv;
aaec8192
NC
1676 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1677 datumtype, needs_swap);
08ca2aa3 1678 if (!checksum)
275663fa 1679 mPUSHn(anv.nv);
08ca2aa3 1680 else
275663fa 1681 cdouble += anv.nv;
fc241834 1682 }
92d41999
JH
1683 break;
1684#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1685 case 'D':
73cb7263 1686 while (len-- > 0) {
275663fa 1687 ld_bytes aldouble;
aaec8192
NC
1688 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1689 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1690 /* The most common long double format, the x86 80-bit
1691 * extended precision, has either 2 or 6 unused bytes,
1692 * which may contain garbage, which may contain
1693 * unintentional data. While we do zero the bytes of
1694 * the long double data in pack(), here in unpack() we
1695 * don't, because it's really hard to envision that
1696 * reading the long double off aldouble would be
e075ae47 1697 * affected by the unused bytes.
42262fd3
JH
1698 *
1699 * Note that trying to unpack 'long doubles' of 'long
1700 * doubles' packed in another system is in the general
1701 * case doomed without having more detail. */
08ca2aa3 1702 if (!checksum)
275663fa 1703 mPUSHn(aldouble.ld);
08ca2aa3 1704 else
275663fa 1705 cdouble += aldouble.ld;
92d41999
JH
1706 }
1707 break;
1708#endif
a6ec74c1 1709 case 'u':
858fe5e1 1710 if (!checksum) {
f7fe979e 1711 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1712 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1713 if (l) SvPOK_on(sv);
1714 }
08ca2aa3 1715
bee81d36
KW
1716 /* Note that all legal uuencoded strings are ASCII printables, so
1717 * have the same representation under UTF-8 vs not. This means we
1718 * can ignore UTF8ness on legal input. For illegal we stop at the
1719 * first failure, and don't report where/what that is, so again we
1720 * can ignore UTF8ness */
1721
9233b56b
KW
1722 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1723 I32 a, b, c, d;
1724 char hunk[3];
1725
1726 len = PL_uudmap[*(U8*)s++] & 077;
1727 while (len > 0) {
1728 if (s < strend && ISUUCHAR(*s))
1729 a = PL_uudmap[*(U8*)s++] & 077;
1730 else
1731 a = 0;
1732 if (s < strend && ISUUCHAR(*s))
1733 b = PL_uudmap[*(U8*)s++] & 077;
1734 else
1735 b = 0;
1736 if (s < strend && ISUUCHAR(*s))
1737 c = PL_uudmap[*(U8*)s++] & 077;
1738 else
1739 c = 0;
1740 if (s < strend && ISUUCHAR(*s))
1741 d = PL_uudmap[*(U8*)s++] & 077;
1742 else
1743 d = 0;
1744 hunk[0] = (char)((a << 2) | (b >> 4));
1745 hunk[1] = (char)((b << 4) | (c >> 2));
1746 hunk[2] = (char)((c << 6) | d);
1747 if (!checksum)
1748 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1749 len -= 3;
1750 }
1751 if (*s == '\n')
1752 s++;
1753 else /* possible checksum byte */
1754 if (s + 1 < strend && s[1] == '\n')
1755 s += 2;
1756 }
858fe5e1
TC
1757 if (!checksum)
1758 XPUSHs(sv);
a6ec74c1 1759 break;
99f862a1 1760 } /* End of switch */
49704364 1761
a6ec74c1 1762 if (checksum) {
1109a392 1763 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1764 (checksum > bits_in_uv &&
08ca2aa3
TH
1765 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1766 NV trouble, anv;
a6ec74c1 1767
08ca2aa3 1768 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1769 while (checksum >= 16) {
1770 checksum -= 16;
08ca2aa3 1771 anv *= 65536.0;
a6ec74c1 1772 }
a6ec74c1 1773 while (cdouble < 0.0)
08ca2aa3 1774 cdouble += anv;
09b94b1f
S
1775 cdouble = Perl_modf(cdouble / anv, &trouble);
1776#ifdef LONGDOUBLE_DOUBLEDOUBLE
1777 /* Workaround for powerpc doubledouble modfl bug:
1778 * close to 1.0L and -1.0L cdouble is 0, and trouble
1779 * is cdouble / anv. */
1780 if (trouble != Perl_ceil(trouble)) {
1781 cdouble = trouble;
1782 if (cdouble > 1.0L) cdouble -= 1.0L;
1783 if (cdouble < -1.0L) cdouble += 1.0L;
1784 }
1785#endif
1786 cdouble *= anv;
c4c5f44a 1787 sv = newSVnv(cdouble);
a6ec74c1
JH
1788 }
1789 else {
fa8ec7c1
NC
1790 if (checksum < bits_in_uv) {
1791 UV mask = ((UV)1 << checksum) - 1;
92d41999 1792 cuv &= mask;
a6ec74c1 1793 }
c4c5f44a 1794 sv = newSVuv(cuv);
a6ec74c1 1795 }
6e449a3a 1796 mXPUSHs(sv);
a6ec74c1
JH
1797 checksum = 0;
1798 }
fc241834 1799
49704364
LW
1800 if (symptr->flags & FLAG_SLASH){
1801 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1802 break;
49704364
LW
1803 if( next_symbol(symptr) ){
1804 if( symptr->howlen == e_number )
1805 Perl_croak(aTHX_ "Count after length/code in unpack" );
1806 if( beyond ){
1807 /* ...end of char buffer then no decent length available */
1808 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1809 } else {
1810 /* take top of stack (hope it's numeric) */
1811 len = POPi;
1812 if( len < 0 )
1813 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1814 }
1815 } else {
1816 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1817 }
1818 datumtype = symptr->code;
21c16052 1819 explicit_length = FALSE;
49704364
LW
1820 goto redo_switch;
1821 }
a6ec74c1 1822 }
49704364 1823
18529408
IZ
1824 if (new_s)
1825 *new_s = s;
1826 PUTBACK;
1827 return SP - PL_stack_base - start_sp_offset;
1828}
1829
1830PP(pp_unpack)
1831{
1832 dSP;
bab9c0ac 1833 dPOPPOPssrl;
1c23e2bd 1834 U8 gimme = GIMME_V;
18529408
IZ
1835 STRLEN llen;
1836 STRLEN rlen;
5c144d81
NC
1837 const char *pat = SvPV_const(left, llen);
1838 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1839 const char *strend = s + rlen;
1840 const char *patend = pat + llen;
e1b825c1 1841 SSize_t cnt;
18529408
IZ
1842
1843 PUTBACK;
7accc089 1844 cnt = unpackstring(pat, patend, s, strend,
49704364 1845 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1846 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1847
18529408
IZ
1848 SPAGAIN;
1849 if ( !cnt && gimme == G_SCALAR )
1850 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1851 RETURN;
1852}
1853
f337b084 1854STATIC U8 *
e1b825c1 1855doencodes(U8 *h, const U8 *s, SSize_t len)
a6ec74c1 1856{
f337b084 1857 *h++ = PL_uuemap[len];
a6ec74c1 1858 while (len > 2) {
f337b084
TH
1859 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1860 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1861 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1862 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1863 s += 3;
1864 len -= 3;
1865 }
1866 if (len > 0) {
e68aed92 1867 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1868 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1869 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1870 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1871 *h++ = PL_uuemap[0];
a6ec74c1 1872 }
f337b084
TH
1873 *h++ = '\n';
1874 return h;
a6ec74c1
JH
1875}
1876
1877STATIC SV *
f7fe979e 1878S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1879{
8b6e33c7
AL
1880 SV *result = newSVpvn(s, l);
1881 char *const result_c = SvPV_nolen(result); /* convenience */
1882 char *out = result_c;
1883 bool skip = 1;
1884 bool ignore = 0;
a6ec74c1 1885
7918f24d
NC
1886 PERL_ARGS_ASSERT_IS_AN_INT;
1887
a6ec74c1
JH
1888 while (*s) {
1889 switch (*s) {
1890 case ' ':
1891 break;
1892 case '+':
1893 if (!skip) {
1894 SvREFCNT_dec(result);
1895 return (NULL);
1896 }
1897 break;
1898 case '0':
1899 case '1':
1900 case '2':
1901 case '3':
1902 case '4':
1903 case '5':
1904 case '6':
1905 case '7':
1906 case '8':
1907 case '9':
1908 skip = 0;
1909 if (!ignore) {
1910 *(out++) = *s;
1911 }
1912 break;
1913 case '.':
1914 ignore = 1;
1915 break;
1916 default:
1917 SvREFCNT_dec(result);
1918 return (NULL);
1919 }
1920 s++;
1921 }
1922 *(out++) = '\0';
1923 SvCUR_set(result, out - result_c);
1924 return (result);
1925}
1926
1927/* pnum must be '\0' terminated */
1928STATIC int
1929S_div128(pTHX_ SV *pnum, bool *done)
1930{
8b6e33c7
AL
1931 STRLEN len;
1932 char * const s = SvPV(pnum, len);
1933 char *t = s;
1934 int m = 0;
1935
7918f24d
NC
1936 PERL_ARGS_ASSERT_DIV128;
1937
8b6e33c7
AL
1938 *done = 1;
1939 while (*t) {
1940 const int i = m * 10 + (*t - '0');
1941 const int r = (i >> 7); /* r < 10 */
1942 m = i & 0x7F;
1943 if (r) {
1944 *done = 0;
1945 }
1946 *(t++) = '0' + r;
a6ec74c1 1947 }
8b6e33c7
AL
1948 *(t++) = '\0';
1949 SvCUR_set(pnum, (STRLEN) (t - s));
1950 return (m);
a6ec74c1
JH
1951}
1952
18529408 1953/*
7accc089
JH
1954=for apidoc packlist
1955
796b6530 1956The engine implementing C<pack()> Perl function.
7accc089 1957
bfce84ec
AL
1958=cut
1959*/
7accc089
JH
1960
1961void
5aaab254 1962Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1963{
aadb217d
JH
1964 tempsym_t sym;
1965
7918f24d
NC
1966 PERL_ARGS_ASSERT_PACKLIST;
1967
f7fe979e 1968 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1969
f337b084
TH
1970 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1971 Also make sure any UTF8 flag is loaded */
56eb0262 1972 SvPV_force_nolen(cat);
bfce84ec
AL
1973 if (DO_UTF8(cat))
1974 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1975
49704364
LW
1976 (void)pack_rec( cat, &sym, beglist, endlist );
1977}
1978
f337b084
TH
1979/* like sv_utf8_upgrade, but also repoint the group start markers */
1980STATIC void
1981marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1982 STRLEN len;
1983 tempsym_t *group;
f7fe979e
AL
1984 const char *from_ptr, *from_start, *from_end, **marks, **m;
1985 char *to_start, *to_ptr;
f337b084
TH
1986
1987 if (SvUTF8(sv)) return;
1988
aa07b2f6 1989 from_start = SvPVX_const(sv);
f337b084
TH
1990 from_end = from_start + SvCUR(sv);
1991 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 1992 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
1993 if (from_ptr == from_end) {
1994 /* Simple case: no character needs to be changed */
1995 SvUTF8_on(sv);
1996 return;
1997 }
1998
3473cf63 1999 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2000 Newx(to_start, len, char);
f337b084
TH
2001 Copy(from_start, to_start, from_ptr-from_start, char);
2002 to_ptr = to_start + (from_ptr-from_start);
2003
a02a5408 2004 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2005 for (group=sym_ptr; group; group = group->previous)
2006 marks[group->level] = from_start + group->strbeg;
2007 marks[sym_ptr->level+1] = from_end+1;
2008 for (m = marks; *m < from_ptr; m++)
2009 *m = to_start + (*m-from_start);
2010
2011 for (;from_ptr < from_end; from_ptr++) {
2012 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2013 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2014 }
2015 *to_ptr = 0;
2016
2017 while (*m == from_ptr) *m++ = to_ptr;
2018 if (m != marks + sym_ptr->level+1) {
2019 Safefree(marks);
2020 Safefree(to_start);
5637ef5b
NC
2021 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2022 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2023 }
2024 for (group=sym_ptr; group; group = group->previous)
2025 group->strbeg = marks[group->level] - to_start;
2026 Safefree(marks);
2027
2028 if (SvOOK(sv)) {
2029 if (SvIVX(sv)) {
b162af07 2030 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2031 from_start -= SvIVX(sv);
2032 SvIV_set(sv, 0);
2033 }
2034 SvFLAGS(sv) &= ~SVf_OOK;
2035 }
2036 if (SvLEN(sv) != 0)
2037 Safefree(from_start);
f880fe2f 2038 SvPV_set(sv, to_start);
b162af07
SP
2039 SvCUR_set(sv, to_ptr - to_start);
2040 SvLEN_set(sv, len);
f337b084
TH
2041 SvUTF8_on(sv);
2042}
2043
2044/* Exponential string grower. Makes string extension effectively O(n)
2045 needed says how many extra bytes we need (not counting the final '\0')
2046 Only grows the string if there is an actual lack of space
2047*/
2048STATIC char *
0bd48802 2049S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2050 const STRLEN cur = SvCUR(sv);
2051 const STRLEN len = SvLEN(sv);
f337b084 2052 STRLEN extend;
7918f24d
NC
2053
2054 PERL_ARGS_ASSERT_SV_EXP_GROW;
2055
f337b084
TH
2056 if (len - cur > needed) return SvPVX(sv);
2057 extend = needed > len ? needed : len;
2058 return SvGROW(sv, len+extend+1);
2059}
49704364 2060
93f6e112 2061static SV *
b197e565 2062S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2063{
2064 SvGETMAGIC(sv);
93f6e112
FC
2065 if (UNLIKELY(SvAMAGIC(sv)))
2066 sv = sv_2num(sv);
99f450cc 2067 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2068 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2069 const NV nv = SvNV_nomg(sv);
2070 if (c == 'w')
147e3846 2071 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
354b74ae 2072 else
147e3846 2073 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2074 }
93f6e112 2075 return sv;
354b74ae
FC
2076}
2077
93f6e112
FC
2078#define SvIV_no_inf(sv,d) \
2079 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2080#define SvUV_no_inf(sv,d) \
2081 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2082
49704364
LW
2083STATIC
2084SV **
f337b084 2085S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2086{
49704364 2087 tempsym_t lookahead;
e1b825c1 2088 SSize_t items = endlist - beglist;
f337b084
TH
2089 bool found = next_symbol(symptr);
2090 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2091 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2092 char* from;
f337b084 2093
7918f24d
NC
2094 PERL_ARGS_ASSERT_PACK_REC;
2095
f337b084
TH
2096 if (symptr->level == 0 && found && symptr->code == 'U') {
2097 marked_upgrade(aTHX_ cat, symptr);
2098 symptr->flags |= FLAG_DO_UTF8;
2099 utf8 = 0;
49704364 2100 }
f337b084 2101 symptr->strbeg = SvCUR(cat);
49704364
LW
2102
2103 while (found) {
f337b084
TH
2104 SV *fromstr;
2105 STRLEN fromlen;
e1b825c1 2106 SSize_t len;
a0714e2c 2107 SV *lengthcode = NULL;
49704364 2108 I32 datumtype = symptr->code;
f337b084
TH
2109 howlen_t howlen = symptr->howlen;
2110 char *start = SvPVX(cat);
2111 char *cur = start + SvCUR(cat);
a1219b5e 2112 bool needs_swap;
49704364 2113
fc1bb3f2 2114#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2115#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2116
2117 switch (howlen) {
fc241834 2118 case e_star:
f337b084
TH
2119 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2120 0 : items;
2121 break;
2122 default:
2123 /* e_no_len and e_number */
2124 len = symptr->length;
49704364
LW
2125 break;
2126 }
2127
f337b084 2128 if (len) {
a7a3cfaa 2129 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2130
a7a3cfaa
TH
2131 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2132 /* We can process this letter. */
2133 STRLEN size = props & PACK_SIZE_MASK;
2134 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2135 }
f337b084
TH
2136 }
2137
49704364
LW
2138 /* Look ahead for next symbol. Do we have code/code? */
2139 lookahead = *symptr;
2140 found = next_symbol(&lookahead);
246f24af
TH
2141 if (symptr->flags & FLAG_SLASH) {
2142 IV count;
f337b084 2143 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2144 if (strchr("aAZ", lookahead.code)) {
2145 if (lookahead.howlen == e_number) count = lookahead.length;
2146 else {
ce399ba6 2147 if (items > 0) {
48a5da33 2148 count = sv_len_utf8(*beglist);
ce399ba6 2149 }
246f24af
TH
2150 else count = 0;
2151 if (lookahead.code == 'Z') count++;
2152 }
2153 } else {
2154 if (lookahead.howlen == e_number && lookahead.length < items)
2155 count = lookahead.length;
2156 else count = items;
2157 }
2158 lookahead.howlen = e_number;
2159 lookahead.length = count;
2160 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2161 }
49704364 2162
a1219b5e
NC
2163 needs_swap = NEEDS_SWAP(datumtype);
2164
fc241834
RGS
2165 /* Code inside the switch must take care to properly update
2166 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2167 doesn't simply leave using break */
0c7df902 2168 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2169 default:
f337b084
TH
2170 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2171 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2172 case '%':
49704364 2173 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2174
28be1210 2175 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2176 case '.':
2177 if (howlen == e_star) from = start;
2178 else if (len == 0) from = cur;
2179 else {
2180 tempsym_t *group = symptr;
2181
2182 while (--len && group) group = group->previous;
2183 from = group ? start + group->strbeg : start;
2184 }
2185 fromstr = NEXTFROM;
354b74ae 2186 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2187 goto resize;
28be1210 2188 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2189 case '@':
28be1210
TH
2190 from = start + symptr->strbeg;
2191 resize:
28be1210 2192 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2193 if (len >= 0) {
2194 while (len && from < cur) {
2195 from += UTF8SKIP(from);
2196 len--;
2197 }
2198 if (from > cur)
2199 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2200 if (len) {
2201 /* Here we know from == cur */
2202 grow:
2203 GROWING(0, cat, start, cur, len);
2204 Zero(cur, len, char);
2205 cur += len;
2206 } else if (from < cur) {
2207 len = cur - from;
2208 goto shrink;
2209 } else goto no_change;
2210 } else {
2211 cur = from;
2212 len = -len;
2213 goto utf8_shrink;
f337b084 2214 }
28be1210
TH
2215 else {
2216 len -= cur - from;
f337b084 2217 if (len > 0) goto grow;
28be1210 2218 if (len == 0) goto no_change;
fc241834 2219 len = -len;
28be1210 2220 goto shrink;
f337b084 2221 }
a6ec74c1 2222 break;
81d52ecd 2223
fc241834 2224 case '(': {
49704364 2225 tempsym_t savsym = *symptr;
66c611c5
MHM
2226 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2227 symptr->flags |= group_modifiers;
49704364
LW
2228 symptr->patend = savsym.grpend;
2229 symptr->level++;
f337b084 2230 symptr->previous = &lookahead;
18529408 2231 while (len--) {
f337b084
TH
2232 U32 was_utf8;
2233 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2234 else symptr->flags &= ~FLAG_PARSE_UTF8;
2235 was_utf8 = SvUTF8(cat);
49704364 2236 symptr->patptr = savsym.grpbeg;
f337b084
TH
2237 beglist = pack_rec(cat, symptr, beglist, endlist);
2238 if (SvUTF8(cat) != was_utf8)
2239 /* This had better be an upgrade while in utf8==0 mode */
2240 utf8 = 1;
2241
49704364 2242 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2243 break; /* No way to continue */
2244 }
ee790063 2245 items = endlist - beglist;
f337b084
TH
2246 lookahead.flags = symptr->flags & ~group_modifiers;
2247 goto no_change;
18529408 2248 }
62f95557
IZ
2249 case 'X' | TYPE_IS_SHRIEKING:
2250 if (!len) /* Avoid division by 0 */
2251 len = 1;
f337b084
TH
2252 if (utf8) {
2253 char *hop, *last;
e1b825c1 2254 SSize_t l = len;
f337b084
TH
2255 hop = last = start;
2256 while (hop < cur) {
2257 hop += UTF8SKIP(hop);
2258 if (--l == 0) {
2259 last = hop;
2260 l = len;
2261 }
2262 }
2263 if (last > cur)
2264 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2265 cur = last;
2266 break;
2267 }
2268 len = (cur-start) % len;
924ba076 2269 /* FALLTHROUGH */
a6ec74c1 2270 case 'X':
f337b084
TH
2271 if (utf8) {
2272 if (len < 1) goto no_change;
28be1210 2273 utf8_shrink:
f337b084
TH
2274 while (len > 0) {
2275 if (cur <= start)
28be1210
TH
2276 Perl_croak(aTHX_ "'%c' outside of string in pack",
2277 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2278 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2279 if (cur <= start)
28be1210
TH
2280 Perl_croak(aTHX_ "'%c' outside of string in pack",
2281 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2282 }
2283 len--;
2284 }
2285 } else {
fc241834 2286 shrink:
f337b084 2287 if (cur - start < len)
28be1210
TH
2288 Perl_croak(aTHX_ "'%c' outside of string in pack",
2289 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2290 cur -= len;
2291 }
2292 if (cur < start+symptr->strbeg) {
2293 /* Make sure group starts don't point into the void */
2294 tempsym_t *group;
9e27e96a 2295 const STRLEN length = cur-start;
f337b084
TH
2296 for (group = symptr;
2297 group && length < group->strbeg;
2298 group = group->previous) group->strbeg = length;
2299 lookahead.strbeg = length;
2300 }
a6ec74c1 2301 break;
fc241834 2302 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 2303 SSize_t ai32;
62f95557
IZ
2304 if (!len) /* Avoid division by 0 */
2305 len = 1;
230e1fce 2306 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2307 else ai32 = (cur - start) % len;
2308 if (ai32 == 0) goto no_change;
2309 len -= ai32;
2310 }
924ba076 2311 /* FALLTHROUGH */
a6ec74c1 2312 case 'x':
f337b084 2313 goto grow;
a6ec74c1
JH
2314 case 'A':
2315 case 'Z':
f337b084 2316 case 'a': {
f7fe979e 2317 const char *aptr;
f337b084 2318
a6ec74c1 2319 fromstr = NEXTFROM;
e62f0680 2320 aptr = SvPV_const(fromstr, fromlen);
f337b084 2321 if (DO_UTF8(fromstr)) {
f7fe979e 2322 const char *end, *s;
f337b084
TH
2323
2324 if (!utf8 && !SvUTF8(cat)) {
2325 marked_upgrade(aTHX_ cat, symptr);
2326 lookahead.flags |= FLAG_DO_UTF8;
2327 lookahead.strbeg = symptr->strbeg;
2328 utf8 = 1;
2329 start = SvPVX(cat);
2330 cur = start + SvCUR(cat);
2331 }
fc241834 2332 if (howlen == e_star) {
f337b084
TH
2333 if (utf8) goto string_copy;
2334 len = fromlen+1;
2335 }
2336 s = aptr;
2337 end = aptr + fromlen;
2338 fromlen = datumtype == 'Z' ? len-1 : len;
e1b825c1 2339 while ((SSize_t) fromlen > 0 && s < end) {
f337b084
TH
2340 s += UTF8SKIP(s);
2341 fromlen--;
2342 }
2343 if (s > end)
2344 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2345 if (utf8) {
fc241834 2346 len = fromlen;
f337b084
TH
2347 if (datumtype == 'Z') len++;
2348 fromlen = s-aptr;
2349 len += fromlen;
fc241834 2350
f337b084 2351 goto string_copy;
fc241834 2352 }
f337b084
TH
2353 fromlen = len - fromlen;
2354 if (datumtype == 'Z') fromlen--;
2355 if (howlen == e_star) {
2356 len = fromlen;
2357 if (datumtype == 'Z') len++;
fc241834 2358 }
f337b084 2359 GROWING(0, cat, start, cur, len);
9df874cd 2360 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2361 datumtype | TYPE_IS_PACK))
5637ef5b 2362 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
e1b825c1
Z
2363 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2364 (int)datumtype, aptr, end, cur, fromlen);
f337b084 2365 cur += fromlen;
a6ec74c1 2366 len -= fromlen;
f337b084
TH
2367 } else if (utf8) {
2368 if (howlen == e_star) {
2369 len = fromlen;
2370 if (datumtype == 'Z') len++;
a6ec74c1 2371 }
e1b825c1 2372 if (len <= (SSize_t) fromlen) {
f337b084
TH
2373 fromlen = len;
2374 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2375 }
fc241834 2376 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2377 upgrade, so:
2378 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2379 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2380 len -= fromlen;
2381 while (fromlen > 0) {
230e1fce 2382 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2383 aptr++;
2384 fromlen--;
fc241834 2385 }
f337b084
TH
2386 } else {
2387 string_copy:
2388 if (howlen == e_star) {
2389 len = fromlen;
2390 if (datumtype == 'Z') len++;
2391 }
e1b825c1 2392 if (len <= (SSize_t) fromlen) {
f337b084
TH
2393 fromlen = len;
2394 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2395 }
f337b084
TH
2396 GROWING(0, cat, start, cur, len);
2397 Copy(aptr, cur, fromlen, char);
2398 cur += fromlen;
2399 len -= fromlen;
a6ec74c1 2400 }
f337b084
TH
2401 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2402 cur += len;
3c4fb04a 2403 SvTAINT(cat);
a6ec74c1 2404 break;
f337b084 2405 }
a6ec74c1 2406 case 'B':
f337b084 2407 case 'b': {
b83604b4 2408 const char *str, *end;
e1b825c1 2409 SSize_t l, field_len;
f337b084
TH
2410 U8 bits;
2411 bool utf8_source;
2412 U32 utf8_flags;
a6ec74c1 2413
fc241834 2414 fromstr = NEXTFROM;
b83604b4 2415 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2416 end = str + fromlen;
2417 if (DO_UTF8(fromstr)) {
2418 utf8_source = TRUE;
041457d9 2419 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2420 } else {
2421 utf8_source = FALSE;
2422 utf8_flags = 0; /* Unused, but keep compilers happy */
2423 }
2424 if (howlen == e_star) len = fromlen;
2425 field_len = (len+7)/8;
2426 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2427 if (len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2428 bits = 0;
2429 l = 0;
2430 if (datumtype == 'B')
2431 while (l++ < len) {
2432 if (utf8_source) {
95b63a38 2433 UV val = 0;
f337b084
TH
2434 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2435 bits |= val & 1;
2436 } else bits |= *str++ & 1;
2437 if (l & 7) bits <<= 1;
fc241834 2438 else {
f337b084
TH
2439 PUSH_BYTE(utf8, cur, bits);
2440 bits = 0;
a6ec74c1
JH
2441 }
2442 }
f337b084
TH
2443 else
2444 /* datumtype == 'b' */
2445 while (l++ < len) {
2446 if (utf8_source) {
95b63a38 2447 UV val = 0;
f337b084
TH
2448 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2449 if (val & 1) bits |= 0x80;
2450 } else if (*str++ & 1)
2451 bits |= 0x80;
2452 if (l & 7) bits >>= 1;
fc241834 2453 else {
f337b084
TH
2454 PUSH_BYTE(utf8, cur, bits);
2455 bits = 0;
a6ec74c1
JH
2456 }
2457 }
f337b084
TH
2458 l--;
2459 if (l & 7) {
fc241834 2460 if (datumtype == 'B')
f337b084 2461 bits <<= 7 - (l & 7);
fc241834 2462 else
f337b084
TH
2463 bits >>= 7 - (l & 7);
2464 PUSH_BYTE(utf8, cur, bits);
2465 l += 7;
a6ec74c1 2466 }
f337b084
TH
2467 /* Determine how many chars are left in the requested field */
2468 l /= 8;
2469 if (howlen == e_star) field_len = 0;
2470 else field_len -= l;
2471 Zero(cur, field_len, char);
2472 cur += field_len;
a6ec74c1 2473 break;
f337b084 2474 }
a6ec74c1 2475 case 'H':
f337b084 2476 case 'h': {
10516c54 2477 const char *str, *end;
e1b825c1 2478 SSize_t l, field_len;
f337b084
TH
2479 U8 bits;
2480 bool utf8_source;
2481 U32 utf8_flags;
a6ec74c1 2482
fc241834 2483 fromstr = NEXTFROM;
10516c54 2484 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2485 end = str + fromlen;
2486 if (DO_UTF8(fromstr)) {
2487 utf8_source = TRUE;
041457d9 2488 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2489 } else {
2490 utf8_source = FALSE;
2491 utf8_flags = 0; /* Unused, but keep compilers happy */
2492 }
2493 if (howlen == e_star) len = fromlen;
2494 field_len = (len+1)/2;
2495 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2496 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2497 bits = 0;
2498 l = 0;
2499 if (datumtype == 'H')
2500 while (l++ < len) {
2501 if (utf8_source) {
95b63a38 2502 UV val = 0;
f337b084
TH
2503 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2504 if (val < 256 && isALPHA(val))
2505 bits |= (val + 9) & 0xf;
a6ec74c1 2506 else
f337b084
TH
2507 bits |= val & 0xf;
2508 } else if (isALPHA(*str))
2509 bits |= (*str++ + 9) & 0xf;
2510 else
2511 bits |= *str++ & 0xf;
2512 if (l & 1) bits <<= 4;
fc241834 2513 else {
f337b084
TH
2514 PUSH_BYTE(utf8, cur, bits);
2515 bits = 0;
a6ec74c1
JH
2516 }
2517 }
f337b084
TH
2518 else
2519 while (l++ < len) {
2520 if (utf8_source) {
95b63a38 2521 UV val = 0;
f337b084
TH
2522 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2523 if (val < 256 && isALPHA(val))
2524 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2525 else
f337b084
TH
2526 bits |= (val & 0xf) << 4;
2527 } else if (isALPHA(*str))
2528 bits |= ((*str++ + 9) & 0xf) << 4;
2529 else
2530 bits |= (*str++ & 0xf) << 4;
2531 if (l & 1) bits >>= 4;
fc241834 2532 else {
f337b084
TH
2533 PUSH_BYTE(utf8, cur, bits);
2534 bits = 0;
a6ec74c1 2535 }
fc241834 2536 }
f337b084
TH
2537 l--;
2538 if (l & 1) {
2539 PUSH_BYTE(utf8, cur, bits);
2540 l++;
2541 }
2542 /* Determine how many chars are left in the requested field */
2543 l /= 2;
2544 if (howlen == e_star) field_len = 0;
2545 else field_len -= l;
2546 Zero(cur, field_len, char);
2547 cur += field_len;
2548 break;
fc241834
RGS
2549 }
2550 case 'c':
f337b084
TH
2551 while (len-- > 0) {
2552 IV aiv;
2553 fromstr = NEXTFROM;
354b74ae 2554 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2555 if ((-128 > aiv || aiv > 127))
2556 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2557 "Character in 'c' format wrapped in pack");
585ec06d 2558 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2559 }
2560 break;
2561 case 'C':
f337b084
TH
2562 if (len == 0) {
2563 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2564 break;
2565 }
a6ec74c1 2566 while (len-- > 0) {
f337b084 2567 IV aiv;
a6ec74c1 2568 fromstr = NEXTFROM;
354b74ae 2569 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2570 if ((0 > aiv || aiv > 0xff))
2571 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2572 "Character in 'C' format wrapped in pack");
1651fc44 2573 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2574 }
fc241834
RGS
2575 break;
2576 case 'W': {
2577 char *end;
670f1322 2578 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2579
2580 end = start+SvLEN(cat)-1;
2581 if (utf8) end -= UTF8_MAXLEN-1;
2582 while (len-- > 0) {
2583 UV auv;
2584 fromstr = NEXTFROM;
354b74ae 2585 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2586 if (in_bytes) auv = auv % 0x100;
2587 if (utf8) {
2588 W_utf8:
bf4a926a 2589 if (cur >= end) {
fc241834 2590 *cur = '\0';
b162af07 2591 SvCUR_set(cat, cur - start);
fc241834
RGS
2592
2593 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2594 end = start+SvLEN(cat)-UTF8_MAXLEN;
2595 }
05fefba9 2596 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
fc241834
RGS
2597 } else {
2598 if (auv >= 0x100) {
2599 if (!SvUTF8(cat)) {
2600 *cur = '\0';
b162af07 2601 SvCUR_set(cat, cur - start);
fc241834
RGS
2602 marked_upgrade(aTHX_ cat, symptr);
2603 lookahead.flags |= FLAG_DO_UTF8;
2604 lookahead.strbeg = symptr->strbeg;
2605 utf8 = 1;
2606 start = SvPVX(cat);
2607 cur = start + SvCUR(cat);
2608 end = start+SvLEN(cat)-UTF8_MAXLEN;
2609 goto W_utf8;
2610 }
a2a5de95
NC
2611 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2612 "Character in 'W' format wrapped in pack");
fc241834
RGS
2613 auv &= 0xff;
2614 }
2615 if (cur >= end) {
2616 *cur = '\0';
b162af07 2617 SvCUR_set(cat, cur - start);
fc241834
RGS
2618 GROWING(0, cat, start, cur, len+1);
2619 end = start+SvLEN(cat)-1;
2620 }
fe2774ed 2621 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2622 }
2623 }
2624 break;
fc241834
RGS
2625 }
2626 case 'U': {
2627 char *end;
2628
2629 if (len == 0) {
2630 if (!(symptr->flags & FLAG_DO_UTF8)) {
2631 marked_upgrade(aTHX_ cat, symptr);
2632 lookahead.flags |= FLAG_DO_UTF8;
2633 lookahead.strbeg = symptr->strbeg;
2634 }
2635 utf8 = 0;
2636 goto no_change;
2637 }
2638
2639 end = start+SvLEN(cat);
2640 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2641 while (len-- > 0) {
fc241834 2642 UV auv;
a6ec74c1 2643 fromstr = NEXTFROM;
354b74ae 2644 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2645 if (utf8) {
81b3f640 2646 U8 buffer[UTF8_MAXLEN+1], *endb;
05fefba9 2647 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
fc241834
RGS
2648 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2649 *cur = '\0';
b162af07 2650 SvCUR_set(cat, cur - start);
fc241834
RGS
2651 GROWING(0, cat, start, cur,
2652 len+(endb-buffer)*UTF8_EXPAND);
2653 end = start+SvLEN(cat);
2654 }
d21d7215 2655 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2656 } else {
2657 if (cur >= end) {
2658 *cur = '\0';
b162af07 2659 SvCUR_set(cat, cur - start);
fc241834
RGS
2660 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2661 end = start+SvLEN(cat)-UTF8_MAXLEN;
2662 }
05fefba9
KW
2663 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2664 UNI_TO_NATIVE(auv),
2665 0);
fc241834 2666 }
a6ec74c1 2667 }
a6ec74c1 2668 break;
fc241834 2669 }
a6ec74c1
JH
2670 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2671 case 'f':
a6ec74c1 2672 while (len-- > 0) {
f337b084
TH
2673 float afloat;
2674 NV anv;
a6ec74c1 2675 fromstr = NEXTFROM;
f337b084 2676 anv = SvNV(fromstr);
a7157111 2677# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2678 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2679 * on Alpha; fake it if we don't have them.
2680 */
f337b084 2681 if (anv > FLT_MAX)
fc241834 2682 afloat = FLT_MAX;
f337b084 2683 else if (anv < -FLT_MAX)
fc241834 2684 afloat = -FLT_MAX;
f337b084 2685 else afloat = (float)anv;
baf3cf9c 2686# else
a7157111 2687# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
d22fb6ca
DD
2688 if(Perl_isnan(anv))
2689 afloat = (float)NV_NAN;
2690 else
a7157111
JH
2691# endif
2692# ifdef NV_INF
919894b7
DM
2693 /* a simple cast to float is undefined if outside
2694 * the range of values that can be represented */
2695 afloat = (float)(anv > FLT_MAX ? NV_INF :
2696 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2697# endif
baf3cf9c 2698# endif
3a88beaa 2699 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2700 }
2701 break;
2702 case 'd':
a6ec74c1 2703 while (len-- > 0) {
f337b084
TH
2704 double adouble;
2705 NV anv;
a6ec74c1 2706 fromstr = NEXTFROM;
f337b084 2707 anv = SvNV(fromstr);
a7157111 2708# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2709 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2710 * on Alpha; fake it if we don't have them.
2711 */
f337b084 2712 if (anv > DBL_MAX)
fc241834 2713 adouble = DBL_MAX;
f337b084 2714 else if (anv < -DBL_MAX)
fc241834 2715 adouble = -DBL_MAX;
f337b084 2716 else adouble = (double)anv;
baf3cf9c 2717# else
f337b084 2718 adouble = (double)anv;
baf3cf9c 2719# endif
3a88beaa 2720 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2721 }
2722 break;
fc241834 2723 case 'F': {
275663fa 2724 NV_bytes anv;
1109a392 2725 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2726 while (len-- > 0) {
2727 fromstr = NEXTFROM;
cd07c537
DM
2728#ifdef __GNUC__
2729 /* to work round a gcc/x86 bug; don't use SvNV */
2730 anv.nv = sv_2nv(fromstr);
070e2677
TC
2731# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2732 && LONG_DOUBLESIZE > 10
2733 /* GCC sometimes overwrites the padding in the
2734 assignment above */
2735 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2736# endif
cd07c537 2737#else
275663fa 2738 anv.nv = SvNV(fromstr);
cd07c537 2739#endif
3a88beaa 2740 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2741 }
2742 break;
fc241834 2743 }
92d41999 2744#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2745 case 'D': {
275663fa 2746 ld_bytes aldouble;
1109a392
MHM
2747 /* long doubles can have unused bits, which may be nonzero */
2748 Zero(&aldouble, 1, long double);
92d41999
JH
2749 while (len-- > 0) {
2750 fromstr = NEXTFROM;
cd07c537
DM
2751# ifdef __GNUC__
2752 /* to work round a gcc/x86 bug; don't use SvNV */
2753 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2754# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2755 /* GCC sometimes overwrites the padding in the
2756 assignment above */
2757 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2758# endif
cd07c537 2759# else
275663fa 2760 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2761# endif
3a88beaa
NC
2762 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2763 needs_swap);
92d41999
JH
2764 }
2765 break;
fc241834 2766 }
92d41999 2767#endif
068bd2e7 2768 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2769 case 'n':
2770 while (len-- > 0) {
f337b084 2771 I16 ai16;
a6ec74c1 2772 fromstr = NEXTFROM;
354b74ae 2773 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2774 ai16 = PerlSock_htons(ai16);
3a88beaa 2775 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2776 }
2777 break;
068bd2e7 2778 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2779 case 'v':
2780 while (len-- > 0) {
f337b084 2781 I16 ai16;
a6ec74c1 2782 fromstr = NEXTFROM;
354b74ae 2783 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2784 ai16 = htovs(ai16);
3a88beaa 2785 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2786 }
2787 break;
49704364 2788 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2789#if SHORTSIZE != SIZE16
fc241834 2790 while (len-- > 0) {
f337b084 2791 unsigned short aushort;
fc241834 2792 fromstr = NEXTFROM;
354b74ae 2793 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2794 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2795 }
49704364
LW
2796 break;
2797#else
924ba076 2798 /* FALLTHROUGH */
a6ec74c1 2799#endif
49704364 2800 case 'S':
fc241834 2801 while (len-- > 0) {
f337b084 2802 U16 au16;
fc241834 2803 fromstr = NEXTFROM;
354b74ae 2804 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2805 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2806 }
2807 break;
49704364 2808 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2809#if SHORTSIZE != SIZE16
fc241834 2810 while (len-- > 0) {
f337b084 2811 short ashort;
fc241834 2812 fromstr = NEXTFROM;
354b74ae 2813 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2814 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2815 }
49704364
LW
2816 break;
2817#else
924ba076 2818 /* FALLTHROUGH */
a6ec74c1 2819#endif
49704364
LW
2820 case 's':
2821 while (len-- > 0) {
f337b084 2822 I16 ai16;
49704364 2823 fromstr = NEXTFROM;
354b74ae 2824 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2825 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2826 }
2827 break;
2828 case 'I':
49704364 2829 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2830 while (len-- > 0) {
f337b084 2831 unsigned int auint;
a6ec74c1 2832 fromstr = NEXTFROM;
354b74ae 2833 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2834 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2835 }
2836 break;
92d41999
JH
2837 case 'j':
2838 while (len-- > 0) {
f337b084 2839 IV aiv;
92d41999 2840 fromstr = NEXTFROM;
354b74ae 2841 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2842 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2843 }
2844 break;
2845 case 'J':
2846 while (len-- > 0) {
f337b084 2847 UV auv;
92d41999 2848 fromstr = NEXTFROM;
354b74ae 2849 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2850 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2851 }
2852 break;
a6ec74c1
JH
2853 case 'w':
2854 while (len-- > 0) {
f337b084 2855 NV anv;
a6ec74c1 2856 fromstr = NEXTFROM;
83388ae2 2857 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2858 anv = SvNV_nomg(fromstr);
a6ec74c1 2859
f337b084
TH
2860 if (anv < 0) {
2861 *cur = '\0';
b162af07 2862 SvCUR_set(cat, cur - start);
49704364 2863 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2864 }
a6ec74c1 2865
196b62db
NC
2866 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2867 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2868 any negative IVs will have already been got by the croak()
2869 above. IOK is untrue for fractions, so we test them
2870 against UV_MAX_P1. */
f337b084
TH
2871 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2872 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2873 char *in = buf + sizeof(buf);
1e9a122e 2874 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2875
2876 do {
eb160463 2877 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2878 auv >>= 7;
2879 } while (auv);
2880 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2881 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2882 in, (buf + sizeof(buf)) - in);
2883 } else if (SvPOKp(fromstr))
2884 goto w_string;
a6ec74c1 2885 else if (SvNOKp(fromstr)) {
0258719b 2886 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2887 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2888 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2889 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2890 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2891 Some C compilers are strict about integral constant
2892 expressions so we conservatively divide by a slightly
2893 smaller integer instead of multiplying by the exact
2894 floating-point value.
0258719b
NC
2895 */
2896#ifdef NV_MAX_10_EXP
f337b084 2897 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2898 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2899#else
f337b084 2900 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2901 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2902#endif
a6ec74c1
JH
2903 char *in = buf + sizeof(buf);
2904
8b6e33c7 2905 anv = Perl_floor(anv);
a6ec74c1 2906 do {
8b6e33c7 2907 const NV next = Perl_floor(anv / 128);
a6ec74c1 2908 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2909 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2910 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2911 anv = next;
2912 } while (anv > 0);
a6ec74c1 2913 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2914 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2915 in, (buf + sizeof(buf)) - in);
2916 } else {
8b6e33c7
AL
2917 const char *from;
2918 char *result, *in;
735b914b
JH
2919 SV *norm;
2920 STRLEN len;
2921 bool done;
2922
f337b084 2923 w_string:
735b914b 2924 /* Copy string and check for compliance */
1e9a122e 2925 from = SvPV_nomg_const(fromstr, len);
735b914b 2926 if ((norm = is_an_int(from, len)) == NULL)
49704364 2927 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2928
a02a5408 2929 Newx(result, len, char);
735b914b
JH
2930 in = result + len;
2931 done = FALSE;
f337b084 2932 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2933 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2934 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2935 in, (result + len) - in);
735b914b
JH
2936 Safefree(result);
2937 SvREFCNT_dec(norm); /* free norm */
fc241834 2938 }
a6ec74c1
JH
2939 }
2940 break;
2941 case 'i':
49704364 2942 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2943 while (len-- > 0) {
f337b084 2944 int aint;
a6ec74c1 2945 fromstr = NEXTFROM;
354b74ae 2946 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2947 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2948 }
2949 break;
068bd2e7 2950 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2951 case 'N':
2952 while (len-- > 0) {
f337b084 2953 U32 au32;
a6ec74c1 2954 fromstr = NEXTFROM;
354b74ae 2955 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2956 au32 = PerlSock_htonl(au32);
3a88beaa 2957 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2958 }
2959 break;
068bd2e7 2960 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2961 case 'V':
2962 while (len-- > 0) {
f337b084 2963 U32 au32;
a6ec74c1 2964 fromstr = NEXTFROM;
354b74ae 2965 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2966 au32 = htovl(au32);
3a88beaa 2967 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2968 }
2969 break;
49704364 2970 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2971#if LONGSIZE != SIZE32
fc241834 2972 while (len-- > 0) {
f337b084 2973 unsigned long aulong;
fc241834 2974 fromstr = NEXTFROM;
354b74ae 2975 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2976 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2977 }
49704364
LW
2978 break;
2979#else
2980 /* Fall though! */
a6ec74c1 2981#endif
49704364 2982 case 'L':
fc241834 2983 while (len-- > 0) {
f337b084 2984 U32 au32;
fc241834 2985 fromstr = NEXTFROM;
354b74ae 2986 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2987 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2988 }
2989 break;
49704364 2990 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2991#if LONGSIZE != SIZE32
fc241834 2992 while (len-- > 0) {
f337b084 2993 long along;
fc241834 2994 fromstr = NEXTFROM;
354b74ae 2995 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2996 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 2997 }
49704364
LW
2998 break;
2999#else
3000 /* Fall though! */
a6ec74c1 3001#endif
49704364
LW
3002 case 'l':
3003 while (len-- > 0) {
f337b084 3004 I32 ai32;
49704364 3005 fromstr = NEXTFROM;
354b74ae 3006 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3007 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3008 }
3009 break;
c174bf3b 3010#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3011 case 'Q':
3012 while (len-- > 0) {
f337b084 3013 Uquad_t auquad;
a6ec74c1 3014 fromstr = NEXTFROM;
354b74ae 3015 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3016 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3017 }
3018 break;
3019 case 'q':
3020 while (len-- > 0) {
f337b084 3021 Quad_t aquad;
a6ec74c1 3022 fromstr = NEXTFROM;
354b74ae 3023 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3024 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3025 }
3026 break;
1640b983 3027#endif
a6ec74c1
JH
3028 case 'P':
3029 len = 1; /* assume SV is correct length */
f337b084 3030 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3031 /* FALLTHROUGH */
a6ec74c1
JH
3032 case 'p':
3033 while (len-- > 0) {
83003860 3034 const char *aptr;
f337b084 3035
a6ec74c1 3036 fromstr = NEXTFROM;
28a4f200 3037 SvGETMAGIC(fromstr);
3038 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3039 else {
a6ec74c1
JH
3040 /* XXX better yet, could spirit away the string to
3041 * a safe spot and hang on to it until the result
3042 * of pack() (and all copies of the result) are
3043 * gone.
3044 */
b199a398
FC
3045 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3046 || (SvPADTMP(fromstr) &&
a2a5de95
NC
3047 !SvREADONLY(fromstr)))) {
3048 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3049 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3050 }
3051 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3052 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3053 else
2596d9fe 3054 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3055 }
3a88beaa 3056 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3057 }
3058 break;
fc241834 3059 case 'u': {
f7fe979e 3060 const char *aptr, *aend;
fc241834 3061 bool from_utf8;
f337b084 3062
a6ec74c1 3063 fromstr = NEXTFROM;
fc241834
RGS
3064 if (len <= 2) len = 45;
3065 else len = len / 3 * 3;
3066 if (len >= 64) {
a2a5de95
NC
3067 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3068 "Field too wide in 'u' format in pack");
fc241834
RGS
3069 len = 63;
3070 }
83003860 3071 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3072 from_utf8 = DO_UTF8(fromstr);
3073 if (from_utf8) {
3074 aend = aptr + fromlen;
3f63b0e5 3075 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3076 } else aend = NULL; /* Unused, but keep compilers happy */
3077 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3078 while (fromlen > 0) {
fc241834 3079 U8 *end;
e1b825c1 3080 SSize_t todo;
fc241834 3081 U8 hunk[1+63/3*4+1];
a6ec74c1 3082
e1b825c1 3083 if ((SSize_t)fromlen > len)
a6ec74c1
JH
3084 todo = len;
3085 else
3086 todo = fromlen;
fc241834
RGS
3087 if (from_utf8) {
3088 char buffer[64];
9df874cd 3089 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3090 'u' | TYPE_IS_PACK)) {
3091 *cur = '\0';
b162af07 3092 SvCUR_set(cat, cur - start);
5637ef5b 3093 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
e1b825c1
Z
3094 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3095 aptr, aend, buffer, todo);
fc241834 3096 }
e68aed92 3097 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3098 } else {
e68aed92 3099 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3100 aptr += todo;
3101 }
3a88beaa 3102 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3103 fromlen -= todo;
3104 }
a6ec74c1
JH
3105 break;
3106 }
f337b084
TH
3107 }
3108 *cur = '\0';
b162af07 3109 SvCUR_set(cat, cur - start);
f337b084 3110 no_change:
49704364 3111 *symptr = lookahead;
a6ec74c1 3112 }
49704364 3113 return beglist;
18529408
IZ
3114}
3115#undef NEXTFROM
3116
3117
3118PP(pp_pack)
3119{
20b7effb 3120 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3121 SV *cat = TARG;
18529408 3122 STRLEN fromlen;
349d4f2f 3123 SV *pat_sv = *++MARK;
eb578fdb
KW
3124 const char *pat = SvPV_const(pat_sv, fromlen);
3125 const char *patend = pat + fromlen;
18529408
IZ
3126
3127 MARK++;
e8f7c79f 3128 SvPVCLEAR(cat);
f337b084 3129 SvUTF8_off(cat);
18529408 3130
7accc089 3131 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3132
a6ec74c1
JH
3133 SvSETMAGIC(cat);
3134 SP = ORIGMARK;
3135 PUSHs(cat);
3136 RETURN;
3137}
a6ec74c1 3138
73cb7263 3139/*
14d04a33 3140 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3141 */