This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
study_chunk: honour mutate_ok over recursion
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
f7fe979e
AL
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
e1b825c1
Z
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
f7fe979e
AL
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
41f35ffd 115#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
08ca2aa3
TH
116# define OFF16(p) ((char *) (p))
117# define OFF32(p) ((char *) (p))
41f35ffd
AC
118#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119# define OFF16(p) ((char*)(p))
120# define OFF32(p) ((char*)(p))
121#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124#else
125# error "bad cray byte order"
a6ec74c1
JH
126#endif
127
3a88beaa
NC
128#define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130#define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 132
a1219b5e
NC
133#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137#else
138# error "Unsupported byteorder"
20aa3a38
NC
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
142 logic adding to deal with any mixed-endian transformations needed.
143 */
a1219b5e
NC
144#endif
145
08ca2aa3 146/* Only to be used inside a loop (see the break) */
aaec8192 147#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 148STMT_START { \
228e69a7 149 if (UNLIKELY(utf8)) { \
9df874cd 150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 151 (char *) (buf), len, datumtype)) break; \
08ca2aa3 152 } else { \
228e69a7 153 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
154 S_reverse_copy(s, (char *) (buf), len); \
155 else \
156 Copy(s, (char *) (buf), len, char); \
275663fa 157 s += len; \
08ca2aa3 158 } \
08ca2aa3
TH
159} STMT_END
160
aaec8192
NC
161#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 163
aaec8192
NC
164#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 166
aaec8192
NC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 169
3a88beaa
NC
170#define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 172
49704364
WL
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
0f12654f 194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
0860e090
KW
215#define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \
216 NATIVE_TO_LATIN1(' '), \
217 NATIVE_TO_LATIN1('a') - 1)
a6ec74c1 218
66c611c5 219/* type modifiers */
62f95557 220#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
221#define TYPE_IS_BIG_ENDIAN 0x200
222#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 223#define TYPE_IS_PACK 0x800
1109a392 224#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 225#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
226#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
227
7212898e
NC
228# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
229# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
230
231# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
232
78d46eaa 233#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 234#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
235#define PACK_SIZE_MASK 0x3F
236
4ad176ef 237#include "packsizetables.inc"
78d46eaa 238
20aa3a38
NC
239static void
240S_reverse_copy(const char *src, char *dest, STRLEN len)
241{
242 dest += len;
243 while (len--)
244 *--dest = *src++;
245}
246
08ca2aa3 247STATIC U8
9df874cd 248utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 249{
08ca2aa3 250 STRLEN retlen;
df337714
KW
251 UV val;
252
253 if (*s >= end) {
254 goto croak;
255 }
256 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 257 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714
KW
258 if (retlen == (STRLEN) -1)
259 croak:
f337b084
TH
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 262 if (val >= 0x100) {
a2a5de95
NC
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
266 val &= 0xff;
267 }
268 *s += retlen;
fe2774ed 269 return (U8)val;
08ca2aa3
TH
270}
271
f337b084 272#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
274 *(U8 *)(s)++)
275
276STATIC bool
e1b825c1 277S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
08ca2aa3
TH
278{
279 UV val;
280 STRLEN retlen;
f7fe979e 281 const char *from = *s;
08ca2aa3 282 int bad = 0;
f7fe979e 283 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
285 const bool needs_swap = NEEDS_SWAP(datumtype);
286
228e69a7 287 if (UNLIKELY(needs_swap))
20aa3a38
NC
288 buf += buf_len;
289
08ca2aa3
TH
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
f337b084 292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
df337714 293 if (retlen == (STRLEN) -1) {
18c47def 294 from += UTF8_SAFE_SKIP(from, end);
08ca2aa3
TH
295 bad |= 1;
296 } else from += retlen;
297 if (val >= 0x100) {
298 bad |= 2;
299 val &= 0xff;
300 }
228e69a7 301 if (UNLIKELY(needs_swap))
20aa3a38
NC
302 *(U8 *)--buf = (U8)val;
303 else
304 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
305 }
306 /* We have enough characters for the buffer. Did we have problems ? */
307 if (bad) {
308 if (bad & 1) {
309 /* Rewalk the string fragment while warning */
f7fe979e 310 const char *ptr;
e1b825c1 311 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
c80e42f3 314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
315 }
316 if (from > end) from = end;
317 }
a2a5de95
NC
318 if ((bad & 2))
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 320 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
324 }
325 *s = from;
326 return TRUE;
327}
328
64844641 329STATIC char *
d21d7215
KW
330S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 332
228e69a7 333 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
334 const U8 *p = start + len;
335 while (p-- > start) {
55d09dc8 336 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
337 }
338 } else {
339 const U8 * const end = start + len;
340 while (start < end) {
55d09dc8 341 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
342 start++;
343 }
f337b084 344 }
64844641 345 return dest;
f337b084
TH
346}
347
3a88beaa 348#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 349STMT_START { \
228e69a7 350 if (UNLIKELY(utf8)) \
d21d7215 351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 352 else { \
228e69a7 353 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
354 S_reverse_copy((char *)(buf), cur, len); \
355 else \
356 Copy(buf, cur, len, char); \
230e1fce
NC
357 (cur) += (len); \
358 } \
f337b084
TH
359} STMT_END
360
f17fed50
TC
361#define SAFE_UTF8_EXPAND(var) \
362STMT_START { \
363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365 (var) = (var) * UTF8_EXPAND; \
366} STMT_END
367
368#define GROWING2(utf8, cat, start, cur, item_size, item_count) \
369STMT_START { \
370 if (SSize_t_MAX / (item_size) < (item_count)) \
371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
373} STMT_END
374
f337b084
TH
375#define GROWING(utf8, cat, start, cur, in_len) \
376STMT_START { \
377 STRLEN glen = (in_len); \
f17fed50
TC
378 STRLEN catcur = (STRLEN)((cur) - (start)); \
379 if (utf8) SAFE_UTF8_EXPAND(glen); \
380 if (SSize_t_MAX - glen < catcur) \
381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382 if (catcur + glen >= SvLEN(cat)) { \
0bd48802 383 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
384 (cur) = (start) + SvCUR(cat); \
385 } \
386} STMT_END
387
388#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
389STMT_START { \
f7fe979e 390 const STRLEN glen = (in_len); \
f337b084 391 STRLEN gl = glen; \
f17fed50 392 if (utf8) SAFE_UTF8_EXPAND(gl); \
f337b084
TH
393 if ((cur) + gl >= (start) + SvLEN(cat)) { \
394 *cur = '\0'; \
b162af07 395 SvCUR_set((cat), (cur) - (start)); \
0bd48802 396 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
397 (cur) = (start) + SvCUR(cat); \
398 } \
3a88beaa 399 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
400} STMT_END
401
402#define PUSH_BYTE(utf8, s, byte) \
403STMT_START { \
404 if (utf8) { \
f7fe979e 405 const U8 au8 = (byte); \
d21d7215 406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
407 } else *(U8 *)(s)++ = (byte); \
408} STMT_END
409
410/* Only to be used inside a loop (see the break) */
411#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
412STMT_START { \
413 STRLEN retlen; \
414 if (str >= end) break; \
415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
df337714 416 if (retlen == (STRLEN) -1) { \
f337b084
TH
417 *cur = '\0'; \
418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
419 } \
420 str += retlen; \
421} STMT_END
422
f7fe979e
AL
423static const char *_action( const tempsym_t* symptr )
424{
10edeb5d 425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
426}
427
206947d2 428/* Returns the sizeof() struct described by pat */
e1b825c1 429STATIC SSize_t
f337b084 430S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 431{
e1b825c1 432 SSize_t total = 0;
206947d2 433
7918f24d
NC
434 PERL_ARGS_ASSERT_MEASURE_STRUCT;
435
49704364 436 while (next_symbol(symptr)) {
e1b825c1 437 SSize_t len, size;
f337b084
TH
438
439 switch (symptr->howlen) {
fc241834 440 case e_star:
49704364 441 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 442 _action( symptr ) );
81d52ecd 443
f337b084
TH
444 default:
445 /* e_no_len and e_number */
446 len = symptr->length;
447 break;
49704364
WL
448 }
449
a7a3cfaa 450 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 451 if (!size) {
e1b825c1 452 SSize_t star;
80a13697
NC
453 /* endianness doesn't influence the size of a type */
454 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
455 default:
456 Perl_croak(aTHX_ "Invalid type '%c' in %s",
457 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 458 _action( symptr ) );
28be1210
TH
459 case '.' | TYPE_IS_SHRIEKING:
460 case '@' | TYPE_IS_SHRIEKING:
80a13697 461 case '@':
28be1210 462 case '.':
80a13697
NC
463 case '/':
464 case 'U': /* XXXX Is it correct? */
465 case 'w':
466 case 'u':
467 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 468 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 469 _action( symptr ) );
80a13697
NC
470 case '%':
471 size = 0;
472 break;
473 case '(':
fc241834
RGS
474 {
475 tempsym_t savsym = *symptr;
476 symptr->patptr = savsym.grpbeg;
477 symptr->patend = savsym.grpend;
478 /* XXXX Theoretically, we need to measure many times at
479 different positions, since the subexpression may contain
480 alignment commands, but be not of aligned length.
481 Need to detect this and croak(). */
482 size = measure_struct(symptr);
483 *symptr = savsym;
484 break;
485 }
80a13697
NC
486 case 'X' | TYPE_IS_SHRIEKING:
487 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
488 */
489 if (!len) /* Avoid division by 0 */
490 len = 1;
491 len = total % len; /* Assumed: the start is aligned. */
924ba076 492 /* FALLTHROUGH */
80a13697
NC
493 case 'X':
494 size = -1;
495 if (total < len)
f7fe979e 496 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
497 break;
498 case 'x' | TYPE_IS_SHRIEKING:
499 if (!len) /* Avoid division by 0 */
500 len = 1;
501 star = total % len; /* Assumed: the start is aligned. */
502 if (star) /* Other portable ways? */
503 len = len - star;
504 else
505 len = 0;
924ba076 506 /* FALLTHROUGH */
80a13697
NC
507 case 'x':
508 case 'A':
509 case 'Z':
510 case 'a':
80a13697
NC
511 size = 1;
512 break;
513 case 'B':
514 case 'b':
515 len = (len + 7)/8;
516 size = 1;
517 break;
518 case 'H':
519 case 'h':
520 len = (len + 1)/2;
521 size = 1;
522 break;
78d46eaa 523
80a13697
NC
524 case 'P':
525 len = 1;
526 size = sizeof(char*);
78d46eaa
NC
527 break;
528 }
206947d2
IZ
529 }
530 total += len * size;
531 }
532 return total;
533}
534
49704364
WL
535
536/* locate matching closing parenthesis or bracket
537 * returns char pointer to char after match, or NULL
538 */
f7fe979e 539STATIC const char *
5aaab254 540S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 541{
7918f24d
NC
542 PERL_ARGS_ASSERT_GROUP_END;
543
49704364 544 while (patptr < patend) {
f7fe979e 545 const char c = *patptr++;
49704364
WL
546
547 if (isSPACE(c))
548 continue;
549 else if (c == ender)
550 return patptr-1;
551 else if (c == '#') {
552 while (patptr < patend && *patptr != '\n')
553 patptr++;
554 continue;
555 } else if (c == '(')
556 patptr = group_end(patptr, patend, ')') + 1;
557 else if (c == '[')
558 patptr = group_end(patptr, patend, ']') + 1;
18529408 559 }
49704364
WL
560 Perl_croak(aTHX_ "No group ending character '%c' found in template",
561 ender);
a25b5927 562 NOT_REACHED; /* NOTREACHED */
18529408
IZ
563}
564
49704364
WL
565
566/* Convert unsigned decimal number to binary.
567 * Expects a pointer to the first digit and address of length variable
568 * Advances char pointer to 1st non-digit char and returns number
fc241834 569 */
f7fe979e 570STATIC const char *
e1b825c1 571S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
49704364 572{
e1b825c1 573 SSize_t len = *patptr++ - '0';
7918f24d
NC
574
575 PERL_ARGS_ASSERT_GET_NUM;
576
49704364 577 while (isDIGIT(*patptr)) {
e1b825c1
Z
578 SSize_t nlen = (len * 10) + (*patptr++ - '0');
579 if (nlen < 0 || nlen/10 != len)
49704364 580 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
e1b825c1 581 len = nlen;
49704364
WL
582 }
583 *lenptr = len;
584 return patptr;
585}
586
587/* The marvellous template parsing routine: Using state stored in *symptr,
588 * locates next template code and count
589 */
590STATIC bool
f337b084 591S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 592{
f7fe979e 593 const char* patptr = symptr->patptr;
0bcc34c2 594 const char* const patend = symptr->patend;
49704364 595
7918f24d
NC
596 PERL_ARGS_ASSERT_NEXT_SYMBOL;
597
49704364
WL
598 symptr->flags &= ~FLAG_SLASH;
599
600 while (patptr < patend) {
601 if (isSPACE(*patptr))
602 patptr++;
603 else if (*patptr == '#') {
604 patptr++;
605 while (patptr < patend && *patptr != '\n')
606 patptr++;
607 if (patptr < patend)
608 patptr++;
609 } else {
fc241834 610 /* We should have found a template code */
49704364 611 I32 code = *patptr++ & 0xFF;
66c611c5 612 U32 inherited_modifiers = 0;
49704364
WL
613
614 if (code == ','){ /* grandfather in commas but with a warning */
615 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
616 symptr->flags |= FLAG_COMMA;
617 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 618 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
619 }
620 continue;
621 }
fc241834 622
49704364 623 /* for '(', skip to ')' */
fc241834 624 if (code == '(') {
49704364
WL
625 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
626 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 627 _action( symptr ) );
49704364
WL
628 symptr->grpbeg = patptr;
629 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
630 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
631 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 632 _action( symptr ) );
49704364
WL
633 }
634
66c611c5
MHM
635 /* look for group modifiers to inherit */
636 if (TYPE_ENDIANNESS(symptr->flags)) {
637 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
638 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
639 }
640
1109a392
MHM
641 /* look for modifiers */
642 while (patptr < patend) {
b7787f18
AL
643 const char *allowed;
644 I32 modifier;
1109a392
MHM
645 switch (*patptr) {
646 case '!':
647 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 648 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
649 break;
650 case '>':
651 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 652 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
653 break;
654 case '<':
655 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 656 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
657 break;
658 default:
b7787f18
AL
659 allowed = "";
660 modifier = 0;
1109a392
MHM
661 break;
662 }
66c611c5 663
1109a392
MHM
664 if (modifier == 0)
665 break;
66c611c5 666
1109a392
MHM
667 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
668 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 669 allowed, _action( symptr ) );
66c611c5
MHM
670
671 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 672 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 673 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
674 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
675 TYPE_ENDIANNESS_MASK)
676 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 677 *patptr, _action( symptr ) );
66c611c5 678
a2a5de95
NC
679 if ((code & modifier)) {
680 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
681 "Duplicate modifier '%c' after '%c' in %s",
682 *patptr, (int) TYPE_NO_MODIFIERS(code),
683 _action( symptr ) );
1109a392 684 }
66c611c5 685
1109a392
MHM
686 code |= modifier;
687 patptr++;
49704364
WL
688 }
689
66c611c5
MHM
690 /* inherit modifiers */
691 code |= inherited_modifiers;
692
fc241834 693 /* look for count and/or / */
49704364
WL
694 if (patptr < patend) {
695 if (isDIGIT(*patptr)) {
696 patptr = get_num( patptr, &symptr->length );
697 symptr->howlen = e_number;
698
699 } else if (*patptr == '*') {
700 patptr++;
701 symptr->howlen = e_star;
702
703 } else if (*patptr == '[') {
f7fe979e 704 const char* lenptr = ++patptr;
49704364
WL
705 symptr->howlen = e_number;
706 patptr = group_end( patptr, patend, ']' ) + 1;
707 /* what kind of [] is it? */
708 if (isDIGIT(*lenptr)) {
709 lenptr = get_num( lenptr, &symptr->length );
710 if( *lenptr != ']' )
711 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 712 _action( symptr ) );
49704364
WL
713 } else {
714 tempsym_t savsym = *symptr;
715 symptr->patend = patptr-1;
716 symptr->patptr = lenptr;
717 savsym.length = measure_struct(symptr);
718 *symptr = savsym;
719 }
720 } else {
721 symptr->howlen = e_no_len;
722 symptr->length = 1;
723 }
724
725 /* try to find / */
726 while (patptr < patend) {
727 if (isSPACE(*patptr))
728 patptr++;
729 else if (*patptr == '#') {
730 patptr++;
731 while (patptr < patend && *patptr != '\n')
732 patptr++;
733 if (patptr < patend)
734 patptr++;
735 } else {
66c611c5 736 if (*patptr == '/') {
49704364
WL
737 symptr->flags |= FLAG_SLASH;
738 patptr++;
66c611c5
MHM
739 if (patptr < patend &&
740 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 741 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 742 _action( symptr ) );
49704364
WL
743 }
744 break;
745 }
18529408 746 }
49704364
WL
747 } else {
748 /* at end - no count, no / */
749 symptr->howlen = e_no_len;
750 symptr->length = 1;
751 }
752
753 symptr->code = code;
fc241834 754 symptr->patptr = patptr;
49704364 755 return TRUE;
18529408 756 }
49704364 757 }
fc241834 758 symptr->patptr = patptr;
49704364 759 return FALSE;
18529408
IZ
760}
761
18529408 762/*
fc241834 763 There is no way to cleanly handle the case where we should process the
08ca2aa3 764 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
765 (e.g. estimates like strend-s as an upper bound for the number of
766 characters left wouldn't work). So if we foresee the need of this
767 (pattern starts with U or contains U0), we want to work on the encoded
768 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
769 themselves if they need to do a lot of unpacks like this on it
770*/
fc241834 771STATIC bool
08ca2aa3
TH
772need_utf8(const char *pat, const char *patend)
773{
774 bool first = TRUE;
7918f24d
NC
775
776 PERL_ARGS_ASSERT_NEED_UTF8;
777
08ca2aa3
TH
778 while (pat < patend) {
779 if (pat[0] == '#') {
780 pat++;
f7fe979e 781 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
782 if (!pat) return FALSE;
783 } else if (pat[0] == 'U') {
784 if (first || pat[1] == '0') return TRUE;
785 } else first = FALSE;
786 pat++;
787 }
788 return FALSE;
789}
790
791STATIC char
792first_symbol(const char *pat, const char *patend) {
7918f24d
NC
793 PERL_ARGS_ASSERT_FIRST_SYMBOL;
794
08ca2aa3
TH
795 while (pat < patend) {
796 if (pat[0] != '#') return pat[0];
797 pat++;
f7fe979e 798 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
799 if (!pat) return 0;
800 pat++;
801 }
802 return 0;
803}
804
805/*
dcccc8ff
KW
806
807=head1 Pack and Unpack
808
7accc089
JH
809=for apidoc unpackstring
810
796b6530 811The engine implementing the C<unpack()> Perl function.
21ebfc7a 812
796b6530
KW
813Using the template C<pat..patend>, this function unpacks the string
814C<s..strend> into a number of mortal SVs, which it pushes onto the perl
815argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 816C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
817pushed elements.
818
796b6530
KW
819The C<strend> and C<patend> pointers should point to the byte following the
820last character of each string.
21ebfc7a
DM
821
822Although this function returns its values on the perl argument stack, it
823doesn't take any parameters from that stack (and thus in particular
796b6530 824there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 825example).
7accc089
JH
826
827=cut */
828
e1b825c1 829SSize_t
f7fe979e 830Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 831{
f7fe979e 832 tempsym_t sym;
08ca2aa3 833
7918f24d
NC
834 PERL_ARGS_ASSERT_UNPACKSTRING;
835
f337b084 836 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
837 else if (need_utf8(pat, patend)) {
838 /* We probably should try to avoid this in case a scalar context call
839 wouldn't get to the "U0" */
840 STRLEN len = strend - s;
230e1fce 841 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
842 SAVEFREEPV(s);
843 strend = s + len;
f337b084 844 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
845 }
846
f337b084
TH
847 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
848 flags |= FLAG_PARSE_UTF8;
08ca2aa3 849
f7fe979e 850 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
851
852 return unpack_rec(&sym, s, s, strend, NULL );
853}
854
e1b825c1 855STATIC SSize_t
f7fe979e 856S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 857{
20b7effb 858 dSP;
3297d27d 859 SV *sv = NULL;
e1b825c1 860 const SSize_t start_sp_offset = SP - PL_stack_base;
49704364 861 howlen_t howlen;
e1b825c1 862 SSize_t checksum = 0;
92d41999 863 UV cuv = 0;
a6ec74c1 864 NV cdouble = 0.0;
e1b825c1 865 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 866 bool beyond = FALSE;
21c16052 867 bool explicit_length;
9e27e96a 868 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 869 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
870
871 PERL_ARGS_ASSERT_UNPACK_REC;
872
28be1210 873 symptr->strbeg = s - strbeg;
49704364 874
49704364 875 while (next_symbol(symptr)) {
a7a3cfaa 876 packprops_t props;
e1b825c1 877 SSize_t len;
f337b084 878 I32 datumtype = symptr->code;
a1219b5e 879 bool needs_swap;
206947d2 880 /* do first one only unless in list context
08ca2aa3 881 / is implemented by unpacking the count, then popping it from the
206947d2 882 stack, so must check that we're not in the middle of a / */
49704364 883 if ( unpack_only_one
206947d2 884 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 885 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 886 break;
49704364 887
f337b084 888 switch (howlen = symptr->howlen) {
fc241834
RGS
889 case e_star:
890 len = strend - strbeg; /* long enough */
49704364 891 break;
f337b084
TH
892 default:
893 /* e_no_len and e_number */
894 len = symptr->length;
895 break;
49704364 896 }
18529408 897
21c16052 898 explicit_length = TRUE;
a6ec74c1 899 redo_switch:
49704364 900 beyond = s >= strend;
a7a3cfaa
TH
901
902 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
903 if (props) {
904 /* props nonzero means we can process this letter. */
e1b825c1
Z
905 const SSize_t size = props & PACK_SIZE_MASK;
906 const SSize_t howmany = (strend - s) / size;
a7a3cfaa
TH
907 if (len > howmany)
908 len = howmany;
909
910 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
911 if (len && unpack_only_one) len = 1;
912 EXTEND(SP, len);
913 EXTEND_MORTAL(len);
78d46eaa
NC
914 }
915 }
a7a3cfaa 916
a1219b5e
NC
917 needs_swap = NEEDS_SWAP(datumtype);
918
1109a392 919 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 920 default:
1109a392 921 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 922
a6ec74c1 923 case '%':
49704364 924 if (howlen == e_no_len)
18529408 925 len = 16; /* len is not specified */
a6ec74c1 926 checksum = len;
92d41999 927 cuv = 0;
a6ec74c1 928 cdouble = 0;
18529408 929 continue;
81d52ecd 930
18529408
IZ
931 case '(':
932 {
49704364 933 tempsym_t savsym = *symptr;
9e27e96a 934 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 935 symptr->flags |= group_modifiers;
49704364 936 symptr->patend = savsym.grpend;
28be1210 937 symptr->previous = &savsym;
49704364 938 symptr->level++;
18529408 939 PUTBACK;
c6f750d1 940 if (len && unpack_only_one) len = 1;
18529408 941 while (len--) {
49704364 942 symptr->patptr = savsym.grpbeg;
f337b084
TH
943 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
944 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
945 unpack_rec(symptr, s, strbeg, strend, &s);
946 if (s == strend && savsym.howlen == e_star)
49704364 947 break; /* No way to continue */
18529408
IZ
948 }
949 SPAGAIN;
28be1210 950 savsym.flags = symptr->flags & ~group_modifiers;
49704364 951 *symptr = savsym;
18529408
IZ
952 break;
953 }
28be1210 954 case '.' | TYPE_IS_SHRIEKING:
28be1210 955 case '.': {
9e27e96a 956 const char *from;
28be1210 957 SV *sv;
9e27e96a 958 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
959 if (howlen == e_star) from = strbeg;
960 else if (len <= 0) from = s;
961 else {
962 tempsym_t *group = symptr;
963
964 while (--len && group) group = group->previous;
965 from = group ? strbeg + group->strbeg : strbeg;
966 }
967 sv = from <= s ?
00646304
CB
968 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
969 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 970 mXPUSHs(sv);
28be1210
TH
971 break;
972 }
28be1210 973 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 974 case '@':
28be1210 975 s = strbeg + symptr->strbeg;
28be1210 976 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 977 {
08ca2aa3
TH
978 while (len > 0) {
979 if (s >= strend)
980 Perl_croak(aTHX_ "'@' outside of string in unpack");
981 s += UTF8SKIP(s);
982 len--;
983 }
984 if (s > strend)
985 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
986 } else {
28be1210 987 if (strend-s < len)
fc241834 988 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 989 s += len;
08ca2aa3 990 }
a6ec74c1 991 break;
62f95557
IZ
992 case 'X' | TYPE_IS_SHRIEKING:
993 if (!len) /* Avoid division by 0 */
994 len = 1;
08ca2aa3 995 if (utf8) {
f7fe979e 996 const char *hop, *last;
e1b825c1 997 SSize_t l = len;
f337b084
TH
998 hop = last = strbeg;
999 while (hop < s) {
1000 hop += UTF8SKIP(hop);
1001 if (--l == 0) {
08ca2aa3 1002 last = hop;
f337b084
TH
1003 l = len;
1004 }
fc241834 1005 }
f337b084
TH
1006 if (last > s)
1007 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1008 s = last;
1009 break;
f337b084
TH
1010 }
1011 len = (s - strbeg) % len;
924ba076 1012 /* FALLTHROUGH */
a6ec74c1 1013 case 'X':
08ca2aa3
TH
1014 if (utf8) {
1015 while (len > 0) {
1016 if (s <= strbeg)
1017 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1018 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1019 if (s <= strbeg)
1020 Perl_croak(aTHX_ "'X' outside of string in unpack");
1021 }
1022 len--;
1023 }
1024 } else {
fc241834
RGS
1025 if (len > s - strbeg)
1026 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1027 s -= len;
08ca2aa3 1028 }
a6ec74c1 1029 break;
9e27e96a 1030 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 1031 SSize_t ai32;
62f95557
IZ
1032 if (!len) /* Avoid division by 0 */
1033 len = 1;
230e1fce
NC
1034 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1035 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1036 if (ai32 == 0) break;
1037 len -= ai32;
9e27e96a 1038 }
924ba076 1039 /* FALLTHROUGH */
a6ec74c1 1040 case 'x':
08ca2aa3
TH
1041 if (utf8) {
1042 while (len>0) {
1043 if (s >= strend)
1044 Perl_croak(aTHX_ "'x' outside of string in unpack");
1045 s += UTF8SKIP(s);
1046 len--;
1047 }
1048 } else {
fc241834
RGS
1049 if (len > strend - s)
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1051 s += len;
f337b084 1052 }
a6ec74c1
JH
1053 break;
1054 case '/':
49704364 1055 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1056
a6ec74c1
JH
1057 case 'A':
1058 case 'Z':
1059 case 'a':
08ca2aa3
TH
1060 if (checksum) {
1061 /* Preliminary length estimate is assumed done in 'W' */
1062 if (len > strend - s) len = strend - s;
1063 goto W_checksum;
1064 }
1065 if (utf8) {
e1b825c1 1066 SSize_t l;
f7fe979e 1067 const char *hop;
08ca2aa3
TH
1068 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1069 if (hop >= strend) {
1070 if (hop > strend)
1071 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1072 break;
fc241834 1073 }
a6ec74c1 1074 }
08ca2aa3
TH
1075 if (hop > strend)
1076 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1077 len = hop - s;
1078 } else if (len > strend - s)
1079 len = strend - s;
1080
1081 if (datumtype == 'Z') {
1082 /* 'Z' strips stuff after first null */
f7fe979e 1083 const char *ptr, *end;
f337b084
TH
1084 end = s + len;
1085 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1086 sv = newSVpvn(s, ptr-s);
1087 if (howlen == e_star) /* exact for 'Z*' */
1088 len = ptr-s + (ptr != strend ? 1 : 0);
1089 } else if (datumtype == 'A') {
1090 /* 'A' strips both nulls and spaces */
f7fe979e 1091 const char *ptr;
18bdf90a 1092 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1093 for (ptr = s+len-1; ptr >= s; ptr--) {
1094 if ( *ptr != 0
1095 && !UTF8_IS_CONTINUATION(*ptr)
1096 && !isSPACE_utf8_safe(ptr, strend))
1097 {
1098 break;
1099 }
1100 }
18bdf90a
TH
1101 if (ptr >= s) ptr += UTF8SKIP(ptr);
1102 else ptr++;
28be1210 1103 if (ptr > s+len)
18bdf90a
TH
1104 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1105 } else {
1106 for (ptr = s+len-1; ptr >= s; ptr--)
1107 if (*ptr != 0 && !isSPACE(*ptr)) break;
1108 ptr++;
1109 }
08ca2aa3
TH
1110 sv = newSVpvn(s, ptr-s);
1111 } else sv = newSVpvn(s, len);
1112
1113 if (utf8) {
1114 SvUTF8_on(sv);
1115 /* Undo any upgrade done due to need_utf8() */
f337b084 1116 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1117 sv_utf8_downgrade(sv, 0);
a6ec74c1 1118 }
6e449a3a 1119 mXPUSHs(sv);
08ca2aa3 1120 s += len;
a6ec74c1
JH
1121 break;
1122 case 'B':
08ca2aa3
TH
1123 case 'b': {
1124 char *str;
49704364 1125 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1126 len = (strend - s) * 8;
1127 if (checksum) {
f337b084 1128 if (utf8)
08ca2aa3 1129 while (len >= 8 && s < strend) {
9df874cd 1130 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1131 len -= 8;
1132 }
f337b084 1133 else
fc241834 1134 while (len >= 8) {
08ca2aa3 1135 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1136 len -= 8;
1137 }
08ca2aa3
TH
1138 if (len && s < strend) {
1139 U8 bits;
f337b084
TH
1140 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1141 if (datumtype == 'b')
a6ec74c1 1142 while (len-- > 0) {
92d41999 1143 if (bits & 1) cuv++;
a6ec74c1
JH
1144 bits >>= 1;
1145 }
f337b084 1146 else
a6ec74c1 1147 while (len-- > 0) {
08ca2aa3 1148 if (bits & 0x80) cuv++;
a6ec74c1
JH
1149 bits <<= 1;
1150 }
fc241834 1151 }
a6ec74c1
JH
1152 break;
1153 }
08ca2aa3 1154
561b68a9 1155 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1156 SvPOK_on(sv);
1157 str = SvPVX(sv);
1158 if (datumtype == 'b') {
f337b084 1159 U8 bits = 0;
e1b825c1 1160 const SSize_t ai32 = len;
08ca2aa3
TH
1161 for (len = 0; len < ai32; len++) {
1162 if (len & 7) bits >>= 1;
1163 else if (utf8) {
1164 if (s >= strend) break;
9df874cd 1165 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1166 } else bits = *(U8 *) s++;
1167 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1168 }
08ca2aa3 1169 } else {
f337b084 1170 U8 bits = 0;
e1b825c1 1171 const SSize_t ai32 = len;
08ca2aa3
TH
1172 for (len = 0; len < ai32; len++) {
1173 if (len & 7) bits <<= 1;
1174 else if (utf8) {
1175 if (s >= strend) break;
9df874cd 1176 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1177 } else bits = *(U8 *) s++;
1178 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1179 }
1180 }
1181 *str = '\0';
aa07b2f6 1182 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1183 XPUSHs(sv);
a6ec74c1 1184 break;
08ca2aa3 1185 }
a6ec74c1 1186 case 'H':
08ca2aa3 1187 case 'h': {
3297d27d 1188 char *str = NULL;
fc241834 1189 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1190 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1191 len = (strend - s) * 2;
858fe5e1
TC
1192 if (!checksum) {
1193 sv = sv_2mortal(newSV(len ? len : 1));
1194 SvPOK_on(sv);
1195 str = SvPVX(sv);
1196 }
a6ec74c1 1197 if (datumtype == 'h') {
f337b084 1198 U8 bits = 0;
e1b825c1 1199 SSize_t ai32 = len;
fc241834
RGS
1200 for (len = 0; len < ai32; len++) {
1201 if (len & 1) bits >>= 4;
1202 else if (utf8) {
1203 if (s >= strend) break;
9df874cd 1204 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1205 } else bits = * (U8 *) s++;
858fe5e1
TC
1206 if (!checksum)
1207 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1208 }
08ca2aa3 1209 } else {
f337b084 1210 U8 bits = 0;
e1b825c1 1211 const SSize_t ai32 = len;
08ca2aa3
TH
1212 for (len = 0; len < ai32; len++) {
1213 if (len & 1) bits <<= 4;
1214 else if (utf8) {
1215 if (s >= strend) break;
9df874cd 1216 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1217 } else bits = *(U8 *) s++;
858fe5e1
TC
1218 if (!checksum)
1219 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1220 }
1221 }
858fe5e1
TC
1222 if (!checksum) {
1223 *str = '\0';
1224 SvCUR_set(sv, str - SvPVX_const(sv));
1225 XPUSHs(sv);
1226 }
a6ec74c1 1227 break;
08ca2aa3 1228 }
1651fc44
ML
1229 case 'C':
1230 if (len == 0) {
1231 if (explicit_length)
1232 /* Switch to "character" mode */
1233 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1234 break;
1235 }
924ba076 1236 /* FALLTHROUGH */
a6ec74c1 1237 case 'c':
1651fc44
ML
1238 while (len-- > 0 && s < strend) {
1239 int aint;
1240 if (utf8)
1241 {
1242 STRLEN retlen;
1243 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1244 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1245 if (retlen == (STRLEN) -1)
1651fc44
ML
1246 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1247 s += retlen;
1248 }
1249 else
1250 aint = *(U8 *)(s)++;
1251 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1252 aint -= 256;
08ca2aa3 1253 if (!checksum)
6e449a3a 1254 mPUSHi(aint);
73cb7263
NC
1255 else if (checksum > bits_in_uv)
1256 cdouble += (NV)aint;
1257 else
1258 cuv += aint;
a6ec74c1
JH
1259 }
1260 break;
08ca2aa3
TH
1261 case 'W':
1262 W_checksum:
1651fc44 1263 if (utf8) {
08ca2aa3 1264 while (len-- > 0 && s < strend) {
08ca2aa3 1265 STRLEN retlen;
f7fe979e 1266 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1267 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1268 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1269 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1270 s += retlen;
1271 if (!checksum)
6e449a3a 1272 mPUSHu(val);
08ca2aa3
TH
1273 else if (checksum > bits_in_uv)
1274 cdouble += (NV) val;
d6d3e8bd 1275 else
08ca2aa3 1276 cuv += val;
fc241834 1277 }
08ca2aa3 1278 } else if (!checksum)
a6ec74c1 1279 while (len-- > 0) {
f7fe979e 1280 const U8 ch = *(U8 *) s++;
6e449a3a 1281 mPUSHu(ch);
a6ec74c1 1282 }
08ca2aa3
TH
1283 else if (checksum > bits_in_uv)
1284 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1285 else
1286 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1287 break;
1288 case 'U':
35bcd338 1289 if (len == 0) {
c5333953 1290 if (explicit_length && howlen != e_star) {
08ca2aa3 1291 /* Switch to "bytes in UTF-8" mode */
f337b084 1292 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1293 else
1294 /* Should be impossible due to the need_utf8() test */
1295 Perl_croak(aTHX_ "U0 mode on a byte string");
1296 }
35bcd338
JH
1297 break;
1298 }
08ca2aa3 1299 if (len > strend - s) len = strend - s;
fc241834 1300 if (!checksum) {
08ca2aa3
TH
1301 if (len && unpack_only_one) len = 1;
1302 EXTEND(SP, len);
1303 EXTEND_MORTAL(len);
fc241834 1304 }
08ca2aa3
TH
1305 while (len-- > 0 && s < strend) {
1306 STRLEN retlen;
1307 UV auv;
1308 if (utf8) {
81b3f640 1309 U8 result[UTF8_MAXLEN+1];
f7fe979e 1310 const char *ptr = s;
08ca2aa3 1311 STRLEN len;
08ca2aa3
TH
1312 /* Bug: warns about bad utf8 even if we are short on bytes
1313 and will break out of the loop */
9df874cd 1314 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1315 'U'))
08ca2aa3
TH
1316 break;
1317 len = UTF8SKIP(result);
9df874cd 1318 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1319 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1320 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1321 len,
1322 &retlen,
1323 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1324 s = ptr;
1325 } else {
3ece276e
KW
1326 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1327 strend - s,
1328 &retlen,
1329 UTF8_ALLOW_DEFAULT));
df337714 1330 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1331 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1332 s += retlen;
1333 }
1334 if (!checksum)
6e449a3a 1335 mPUSHu(auv);
73cb7263 1336 else if (checksum > bits_in_uv)
08ca2aa3 1337 cdouble += (NV) auv;
73cb7263 1338 else
08ca2aa3 1339 cuv += auv;
a6ec74c1
JH
1340 }
1341 break;
49704364
WL
1342 case 's' | TYPE_IS_SHRIEKING:
1343#if SHORTSIZE != SIZE16
73cb7263 1344 while (len-- > 0) {
08ca2aa3 1345 short ashort;
aaec8192 1346 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1347 if (!checksum)
6e449a3a 1348 mPUSHi(ashort);
73cb7263
NC
1349 else if (checksum > bits_in_uv)
1350 cdouble += (NV)ashort;
1351 else
1352 cuv += ashort;
49704364
WL
1353 }
1354 break;
1355#else
924ba076 1356 /* FALLTHROUGH */
a6ec74c1 1357#endif
49704364 1358 case 's':
73cb7263 1359 while (len-- > 0) {
08ca2aa3
TH
1360 I16 ai16;
1361
1362#if U16SIZE > SIZE16
1363 ai16 = 0;
1364#endif
aaec8192 1365 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1366#if U16SIZE > SIZE16
73cb7263
NC
1367 if (ai16 > 32767)
1368 ai16 -= 65536;
a6ec74c1 1369#endif
08ca2aa3 1370 if (!checksum)
6e449a3a 1371 mPUSHi(ai16);
73cb7263
NC
1372 else if (checksum > bits_in_uv)
1373 cdouble += (NV)ai16;
1374 else
1375 cuv += ai16;
a6ec74c1
JH
1376 }
1377 break;
49704364
WL
1378 case 'S' | TYPE_IS_SHRIEKING:
1379#if SHORTSIZE != SIZE16
73cb7263 1380 while (len-- > 0) {
08ca2aa3 1381 unsigned short aushort;
c0f2b636 1382 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
08ca2aa3 1383 if (!checksum)
6e449a3a 1384 mPUSHu(aushort);
73cb7263
NC
1385 else if (checksum > bits_in_uv)
1386 cdouble += (NV)aushort;
1387 else
1388 cuv += aushort;
49704364
WL
1389 }
1390 break;
1391#else
924ba076 1392 /* FALLTHROUGH */
49704364 1393#endif
a6ec74c1
JH
1394 case 'v':
1395 case 'n':
1396 case 'S':
73cb7263 1397 while (len-- > 0) {
08ca2aa3
TH
1398 U16 au16;
1399#if U16SIZE > SIZE16
1400 au16 = 0;
1401#endif
aaec8192 1402 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1403 if (datumtype == 'n')
1404 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1405 if (datumtype == 'v')
1406 au16 = vtohs(au16);
08ca2aa3 1407 if (!checksum)
6e449a3a 1408 mPUSHu(au16);
73cb7263 1409 else if (checksum > bits_in_uv)
f337b084 1410 cdouble += (NV) au16;
73cb7263
NC
1411 else
1412 cuv += au16;
a6ec74c1
JH
1413 }
1414 break;
068bd2e7
MHM
1415 case 'v' | TYPE_IS_SHRIEKING:
1416 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1417 while (len-- > 0) {
08ca2aa3
TH
1418 I16 ai16;
1419# if U16SIZE > SIZE16
1420 ai16 = 0;
1421# endif
aaec8192 1422 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1423 /* There should never be any byte-swapping here. */
1424 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1425 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1426 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1427 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1428 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1429 if (!checksum)
6e449a3a 1430 mPUSHi(ai16);
73cb7263 1431 else if (checksum > bits_in_uv)
08ca2aa3 1432 cdouble += (NV) ai16;
73cb7263
NC
1433 else
1434 cuv += ai16;
068bd2e7
MHM
1435 }
1436 break;
a6ec74c1 1437 case 'i':
49704364 1438 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1439 while (len-- > 0) {
08ca2aa3 1440 int aint;
aaec8192 1441 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1442 if (!checksum)
6e449a3a 1443 mPUSHi(aint);
73cb7263
NC
1444 else if (checksum > bits_in_uv)
1445 cdouble += (NV)aint;
1446 else
1447 cuv += aint;
a6ec74c1
JH
1448 }
1449 break;
1450 case 'I':
49704364 1451 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1452 while (len-- > 0) {
08ca2aa3 1453 unsigned int auint;
aaec8192 1454 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1455 if (!checksum)
6e449a3a 1456 mPUSHu(auint);
73cb7263
NC
1457 else if (checksum > bits_in_uv)
1458 cdouble += (NV)auint;
1459 else
1460 cuv += auint;
a6ec74c1
JH
1461 }
1462 break;
92d41999 1463 case 'j':
73cb7263 1464 while (len-- > 0) {
08ca2aa3 1465 IV aiv;
aaec8192 1466 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1467 if (!checksum)
6e449a3a 1468 mPUSHi(aiv);
73cb7263
NC
1469 else if (checksum > bits_in_uv)
1470 cdouble += (NV)aiv;
1471 else
1472 cuv += aiv;
92d41999
JH
1473 }
1474 break;
1475 case 'J':
73cb7263 1476 while (len-- > 0) {
08ca2aa3 1477 UV auv;
aaec8192 1478 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1479 if (!checksum)
6e449a3a 1480 mPUSHu(auv);
73cb7263
NC
1481 else if (checksum > bits_in_uv)
1482 cdouble += (NV)auv;
1483 else
1484 cuv += auv;
92d41999
JH
1485 }
1486 break;
49704364
WL
1487 case 'l' | TYPE_IS_SHRIEKING:
1488#if LONGSIZE != SIZE32
73cb7263 1489 while (len-- > 0) {
08ca2aa3 1490 long along;
aaec8192 1491 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1492 if (!checksum)
6e449a3a 1493 mPUSHi(along);
73cb7263
NC
1494 else if (checksum > bits_in_uv)
1495 cdouble += (NV)along;
1496 else
1497 cuv += along;
49704364
WL
1498 }
1499 break;
1500#else
924ba076 1501 /* FALLTHROUGH */
a6ec74c1 1502#endif
49704364 1503 case 'l':
73cb7263 1504 while (len-- > 0) {
08ca2aa3
TH
1505 I32 ai32;
1506#if U32SIZE > SIZE32
1507 ai32 = 0;
1508#endif
aaec8192 1509 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1510#if U32SIZE > SIZE32
08ca2aa3 1511 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1512#endif
08ca2aa3 1513 if (!checksum)
6e449a3a 1514 mPUSHi(ai32);
73cb7263
NC
1515 else if (checksum > bits_in_uv)
1516 cdouble += (NV)ai32;
1517 else
1518 cuv += ai32;
a6ec74c1
JH
1519 }
1520 break;
49704364
WL
1521 case 'L' | TYPE_IS_SHRIEKING:
1522#if LONGSIZE != SIZE32
73cb7263 1523 while (len-- > 0) {
08ca2aa3 1524 unsigned long aulong;
aaec8192 1525 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1526 if (!checksum)
6e449a3a 1527 mPUSHu(aulong);
73cb7263
NC
1528 else if (checksum > bits_in_uv)
1529 cdouble += (NV)aulong;
1530 else
1531 cuv += aulong;
49704364
WL
1532 }
1533 break;
1534#else
924ba076 1535 /* FALLTHROUGH */
49704364 1536#endif
a6ec74c1
JH
1537 case 'V':
1538 case 'N':
1539 case 'L':
73cb7263 1540 while (len-- > 0) {
08ca2aa3
TH
1541 U32 au32;
1542#if U32SIZE > SIZE32
1543 au32 = 0;
1544#endif
aaec8192 1545 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1546 if (datumtype == 'N')
1547 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1548 if (datumtype == 'V')
1549 au32 = vtohl(au32);
08ca2aa3 1550 if (!checksum)
6e449a3a 1551 mPUSHu(au32);
fc241834
RGS
1552 else if (checksum > bits_in_uv)
1553 cdouble += (NV)au32;
1554 else
1555 cuv += au32;
a6ec74c1
JH
1556 }
1557 break;
068bd2e7
MHM
1558 case 'V' | TYPE_IS_SHRIEKING:
1559 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1560 while (len-- > 0) {
08ca2aa3 1561 I32 ai32;
f8e5a5db 1562#if U32SIZE > SIZE32
08ca2aa3 1563 ai32 = 0;
f8e5a5db 1564#endif
aaec8192 1565 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1566 /* There should never be any byte swapping here. */
1567 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1568 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1569 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1570 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1571 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1572 if (!checksum)
6e449a3a 1573 mPUSHi(ai32);
73cb7263
NC
1574 else if (checksum > bits_in_uv)
1575 cdouble += (NV)ai32;
1576 else
1577 cuv += ai32;
068bd2e7
MHM
1578 }
1579 break;
a6ec74c1 1580 case 'p':
a6ec74c1 1581 while (len-- > 0) {
f7fe979e 1582 const char *aptr;
aaec8192 1583 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1584 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1585 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1586 }
1587 break;
1588 case 'w':
a6ec74c1
JH
1589 {
1590 UV auv = 0;
e1b825c1 1591 size_t bytes = 0;
fc241834 1592
08ca2aa3
TH
1593 while (len > 0 && s < strend) {
1594 U8 ch;
f337b084 1595 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1596 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1597 /* UTF8_IS_XXXXX not right here because this is a BER, not
1598 * UTF-8 format - using constant 0x80 */
08ca2aa3 1599 if (ch < 0x80) {
a6ec74c1 1600 bytes = 0;
6e449a3a 1601 mPUSHu(auv);
a6ec74c1
JH
1602 len--;
1603 auv = 0;
08ca2aa3 1604 continue;
a6ec74c1 1605 }
08ca2aa3 1606 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1607 const char *t;
a6ec74c1 1608
147e3846
KW
1609 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1610 (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1611 while (s < strend) {
f337b084 1612 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1613 sv = mul128(sv, (U8)(ch & 0x7f));
1614 if (!(ch & 0x80)) {
a6ec74c1
JH
1615 bytes = 0;
1616 break;
1617 }
1618 }
10516c54 1619 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1620 while (*t == '0')
1621 t++;
1622 sv_chop(sv, t);
6e449a3a 1623 mPUSHs(sv);
a6ec74c1
JH
1624 len--;
1625 auv = 0;
1626 }
1627 }
1628 if ((s >= strend) && bytes)
49704364 1629 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1630 }
1631 break;
1632 case 'P':
49704364
WL
1633 if (symptr->howlen == e_star)
1634 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1635 EXTEND(SP, 1);
2d3e0934 1636 if (s + sizeof(char*) <= strend) {
08ca2aa3 1637 char *aptr;
aaec8192 1638 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1639 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1640 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1641 }
a6ec74c1 1642 break;
c174bf3b 1643#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1644 case 'q':
73cb7263 1645 while (len-- > 0) {
08ca2aa3 1646 Quad_t aquad;
aaec8192 1647 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1648 if (!checksum)
c174bf3b 1649 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)aquad;
1652 else
1653 cuv += aquad;
1654 }
a6ec74c1
JH
1655 break;
1656 case 'Q':
73cb7263 1657 while (len-- > 0) {
08ca2aa3 1658 Uquad_t auquad;
aaec8192 1659 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1660 if (!checksum)
c174bf3b 1661 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1662 else if (checksum > bits_in_uv)
1663 cdouble += (NV)auquad;
1664 else
1665 cuv += auquad;
a6ec74c1
JH
1666 }
1667 break;
1640b983 1668#endif
a6ec74c1
JH
1669 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1670 case 'f':
73cb7263 1671 while (len-- > 0) {
08ca2aa3 1672 float afloat;
aaec8192 1673 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1674 if (!checksum)
6e449a3a 1675 mPUSHn(afloat);
08ca2aa3 1676 else
73cb7263 1677 cdouble += afloat;
fc241834 1678 }
a6ec74c1
JH
1679 break;
1680 case 'd':
73cb7263 1681 while (len-- > 0) {
08ca2aa3 1682 double adouble;
aaec8192 1683 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1684 if (!checksum)
6e449a3a 1685 mPUSHn(adouble);
08ca2aa3 1686 else
73cb7263 1687 cdouble += adouble;
fc241834 1688 }
a6ec74c1 1689 break;
92d41999 1690 case 'F':
73cb7263 1691 while (len-- > 0) {
275663fa 1692 NV_bytes anv;
aaec8192
NC
1693 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1694 datumtype, needs_swap);
08ca2aa3 1695 if (!checksum)
275663fa 1696 mPUSHn(anv.nv);
08ca2aa3 1697 else
275663fa 1698 cdouble += anv.nv;
fc241834 1699 }
92d41999
JH
1700 break;
1701#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1702 case 'D':
73cb7263 1703 while (len-- > 0) {
275663fa 1704 ld_bytes aldouble;
aaec8192
NC
1705 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1706 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1707 /* The most common long double format, the x86 80-bit
1708 * extended precision, has either 2 or 6 unused bytes,
1709 * which may contain garbage, which may contain
1710 * unintentional data. While we do zero the bytes of
1711 * the long double data in pack(), here in unpack() we
1712 * don't, because it's really hard to envision that
1713 * reading the long double off aldouble would be
e075ae47 1714 * affected by the unused bytes.
42262fd3
JH
1715 *
1716 * Note that trying to unpack 'long doubles' of 'long
1717 * doubles' packed in another system is in the general
1718 * case doomed without having more detail. */
08ca2aa3 1719 if (!checksum)
275663fa 1720 mPUSHn(aldouble.ld);
08ca2aa3 1721 else
275663fa 1722 cdouble += aldouble.ld;
92d41999
JH
1723 }
1724 break;
1725#endif
a6ec74c1 1726 case 'u':
858fe5e1 1727 if (!checksum) {
f7fe979e 1728 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1729 sv = sv_2mortal(newSV(l));
12cad9bd
TC
1730 if (l) {
1731 SvPOK_on(sv);
1732 *SvEND(sv) = '\0';
1733 }
08ca2aa3 1734 }
08ca2aa3 1735
bee81d36
KW
1736 /* Note that all legal uuencoded strings are ASCII printables, so
1737 * have the same representation under UTF-8 vs not. This means we
1738 * can ignore UTF8ness on legal input. For illegal we stop at the
1739 * first failure, and don't report where/what that is, so again we
1740 * can ignore UTF8ness */
1741
9233b56b
KW
1742 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1743 I32 a, b, c, d;
1744 char hunk[3];
1745
1746 len = PL_uudmap[*(U8*)s++] & 077;
1747 while (len > 0) {
1748 if (s < strend && ISUUCHAR(*s))
1749 a = PL_uudmap[*(U8*)s++] & 077;
1750 else
1751 a = 0;
1752 if (s < strend && ISUUCHAR(*s))
1753 b = PL_uudmap[*(U8*)s++] & 077;
1754 else
1755 b = 0;
1756 if (s < strend && ISUUCHAR(*s))
1757 c = PL_uudmap[*(U8*)s++] & 077;
1758 else
1759 c = 0;
1760 if (s < strend && ISUUCHAR(*s))
1761 d = PL_uudmap[*(U8*)s++] & 077;
1762 else
1763 d = 0;
1764 hunk[0] = (char)((a << 2) | (b >> 4));
1765 hunk[1] = (char)((b << 4) | (c >> 2));
1766 hunk[2] = (char)((c << 6) | d);
1767 if (!checksum)
1768 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1769 len -= 3;
1770 }
1771 if (*s == '\n')
1772 s++;
1773 else /* possible checksum byte */
1774 if (s + 1 < strend && s[1] == '\n')
1775 s += 2;
1776 }
858fe5e1
TC
1777 if (!checksum)
1778 XPUSHs(sv);
a6ec74c1 1779 break;
99f862a1 1780 } /* End of switch */
49704364 1781
a6ec74c1 1782 if (checksum) {
4aada8b9 1783 if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1784 (checksum > bits_in_uv &&
4aada8b9 1785 memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
08ca2aa3 1786 NV trouble, anv;
a6ec74c1 1787
08ca2aa3 1788 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1789 while (checksum >= 16) {
1790 checksum -= 16;
08ca2aa3 1791 anv *= 65536.0;
a6ec74c1 1792 }
a6ec74c1 1793 while (cdouble < 0.0)
08ca2aa3 1794 cdouble += anv;
09b94b1f
S
1795 cdouble = Perl_modf(cdouble / anv, &trouble);
1796#ifdef LONGDOUBLE_DOUBLEDOUBLE
1797 /* Workaround for powerpc doubledouble modfl bug:
1798 * close to 1.0L and -1.0L cdouble is 0, and trouble
1799 * is cdouble / anv. */
1800 if (trouble != Perl_ceil(trouble)) {
1801 cdouble = trouble;
1802 if (cdouble > 1.0L) cdouble -= 1.0L;
1803 if (cdouble < -1.0L) cdouble += 1.0L;
1804 }
1805#endif
1806 cdouble *= anv;
c4c5f44a 1807 sv = newSVnv(cdouble);
a6ec74c1
JH
1808 }
1809 else {
fa8ec7c1
NC
1810 if (checksum < bits_in_uv) {
1811 UV mask = ((UV)1 << checksum) - 1;
92d41999 1812 cuv &= mask;
a6ec74c1 1813 }
c4c5f44a 1814 sv = newSVuv(cuv);
a6ec74c1 1815 }
6e449a3a 1816 mXPUSHs(sv);
a6ec74c1
JH
1817 checksum = 0;
1818 }
fc241834 1819
49704364
WL
1820 if (symptr->flags & FLAG_SLASH){
1821 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1822 break;
49704364
WL
1823 if( next_symbol(symptr) ){
1824 if( symptr->howlen == e_number )
1825 Perl_croak(aTHX_ "Count after length/code in unpack" );
1826 if( beyond ){
1827 /* ...end of char buffer then no decent length available */
1828 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1829 } else {
1830 /* take top of stack (hope it's numeric) */
1831 len = POPi;
1832 if( len < 0 )
1833 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1834 }
1835 } else {
1836 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1837 }
1838 datumtype = symptr->code;
21c16052 1839 explicit_length = FALSE;
49704364
WL
1840 goto redo_switch;
1841 }
a6ec74c1 1842 }
49704364 1843
18529408
IZ
1844 if (new_s)
1845 *new_s = s;
1846 PUTBACK;
1847 return SP - PL_stack_base - start_sp_offset;
1848}
1849
1850PP(pp_unpack)
1851{
1852 dSP;
bab9c0ac 1853 dPOPPOPssrl;
1c23e2bd 1854 U8 gimme = GIMME_V;
18529408
IZ
1855 STRLEN llen;
1856 STRLEN rlen;
5c144d81
NC
1857 const char *pat = SvPV_const(left, llen);
1858 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1859 const char *strend = s + rlen;
1860 const char *patend = pat + llen;
e1b825c1 1861 SSize_t cnt;
18529408
IZ
1862
1863 PUTBACK;
7accc089 1864 cnt = unpackstring(pat, patend, s, strend,
49704364 1865 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1866 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1867
18529408
IZ
1868 SPAGAIN;
1869 if ( !cnt && gimme == G_SCALAR )
1870 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1871 RETURN;
1872}
1873
f337b084 1874STATIC U8 *
e1b825c1 1875doencodes(U8 *h, const U8 *s, SSize_t len)
a6ec74c1 1876{
f337b084 1877 *h++ = PL_uuemap[len];
a6ec74c1 1878 while (len > 2) {
f337b084
TH
1879 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1880 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1881 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1882 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1883 s += 3;
1884 len -= 3;
1885 }
1886 if (len > 0) {
e68aed92 1887 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1888 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1889 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1890 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1891 *h++ = PL_uuemap[0];
a6ec74c1 1892 }
f337b084
TH
1893 *h++ = '\n';
1894 return h;
a6ec74c1
JH
1895}
1896
1897STATIC SV *
f7fe979e 1898S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1899{
8b6e33c7
AL
1900 SV *result = newSVpvn(s, l);
1901 char *const result_c = SvPV_nolen(result); /* convenience */
1902 char *out = result_c;
1903 bool skip = 1;
1904 bool ignore = 0;
a6ec74c1 1905
7918f24d
NC
1906 PERL_ARGS_ASSERT_IS_AN_INT;
1907
a6ec74c1
JH
1908 while (*s) {
1909 switch (*s) {
1910 case ' ':
1911 break;
1912 case '+':
1913 if (!skip) {
1914 SvREFCNT_dec(result);
1915 return (NULL);
1916 }
1917 break;
1918 case '0':
1919 case '1':
1920 case '2':
1921 case '3':
1922 case '4':
1923 case '5':
1924 case '6':
1925 case '7':
1926 case '8':
1927 case '9':
1928 skip = 0;
1929 if (!ignore) {
1930 *(out++) = *s;
1931 }
1932 break;
1933 case '.':
1934 ignore = 1;
1935 break;
1936 default:
1937 SvREFCNT_dec(result);
1938 return (NULL);
1939 }
1940 s++;
1941 }
1942 *(out++) = '\0';
1943 SvCUR_set(result, out - result_c);
1944 return (result);
1945}
1946
1947/* pnum must be '\0' terminated */
1948STATIC int
1949S_div128(pTHX_ SV *pnum, bool *done)
1950{
8b6e33c7
AL
1951 STRLEN len;
1952 char * const s = SvPV(pnum, len);
1953 char *t = s;
1954 int m = 0;
1955
7918f24d
NC
1956 PERL_ARGS_ASSERT_DIV128;
1957
8b6e33c7
AL
1958 *done = 1;
1959 while (*t) {
1960 const int i = m * 10 + (*t - '0');
1961 const int r = (i >> 7); /* r < 10 */
1962 m = i & 0x7F;
1963 if (r) {
1964 *done = 0;
1965 }
1966 *(t++) = '0' + r;
a6ec74c1 1967 }
8b6e33c7
AL
1968 *(t++) = '\0';
1969 SvCUR_set(pnum, (STRLEN) (t - s));
1970 return (m);
a6ec74c1
JH
1971}
1972
18529408 1973/*
7accc089
JH
1974=for apidoc packlist
1975
796b6530 1976The engine implementing C<pack()> Perl function.
7accc089 1977
bfce84ec
AL
1978=cut
1979*/
7accc089
JH
1980
1981void
5aaab254 1982Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1983{
aadb217d
JH
1984 tempsym_t sym;
1985
7918f24d
NC
1986 PERL_ARGS_ASSERT_PACKLIST;
1987
f7fe979e 1988 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1989
f337b084
TH
1990 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1991 Also make sure any UTF8 flag is loaded */
56eb0262 1992 SvPV_force_nolen(cat);
bfce84ec
AL
1993 if (DO_UTF8(cat))
1994 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1995
49704364
WL
1996 (void)pack_rec( cat, &sym, beglist, endlist );
1997}
1998
f337b084
TH
1999/* like sv_utf8_upgrade, but also repoint the group start markers */
2000STATIC void
2001marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2002 STRLEN len;
2003 tempsym_t *group;
f7fe979e
AL
2004 const char *from_ptr, *from_start, *from_end, **marks, **m;
2005 char *to_start, *to_ptr;
f337b084
TH
2006
2007 if (SvUTF8(sv)) return;
2008
aa07b2f6 2009 from_start = SvPVX_const(sv);
f337b084
TH
2010 from_end = from_start + SvCUR(sv);
2011 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 2012 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2013 if (from_ptr == from_end) {
2014 /* Simple case: no character needs to be changed */
2015 SvUTF8_on(sv);
2016 return;
2017 }
2018
3473cf63 2019 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2020 Newx(to_start, len, char);
f337b084
TH
2021 Copy(from_start, to_start, from_ptr-from_start, char);
2022 to_ptr = to_start + (from_ptr-from_start);
2023
a02a5408 2024 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2025 for (group=sym_ptr; group; group = group->previous)
2026 marks[group->level] = from_start + group->strbeg;
2027 marks[sym_ptr->level+1] = from_end+1;
2028 for (m = marks; *m < from_ptr; m++)
2029 *m = to_start + (*m-from_start);
2030
2031 for (;from_ptr < from_end; from_ptr++) {
2032 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2033 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2034 }
2035 *to_ptr = 0;
2036
2037 while (*m == from_ptr) *m++ = to_ptr;
2038 if (m != marks + sym_ptr->level+1) {
2039 Safefree(marks);
2040 Safefree(to_start);
5637ef5b
NC
2041 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2042 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2043 }
2044 for (group=sym_ptr; group; group = group->previous)
2045 group->strbeg = marks[group->level] - to_start;
2046 Safefree(marks);
2047
2048 if (SvOOK(sv)) {
2049 if (SvIVX(sv)) {
b162af07 2050 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2051 from_start -= SvIVX(sv);
2052 SvIV_set(sv, 0);
2053 }
2054 SvFLAGS(sv) &= ~SVf_OOK;
2055 }
2056 if (SvLEN(sv) != 0)
2057 Safefree(from_start);
f880fe2f 2058 SvPV_set(sv, to_start);
b162af07
SP
2059 SvCUR_set(sv, to_ptr - to_start);
2060 SvLEN_set(sv, len);
f337b084
TH
2061 SvUTF8_on(sv);
2062}
2063
2064/* Exponential string grower. Makes string extension effectively O(n)
2065 needed says how many extra bytes we need (not counting the final '\0')
2066 Only grows the string if there is an actual lack of space
2067*/
2068STATIC char *
0bd48802 2069S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2070 const STRLEN cur = SvCUR(sv);
2071 const STRLEN len = SvLEN(sv);
f337b084 2072 STRLEN extend;
7918f24d
NC
2073
2074 PERL_ARGS_ASSERT_SV_EXP_GROW;
2075
f337b084
TH
2076 if (len - cur > needed) return SvPVX(sv);
2077 extend = needed > len ? needed : len;
2078 return SvGROW(sv, len+extend+1);
2079}
49704364 2080
93f6e112 2081static SV *
b197e565 2082S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2083{
2084 SvGETMAGIC(sv);
93f6e112
FC
2085 if (UNLIKELY(SvAMAGIC(sv)))
2086 sv = sv_2num(sv);
99f450cc 2087 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2088 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2089 const NV nv = SvNV_nomg(sv);
2090 if (c == 'w')
147e3846 2091 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
354b74ae 2092 else
147e3846 2093 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2094 }
93f6e112 2095 return sv;
354b74ae
FC
2096}
2097
93f6e112
FC
2098#define SvIV_no_inf(sv,d) \
2099 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2100#define SvUV_no_inf(sv,d) \
2101 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2102
49704364
WL
2103STATIC
2104SV **
f337b084 2105S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2106{
49704364 2107 tempsym_t lookahead;
e1b825c1 2108 SSize_t items = endlist - beglist;
f337b084
TH
2109 bool found = next_symbol(symptr);
2110 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2111 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2112 char* from;
f337b084 2113
7918f24d
NC
2114 PERL_ARGS_ASSERT_PACK_REC;
2115
f337b084
TH
2116 if (symptr->level == 0 && found && symptr->code == 'U') {
2117 marked_upgrade(aTHX_ cat, symptr);
2118 symptr->flags |= FLAG_DO_UTF8;
2119 utf8 = 0;
49704364 2120 }
f337b084 2121 symptr->strbeg = SvCUR(cat);
49704364
WL
2122
2123 while (found) {
f337b084
TH
2124 SV *fromstr;
2125 STRLEN fromlen;
e1b825c1 2126 SSize_t len;
a0714e2c 2127 SV *lengthcode = NULL;
49704364 2128 I32 datumtype = symptr->code;
f337b084
TH
2129 howlen_t howlen = symptr->howlen;
2130 char *start = SvPVX(cat);
2131 char *cur = start + SvCUR(cat);
a1219b5e 2132 bool needs_swap;
49704364 2133
fc1bb3f2 2134#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2135#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2136
2137 switch (howlen) {
fc241834 2138 case e_star:
4aada8b9 2139 len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
f337b084
TH
2140 0 : items;
2141 break;
2142 default:
2143 /* e_no_len and e_number */
2144 len = symptr->length;
49704364
WL
2145 break;
2146 }
2147
f337b084 2148 if (len) {
a7a3cfaa 2149 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2150
a7a3cfaa
TH
2151 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2152 /* We can process this letter. */
2153 STRLEN size = props & PACK_SIZE_MASK;
f17fed50 2154 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
a7a3cfaa 2155 }
f337b084
TH
2156 }
2157
49704364
WL
2158 /* Look ahead for next symbol. Do we have code/code? */
2159 lookahead = *symptr;
2160 found = next_symbol(&lookahead);
246f24af
TH
2161 if (symptr->flags & FLAG_SLASH) {
2162 IV count;
f337b084 2163 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
4aada8b9 2164 if (memCHRs("aAZ", lookahead.code)) {
246f24af
TH
2165 if (lookahead.howlen == e_number) count = lookahead.length;
2166 else {
ce399ba6 2167 if (items > 0) {
48a5da33 2168 count = sv_len_utf8(*beglist);
ce399ba6 2169 }
246f24af
TH
2170 else count = 0;
2171 if (lookahead.code == 'Z') count++;
2172 }
2173 } else {
2174 if (lookahead.howlen == e_number && lookahead.length < items)
2175 count = lookahead.length;
2176 else count = items;
2177 }
2178 lookahead.howlen = e_number;
2179 lookahead.length = count;
2180 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2181 }
49704364 2182
a1219b5e
NC
2183 needs_swap = NEEDS_SWAP(datumtype);
2184
fc241834
RGS
2185 /* Code inside the switch must take care to properly update
2186 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2187 doesn't simply leave using break */
0c7df902 2188 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2189 default:
f337b084
TH
2190 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2191 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2192 case '%':
49704364 2193 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2194
28be1210 2195 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2196 case '.':
2197 if (howlen == e_star) from = start;
2198 else if (len == 0) from = cur;
2199 else {
2200 tempsym_t *group = symptr;
2201
2202 while (--len && group) group = group->previous;
2203 from = group ? start + group->strbeg : start;
2204 }
2205 fromstr = NEXTFROM;
354b74ae 2206 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2207 goto resize;
28be1210 2208 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2209 case '@':
28be1210
TH
2210 from = start + symptr->strbeg;
2211 resize:
28be1210 2212 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2213 if (len >= 0) {
2214 while (len && from < cur) {
2215 from += UTF8SKIP(from);
2216 len--;
2217 }
2218 if (from > cur)
2219 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2220 if (len) {
2221 /* Here we know from == cur */
2222 grow:
2223 GROWING(0, cat, start, cur, len);
2224 Zero(cur, len, char);
2225 cur += len;
2226 } else if (from < cur) {
2227 len = cur - from;
2228 goto shrink;
2229 } else goto no_change;
2230 } else {
2231 cur = from;
2232 len = -len;
2233 goto utf8_shrink;
f337b084 2234 }
28be1210
TH
2235 else {
2236 len -= cur - from;
f337b084 2237 if (len > 0) goto grow;
28be1210 2238 if (len == 0) goto no_change;
fc241834 2239 len = -len;
28be1210 2240 goto shrink;
f337b084 2241 }
a6ec74c1 2242 break;
81d52ecd 2243
fc241834 2244 case '(': {
49704364 2245 tempsym_t savsym = *symptr;
66c611c5
MHM
2246 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2247 symptr->flags |= group_modifiers;
49704364
WL
2248 symptr->patend = savsym.grpend;
2249 symptr->level++;
f337b084 2250 symptr->previous = &lookahead;
18529408 2251 while (len--) {
f337b084
TH
2252 U32 was_utf8;
2253 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2254 else symptr->flags &= ~FLAG_PARSE_UTF8;
2255 was_utf8 = SvUTF8(cat);
49704364 2256 symptr->patptr = savsym.grpbeg;
f337b084
TH
2257 beglist = pack_rec(cat, symptr, beglist, endlist);
2258 if (SvUTF8(cat) != was_utf8)
2259 /* This had better be an upgrade while in utf8==0 mode */
2260 utf8 = 1;
2261
49704364 2262 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2263 break; /* No way to continue */
2264 }
ee790063 2265 items = endlist - beglist;
f337b084
TH
2266 lookahead.flags = symptr->flags & ~group_modifiers;
2267 goto no_change;
18529408 2268 }
62f95557
IZ
2269 case 'X' | TYPE_IS_SHRIEKING:
2270 if (!len) /* Avoid division by 0 */
2271 len = 1;
f337b084
TH
2272 if (utf8) {
2273 char *hop, *last;
e1b825c1 2274 SSize_t l = len;
f337b084
TH
2275 hop = last = start;
2276 while (hop < cur) {
2277 hop += UTF8SKIP(hop);
2278 if (--l == 0) {
2279 last = hop;
2280 l = len;
2281 }
2282 }
2283 if (last > cur)
2284 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2285 cur = last;
2286 break;
2287 }
2288 len = (cur-start) % len;
924ba076 2289 /* FALLTHROUGH */
a6ec74c1 2290 case 'X':
f337b084
TH
2291 if (utf8) {
2292 if (len < 1) goto no_change;
28be1210 2293 utf8_shrink:
f337b084
TH
2294 while (len > 0) {
2295 if (cur <= start)
28be1210
TH
2296 Perl_croak(aTHX_ "'%c' outside of string in pack",
2297 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2298 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2299 if (cur <= start)
28be1210
TH
2300 Perl_croak(aTHX_ "'%c' outside of string in pack",
2301 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2302 }
2303 len--;
2304 }
2305 } else {
fc241834 2306 shrink:
f337b084 2307 if (cur - start < len)
28be1210
TH
2308 Perl_croak(aTHX_ "'%c' outside of string in pack",
2309 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2310 cur -= len;
2311 }
2312 if (cur < start+symptr->strbeg) {
2313 /* Make sure group starts don't point into the void */
2314 tempsym_t *group;
9e27e96a 2315 const STRLEN length = cur-start;
f337b084
TH
2316 for (group = symptr;
2317 group && length < group->strbeg;
2318 group = group->previous) group->strbeg = length;
2319 lookahead.strbeg = length;
2320 }
a6ec74c1 2321 break;
fc241834 2322 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 2323 SSize_t ai32;
62f95557
IZ
2324 if (!len) /* Avoid division by 0 */
2325 len = 1;
230e1fce 2326 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2327 else ai32 = (cur - start) % len;
2328 if (ai32 == 0) goto no_change;
2329 len -= ai32;
2330 }
924ba076 2331 /* FALLTHROUGH */
a6ec74c1 2332 case 'x':
f337b084 2333 goto grow;
a6ec74c1
JH
2334 case 'A':
2335 case 'Z':
f337b084 2336 case 'a': {
f7fe979e 2337 const char *aptr;
f337b084 2338
a6ec74c1 2339 fromstr = NEXTFROM;
e62f0680 2340 aptr = SvPV_const(fromstr, fromlen);
f337b084 2341 if (DO_UTF8(fromstr)) {
f7fe979e 2342 const char *end, *s;
f337b084
TH
2343
2344 if (!utf8 && !SvUTF8(cat)) {
2345 marked_upgrade(aTHX_ cat, symptr);
2346 lookahead.flags |= FLAG_DO_UTF8;
2347 lookahead.strbeg = symptr->strbeg;
2348 utf8 = 1;
2349 start = SvPVX(cat);
2350 cur = start + SvCUR(cat);
2351 }
fc241834 2352 if (howlen == e_star) {
f337b084
TH
2353 if (utf8) goto string_copy;
2354 len = fromlen+1;
2355 }
2356 s = aptr;
2357 end = aptr + fromlen;
2358 fromlen = datumtype == 'Z' ? len-1 : len;
e1b825c1 2359 while ((SSize_t) fromlen > 0 && s < end) {
f337b084
TH
2360 s += UTF8SKIP(s);
2361 fromlen--;
2362 }
2363 if (s > end)
2364 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2365 if (utf8) {
fc241834 2366 len = fromlen;
f337b084
TH
2367 if (datumtype == 'Z') len++;
2368 fromlen = s-aptr;
2369 len += fromlen;
fc241834 2370
f337b084 2371 goto string_copy;
fc241834 2372 }
f337b084
TH
2373 fromlen = len - fromlen;
2374 if (datumtype == 'Z') fromlen--;
2375 if (howlen == e_star) {
2376 len = fromlen;
2377 if (datumtype == 'Z') len++;
fc241834 2378 }
f337b084 2379 GROWING(0, cat, start, cur, len);
9df874cd 2380 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2381 datumtype | TYPE_IS_PACK))
5637ef5b 2382 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
e1b825c1
Z
2383 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2384 (int)datumtype, aptr, end, cur, fromlen);
f337b084 2385 cur += fromlen;
a6ec74c1 2386 len -= fromlen;
f337b084
TH
2387 } else if (utf8) {
2388 if (howlen == e_star) {
2389 len = fromlen;
2390 if (datumtype == 'Z') len++;
a6ec74c1 2391 }
e1b825c1 2392 if (len <= (SSize_t) fromlen) {
f337b084
TH
2393 fromlen = len;
2394 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2395 }
fc241834 2396 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2397 upgrade, so:
2398 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2399 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2400 len -= fromlen;
2401 while (fromlen > 0) {
230e1fce 2402 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2403 aptr++;
2404 fromlen--;
fc241834 2405 }
f337b084
TH
2406 } else {
2407 string_copy:
2408 if (howlen == e_star) {
2409 len = fromlen;
2410 if (datumtype == 'Z') len++;
2411 }
e1b825c1 2412 if (len <= (SSize_t) fromlen) {
f337b084
TH
2413 fromlen = len;
2414 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2415 }
f337b084
TH
2416 GROWING(0, cat, start, cur, len);
2417 Copy(aptr, cur, fromlen, char);
2418 cur += fromlen;
2419 len -= fromlen;
a6ec74c1 2420 }
f337b084
TH
2421 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2422 cur += len;
3c4fb04a 2423 SvTAINT(cat);
a6ec74c1 2424 break;
f337b084 2425 }
a6ec74c1 2426 case 'B':
f337b084 2427 case 'b': {
b83604b4 2428 const char *str, *end;
e1b825c1 2429 SSize_t l, field_len;
f337b084
TH
2430 U8 bits;
2431 bool utf8_source;
2432 U32 utf8_flags;
a6ec74c1 2433
fc241834 2434 fromstr = NEXTFROM;
b83604b4 2435 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2436 end = str + fromlen;
2437 if (DO_UTF8(fromstr)) {
2438 utf8_source = TRUE;
041457d9 2439 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2440 } else {
2441 utf8_source = FALSE;
2442 utf8_flags = 0; /* Unused, but keep compilers happy */
2443 }
2444 if (howlen == e_star) len = fromlen;
2445 field_len = (len+7)/8;
2446 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2447 if (len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2448 bits = 0;
2449 l = 0;
2450 if (datumtype == 'B')
2451 while (l++ < len) {
2452 if (utf8_source) {
95b63a38 2453 UV val = 0;
f337b084
TH
2454 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2455 bits |= val & 1;
2456 } else bits |= *str++ & 1;
2457 if (l & 7) bits <<= 1;
fc241834 2458 else {
f337b084
TH
2459 PUSH_BYTE(utf8, cur, bits);
2460 bits = 0;
a6ec74c1
JH
2461 }
2462 }
f337b084
TH
2463 else
2464 /* datumtype == 'b' */
2465 while (l++ < len) {
2466 if (utf8_source) {
95b63a38 2467 UV val = 0;
f337b084
TH
2468 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2469 if (val & 1) bits |= 0x80;
2470 } else if (*str++ & 1)
2471 bits |= 0x80;
2472 if (l & 7) bits >>= 1;
fc241834 2473 else {
f337b084
TH
2474 PUSH_BYTE(utf8, cur, bits);
2475 bits = 0;
a6ec74c1
JH
2476 }
2477 }
f337b084
TH
2478 l--;
2479 if (l & 7) {
fc241834 2480 if (datumtype == 'B')
f337b084 2481 bits <<= 7 - (l & 7);
fc241834 2482 else
f337b084
TH
2483 bits >>= 7 - (l & 7);
2484 PUSH_BYTE(utf8, cur, bits);
2485 l += 7;
a6ec74c1 2486 }
f337b084
TH
2487 /* Determine how many chars are left in the requested field */
2488 l /= 8;
2489 if (howlen == e_star) field_len = 0;
2490 else field_len -= l;
2491 Zero(cur, field_len, char);
2492 cur += field_len;
a6ec74c1 2493 break;
f337b084 2494 }
a6ec74c1 2495 case 'H':
f337b084 2496 case 'h': {
10516c54 2497 const char *str, *end;
e1b825c1 2498 SSize_t l, field_len;
f337b084
TH
2499 U8 bits;
2500 bool utf8_source;
2501 U32 utf8_flags;
a6ec74c1 2502
fc241834 2503 fromstr = NEXTFROM;
10516c54 2504 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2505 end = str + fromlen;
2506 if (DO_UTF8(fromstr)) {
2507 utf8_source = TRUE;
041457d9 2508 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2509 } else {
2510 utf8_source = FALSE;
2511 utf8_flags = 0; /* Unused, but keep compilers happy */
2512 }
2513 if (howlen == e_star) len = fromlen;
2514 field_len = (len+1)/2;
2515 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2516 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2517 bits = 0;
2518 l = 0;
2519 if (datumtype == 'H')
2520 while (l++ < len) {
2521 if (utf8_source) {
95b63a38 2522 UV val = 0;
f337b084
TH
2523 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2524 if (val < 256 && isALPHA(val))
2525 bits |= (val + 9) & 0xf;
a6ec74c1 2526 else
f337b084
TH
2527 bits |= val & 0xf;
2528 } else if (isALPHA(*str))
2529 bits |= (*str++ + 9) & 0xf;
2530 else
2531 bits |= *str++ & 0xf;
2532 if (l & 1) bits <<= 4;
fc241834 2533 else {
f337b084
TH
2534 PUSH_BYTE(utf8, cur, bits);
2535 bits = 0;
a6ec74c1
JH
2536 }
2537 }
f337b084
TH
2538 else
2539 while (l++ < len) {
2540 if (utf8_source) {
95b63a38 2541 UV val = 0;
f337b084
TH
2542 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2543 if (val < 256 && isALPHA(val))
2544 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2545 else
f337b084
TH
2546 bits |= (val & 0xf) << 4;
2547 } else if (isALPHA(*str))
2548 bits |= ((*str++ + 9) & 0xf) << 4;
2549 else
2550 bits |= (*str++ & 0xf) << 4;
2551 if (l & 1) bits >>= 4;
fc241834 2552 else {
f337b084
TH
2553 PUSH_BYTE(utf8, cur, bits);
2554 bits = 0;
a6ec74c1 2555 }
fc241834 2556 }
f337b084
TH
2557 l--;
2558 if (l & 1) {
2559 PUSH_BYTE(utf8, cur, bits);
2560 l++;
2561 }
2562 /* Determine how many chars are left in the requested field */
2563 l /= 2;
2564 if (howlen == e_star) field_len = 0;
2565 else field_len -= l;
2566 Zero(cur, field_len, char);
2567 cur += field_len;
2568 break;
fc241834
RGS
2569 }
2570 case 'c':
f337b084
TH
2571 while (len-- > 0) {
2572 IV aiv;
2573 fromstr = NEXTFROM;
354b74ae 2574 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2575 if ((-128 > aiv || aiv > 127))
2576 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2577 "Character in 'c' format wrapped in pack");
585ec06d 2578 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2579 }
2580 break;
2581 case 'C':
f337b084
TH
2582 if (len == 0) {
2583 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2584 break;
2585 }
a6ec74c1 2586 while (len-- > 0) {
f337b084 2587 IV aiv;
a6ec74c1 2588 fromstr = NEXTFROM;
354b74ae 2589 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2590 if ((0 > aiv || aiv > 0xff))
2591 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2592 "Character in 'C' format wrapped in pack");
1651fc44 2593 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2594 }
fc241834
RGS
2595 break;
2596 case 'W': {
2597 char *end;
670f1322 2598 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2599
2600 end = start+SvLEN(cat)-1;
2601 if (utf8) end -= UTF8_MAXLEN-1;
2602 while (len-- > 0) {
2603 UV auv;
2604 fromstr = NEXTFROM;
354b74ae 2605 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2606 if (in_bytes) auv = auv % 0x100;
2607 if (utf8) {
2608 W_utf8:
bf4a926a 2609 if (cur >= end) {
fc241834 2610 *cur = '\0';
b162af07 2611 SvCUR_set(cat, cur - start);
fc241834
RGS
2612
2613 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2614 end = start+SvLEN(cat)-UTF8_MAXLEN;
2615 }
05fefba9 2616 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
fc241834
RGS
2617 } else {
2618 if (auv >= 0x100) {
2619 if (!SvUTF8(cat)) {
2620 *cur = '\0';
b162af07 2621 SvCUR_set(cat, cur - start);
fc241834
RGS
2622 marked_upgrade(aTHX_ cat, symptr);
2623 lookahead.flags |= FLAG_DO_UTF8;
2624 lookahead.strbeg = symptr->strbeg;
2625 utf8 = 1;
2626 start = SvPVX(cat);
2627 cur = start + SvCUR(cat);
2628 end = start+SvLEN(cat)-UTF8_MAXLEN;
2629 goto W_utf8;
2630 }
a2a5de95
NC
2631 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2632 "Character in 'W' format wrapped in pack");
fc241834
RGS
2633 auv &= 0xff;
2634 }
2635 if (cur >= end) {
2636 *cur = '\0';
b162af07 2637 SvCUR_set(cat, cur - start);
fc241834
RGS
2638 GROWING(0, cat, start, cur, len+1);
2639 end = start+SvLEN(cat)-1;
2640 }
fe2774ed 2641 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2642 }
2643 }
2644 break;
fc241834
RGS
2645 }
2646 case 'U': {
2647 char *end;
2648
2649 if (len == 0) {
2650 if (!(symptr->flags & FLAG_DO_UTF8)) {
2651 marked_upgrade(aTHX_ cat, symptr);
2652 lookahead.flags |= FLAG_DO_UTF8;
2653 lookahead.strbeg = symptr->strbeg;
2654 }
2655 utf8 = 0;
2656 goto no_change;
2657 }
2658
2659 end = start+SvLEN(cat);
2660 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2661 while (len-- > 0) {
fc241834 2662 UV auv;
a6ec74c1 2663 fromstr = NEXTFROM;
354b74ae 2664 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2665 if (utf8) {
81b3f640 2666 U8 buffer[UTF8_MAXLEN+1], *endb;
05fefba9 2667 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
fc241834
RGS
2668 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2669 *cur = '\0';
b162af07 2670 SvCUR_set(cat, cur - start);
fc241834
RGS
2671 GROWING(0, cat, start, cur,
2672 len+(endb-buffer)*UTF8_EXPAND);
2673 end = start+SvLEN(cat);
2674 }
d21d7215 2675 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2676 } else {
2677 if (cur >= end) {
2678 *cur = '\0';
b162af07 2679 SvCUR_set(cat, cur - start);
fc241834
RGS
2680 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2681 end = start+SvLEN(cat)-UTF8_MAXLEN;
2682 }
05fefba9
KW
2683 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2684 UNI_TO_NATIVE(auv),
2685 0);
fc241834 2686 }
a6ec74c1 2687 }
a6ec74c1 2688 break;
fc241834 2689 }
a6ec74c1
JH
2690 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2691 case 'f':
a6ec74c1 2692 while (len-- > 0) {
f337b084
TH
2693 float afloat;
2694 NV anv;
a6ec74c1 2695 fromstr = NEXTFROM;
f337b084 2696 anv = SvNV(fromstr);
a7157111 2697# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2698 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2699 * on Alpha; fake it if we don't have them.
2700 */
f337b084 2701 if (anv > FLT_MAX)
fc241834 2702 afloat = FLT_MAX;
f337b084 2703 else if (anv < -FLT_MAX)
fc241834 2704 afloat = -FLT_MAX;
f337b084 2705 else afloat = (float)anv;
baf3cf9c 2706# else
a7157111 2707# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
d22fb6ca
DD
2708 if(Perl_isnan(anv))
2709 afloat = (float)NV_NAN;
2710 else
a7157111
JH
2711# endif
2712# ifdef NV_INF
919894b7
DM
2713 /* a simple cast to float is undefined if outside
2714 * the range of values that can be represented */
2715 afloat = (float)(anv > FLT_MAX ? NV_INF :
2716 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2717# endif
baf3cf9c 2718# endif
3a88beaa 2719 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2720 }
2721 break;
2722 case 'd':
a6ec74c1 2723 while (len-- > 0) {
f337b084
TH
2724 double adouble;
2725 NV anv;
a6ec74c1 2726 fromstr = NEXTFROM;
f337b084 2727 anv = SvNV(fromstr);
a7157111 2728# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2729 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2730 * on Alpha; fake it if we don't have them.
2731 */
f337b084 2732 if (anv > DBL_MAX)
fc241834 2733 adouble = DBL_MAX;
f337b084 2734 else if (anv < -DBL_MAX)
fc241834 2735 adouble = -DBL_MAX;
f337b084 2736 else adouble = (double)anv;
baf3cf9c 2737# else
f337b084 2738 adouble = (double)anv;
baf3cf9c 2739# endif
3a88beaa 2740 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2741 }
2742 break;
fc241834 2743 case 'F': {
275663fa 2744 NV_bytes anv;
1109a392 2745 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2746 while (len-- > 0) {
2747 fromstr = NEXTFROM;
cd07c537
DM
2748#ifdef __GNUC__
2749 /* to work round a gcc/x86 bug; don't use SvNV */
2750 anv.nv = sv_2nv(fromstr);
070e2677
TC
2751# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2752 && LONG_DOUBLESIZE > 10
2753 /* GCC sometimes overwrites the padding in the
2754 assignment above */
2755 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2756# endif
cd07c537 2757#else
275663fa 2758 anv.nv = SvNV(fromstr);
cd07c537 2759#endif
3a88beaa 2760 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2761 }
2762 break;
fc241834 2763 }
92d41999 2764#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2765 case 'D': {
275663fa 2766 ld_bytes aldouble;
1109a392
MHM
2767 /* long doubles can have unused bits, which may be nonzero */
2768 Zero(&aldouble, 1, long double);
92d41999
JH
2769 while (len-- > 0) {
2770 fromstr = NEXTFROM;
cd07c537
DM
2771# ifdef __GNUC__
2772 /* to work round a gcc/x86 bug; don't use SvNV */
2773 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2774# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2775 /* GCC sometimes overwrites the padding in the
2776 assignment above */
2777 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2778# endif
cd07c537 2779# else
275663fa 2780 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2781# endif
3a88beaa
NC
2782 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2783 needs_swap);
92d41999
JH
2784 }
2785 break;
fc241834 2786 }
92d41999 2787#endif
068bd2e7 2788 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2789 case 'n':
2790 while (len-- > 0) {
f337b084 2791 I16 ai16;
a6ec74c1 2792 fromstr = NEXTFROM;
354b74ae 2793 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2794 ai16 = PerlSock_htons(ai16);
3a88beaa 2795 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2796 }
2797 break;
068bd2e7 2798 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2799 case 'v':
2800 while (len-- > 0) {
f337b084 2801 I16 ai16;
a6ec74c1 2802 fromstr = NEXTFROM;
354b74ae 2803 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2804 ai16 = htovs(ai16);
3a88beaa 2805 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2806 }
2807 break;
49704364 2808 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2809#if SHORTSIZE != SIZE16
fc241834 2810 while (len-- > 0) {
f337b084 2811 unsigned short aushort;
fc241834 2812 fromstr = NEXTFROM;
354b74ae 2813 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2814 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2815 }
49704364
WL
2816 break;
2817#else
924ba076 2818 /* FALLTHROUGH */
a6ec74c1 2819#endif
49704364 2820 case 'S':
fc241834 2821 while (len-- > 0) {
f337b084 2822 U16 au16;
fc241834 2823 fromstr = NEXTFROM;
354b74ae 2824 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2825 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2826 }
2827 break;
49704364 2828 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2829#if SHORTSIZE != SIZE16
fc241834 2830 while (len-- > 0) {
f337b084 2831 short ashort;
fc241834 2832 fromstr = NEXTFROM;
354b74ae 2833 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2834 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2835 }
49704364
WL
2836 break;
2837#else
924ba076 2838 /* FALLTHROUGH */
a6ec74c1 2839#endif
49704364
WL
2840 case 's':
2841 while (len-- > 0) {
f337b084 2842 I16 ai16;
49704364 2843 fromstr = NEXTFROM;
354b74ae 2844 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2845 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2846 }
2847 break;
2848 case 'I':
49704364 2849 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2850 while (len-- > 0) {
f337b084 2851 unsigned int auint;
a6ec74c1 2852 fromstr = NEXTFROM;
354b74ae 2853 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2854 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2855 }
2856 break;
92d41999
JH
2857 case 'j':
2858 while (len-- > 0) {
f337b084 2859 IV aiv;
92d41999 2860 fromstr = NEXTFROM;
354b74ae 2861 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2862 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2863 }
2864 break;
2865 case 'J':
2866 while (len-- > 0) {
f337b084 2867 UV auv;
92d41999 2868 fromstr = NEXTFROM;
354b74ae 2869 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2870 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2871 }
2872 break;
a6ec74c1
JH
2873 case 'w':
2874 while (len-- > 0) {
f337b084 2875 NV anv;
a6ec74c1 2876 fromstr = NEXTFROM;
83388ae2 2877 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2878 anv = SvNV_nomg(fromstr);
a6ec74c1 2879
f337b084
TH
2880 if (anv < 0) {
2881 *cur = '\0';
b162af07 2882 SvCUR_set(cat, cur - start);
49704364 2883 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2884 }
a6ec74c1 2885
196b62db
NC
2886 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2887 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2888 any negative IVs will have already been got by the croak()
2889 above. IOK is untrue for fractions, so we test them
2890 against UV_MAX_P1. */
f337b084
TH
2891 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2892 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2893 char *in = buf + sizeof(buf);
1e9a122e 2894 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2895
2896 do {
eb160463 2897 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2898 auv >>= 7;
2899 } while (auv);
2900 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2901 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2902 in, (buf + sizeof(buf)) - in);
2903 } else if (SvPOKp(fromstr))
2904 goto w_string;
a6ec74c1 2905 else if (SvNOKp(fromstr)) {
0258719b 2906 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2907 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2908 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2909 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2910 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2911 Some C compilers are strict about integral constant
2912 expressions so we conservatively divide by a slightly
2913 smaller integer instead of multiplying by the exact
2914 floating-point value.
0258719b
NC
2915 */
2916#ifdef NV_MAX_10_EXP
f337b084 2917 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2918 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2919#else
f337b084 2920 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2921 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2922#endif
a6ec74c1
JH
2923 char *in = buf + sizeof(buf);
2924
8b6e33c7 2925 anv = Perl_floor(anv);
a6ec74c1 2926 do {
8b6e33c7 2927 const NV next = Perl_floor(anv / 128);
a6ec74c1 2928 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2929 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2930 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2931 anv = next;
2932 } while (anv > 0);
a6ec74c1 2933 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2934 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2935 in, (buf + sizeof(buf)) - in);
2936 } else {
8b6e33c7
AL
2937 const char *from;
2938 char *result, *in;
735b914b
JH
2939 SV *norm;
2940 STRLEN len;
2941 bool done;
2942
f337b084 2943 w_string:
735b914b 2944 /* Copy string and check for compliance */
1e9a122e 2945 from = SvPV_nomg_const(fromstr, len);
735b914b 2946 if ((norm = is_an_int(from, len)) == NULL)
49704364 2947 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2948
a02a5408 2949 Newx(result, len, char);
735b914b
JH
2950 in = result + len;
2951 done = FALSE;
f337b084 2952 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2953 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2954 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2955 in, (result + len) - in);
735b914b
JH
2956 Safefree(result);
2957 SvREFCNT_dec(norm); /* free norm */
fc241834 2958 }
a6ec74c1
JH
2959 }
2960 break;
2961 case 'i':
49704364 2962 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2963 while (len-- > 0) {
f337b084 2964 int aint;
a6ec74c1 2965 fromstr = NEXTFROM;
354b74ae 2966 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2967 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2968 }
2969 break;
068bd2e7 2970 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2971 case 'N':
2972 while (len-- > 0) {
f337b084 2973 U32 au32;
a6ec74c1 2974 fromstr = NEXTFROM;
354b74ae 2975 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2976 au32 = PerlSock_htonl(au32);
3a88beaa 2977 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2978 }
2979 break;
068bd2e7 2980 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2981 case 'V':
2982 while (len-- > 0) {
f337b084 2983 U32 au32;
a6ec74c1 2984 fromstr = NEXTFROM;
354b74ae 2985 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2986 au32 = htovl(au32);
3a88beaa 2987 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2988 }
2989 break;
49704364 2990 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2991#if LONGSIZE != SIZE32
fc241834 2992 while (len-- > 0) {
f337b084 2993 unsigned long aulong;
fc241834 2994 fromstr = NEXTFROM;
354b74ae 2995 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2996 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2997 }
49704364
WL
2998 break;
2999#else
3000 /* Fall though! */
a6ec74c1 3001#endif
49704364 3002 case 'L':
fc241834 3003 while (len-- > 0) {
f337b084 3004 U32 au32;
fc241834 3005 fromstr = NEXTFROM;
354b74ae 3006 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 3007 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
3008 }
3009 break;
49704364 3010 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3011#if LONGSIZE != SIZE32
fc241834 3012 while (len-- > 0) {
f337b084 3013 long along;
fc241834 3014 fromstr = NEXTFROM;
354b74ae 3015 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3016 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 3017 }
49704364
WL
3018 break;
3019#else
3020 /* Fall though! */
a6ec74c1 3021#endif
49704364
WL
3022 case 'l':
3023 while (len-- > 0) {
f337b084 3024 I32 ai32;
49704364 3025 fromstr = NEXTFROM;
354b74ae 3026 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3027 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3028 }
3029 break;
c174bf3b 3030#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3031 case 'Q':
3032 while (len-- > 0) {
f337b084 3033 Uquad_t auquad;
a6ec74c1 3034 fromstr = NEXTFROM;
354b74ae 3035 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3036 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3037 }
3038 break;
3039 case 'q':
3040 while (len-- > 0) {
f337b084 3041 Quad_t aquad;
a6ec74c1 3042 fromstr = NEXTFROM;
354b74ae 3043 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3044 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3045 }
3046 break;
1640b983 3047#endif
a6ec74c1
JH
3048 case 'P':
3049 len = 1; /* assume SV is correct length */
f337b084 3050 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3051 /* FALLTHROUGH */
a6ec74c1
JH
3052 case 'p':
3053 while (len-- > 0) {
83003860 3054 const char *aptr;
f337b084 3055
a6ec74c1 3056 fromstr = NEXTFROM;
28a4f200
TH
3057 SvGETMAGIC(fromstr);
3058 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3059 else {
a6ec74c1
JH
3060 /* XXX better yet, could spirit away the string to
3061 * a safe spot and hang on to it until the result
3062 * of pack() (and all copies of the result) are
3063 * gone.
3064 */
b199a398
FC
3065 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3066 || (SvPADTMP(fromstr) &&
a2a5de95
NC
3067 !SvREADONLY(fromstr)))) {
3068 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3069 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3070 }
3071 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3072 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3073 else
2596d9fe 3074 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3075 }
3a88beaa 3076 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3077 }
3078 break;
fc241834 3079 case 'u': {
f7fe979e 3080 const char *aptr, *aend;
fc241834 3081 bool from_utf8;
f337b084 3082
a6ec74c1 3083 fromstr = NEXTFROM;
fc241834
RGS
3084 if (len <= 2) len = 45;
3085 else len = len / 3 * 3;
3086 if (len >= 64) {
a2a5de95
NC
3087 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3088 "Field too wide in 'u' format in pack");
fc241834
RGS
3089 len = 63;
3090 }
83003860 3091 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3092 from_utf8 = DO_UTF8(fromstr);
3093 if (from_utf8) {
3094 aend = aptr + fromlen;
3f63b0e5 3095 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3096 } else aend = NULL; /* Unused, but keep compilers happy */
3097 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3098 while (fromlen > 0) {
fc241834 3099 U8 *end;
e1b825c1 3100 SSize_t todo;
fc241834 3101 U8 hunk[1+63/3*4+1];
a6ec74c1 3102
e1b825c1 3103 if ((SSize_t)fromlen > len)
a6ec74c1
JH
3104 todo = len;
3105 else
3106 todo = fromlen;
fc241834
RGS
3107 if (from_utf8) {
3108 char buffer[64];
9df874cd 3109 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3110 'u' | TYPE_IS_PACK)) {
3111 *cur = '\0';
b162af07 3112 SvCUR_set(cat, cur - start);
5637ef5b 3113 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
e1b825c1
Z
3114 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3115 aptr, aend, buffer, todo);
fc241834 3116 }
e68aed92 3117 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3118 } else {
e68aed92 3119 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3120 aptr += todo;
3121 }
3a88beaa 3122 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3123 fromlen -= todo;
3124 }
a6ec74c1
JH
3125 break;
3126 }
f337b084
TH
3127 }
3128 *cur = '\0';
b162af07 3129 SvCUR_set(cat, cur - start);
f337b084 3130 no_change:
49704364 3131 *symptr = lookahead;
a6ec74c1 3132 }
49704364 3133 return beglist;
18529408
IZ
3134}
3135#undef NEXTFROM
3136
3137
3138PP(pp_pack)
3139{
20b7effb 3140 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3141 SV *cat = TARG;
18529408 3142 STRLEN fromlen;
349d4f2f 3143 SV *pat_sv = *++MARK;
eb578fdb
KW
3144 const char *pat = SvPV_const(pat_sv, fromlen);
3145 const char *patend = pat + fromlen;
18529408
IZ
3146
3147 MARK++;
e8f7c79f 3148 SvPVCLEAR(cat);
f337b084 3149 SvUTF8_off(cat);
18529408 3150
7accc089 3151 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3152
fd879d93
KW
3153 if (SvUTF8(cat)) {
3154 STRLEN result_len;
3155 const char * result = SvPV_nomg(cat, result_len);
3156 const U8 * error_pos;
3157
3158 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3159 _force_out_malformed_utf8_message(error_pos,
3160 (U8 *) result + result_len,
3161 0, /* no flags */
3162 1 /* Die */
3163 );
3164 NOT_REACHED; /* NOTREACHED */
3165 }
3166 }
3167
a6ec74c1
JH
3168 SvSETMAGIC(cat);
3169 SP = ORIGMARK;
3170 PUSHs(cat);
3171 RETURN;
3172}
a6ec74c1 3173
73cb7263 3174/*
14d04a33 3175 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3176 */