This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Finalise perldelta for 5.33.5
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
f7fe979e
AL
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
e1b825c1
Z
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
f7fe979e
AL
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
41f35ffd 115#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
08ca2aa3
TH
116# define OFF16(p) ((char *) (p))
117# define OFF32(p) ((char *) (p))
41f35ffd
AC
118#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119# define OFF16(p) ((char*)(p))
120# define OFF32(p) ((char*)(p))
121#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124#else
125# error "bad cray byte order"
a6ec74c1
JH
126#endif
127
3a88beaa
NC
128#define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130#define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 132
a1219b5e
NC
133#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137#else
138# error "Unsupported byteorder"
20aa3a38
NC
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
142 logic adding to deal with any mixed-endian transformations needed.
143 */
a1219b5e
NC
144#endif
145
08ca2aa3 146/* Only to be used inside a loop (see the break) */
aaec8192 147#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 148STMT_START { \
228e69a7 149 if (UNLIKELY(utf8)) { \
9df874cd 150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 151 (char *) (buf), len, datumtype)) break; \
08ca2aa3 152 } else { \
228e69a7 153 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
154 S_reverse_copy(s, (char *) (buf), len); \
155 else \
156 Copy(s, (char *) (buf), len, char); \
275663fa 157 s += len; \
08ca2aa3 158 } \
08ca2aa3
TH
159} STMT_END
160
aaec8192
NC
161#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 163
aaec8192
NC
164#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 166
aaec8192
NC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 169
3a88beaa
NC
170#define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 172
49704364
WL
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
0f12654f 194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
0860e090
KW
215#define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \
216 NATIVE_TO_LATIN1(' '), \
217 NATIVE_TO_LATIN1('a') - 1)
a6ec74c1 218
66c611c5 219/* type modifiers */
62f95557 220#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
221#define TYPE_IS_BIG_ENDIAN 0x200
222#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 223#define TYPE_IS_PACK 0x800
1109a392 224#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 225#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
226#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
227
7212898e
NC
228# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
229# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
230
231# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
232
78d46eaa 233#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 234#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
235#define PACK_SIZE_MASK 0x3F
236
4ad176ef 237#include "packsizetables.inc"
78d46eaa 238
20aa3a38
NC
239static void
240S_reverse_copy(const char *src, char *dest, STRLEN len)
241{
242 dest += len;
243 while (len--)
244 *--dest = *src++;
245}
246
08ca2aa3 247STATIC U8
9df874cd 248utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 249{
08ca2aa3 250 STRLEN retlen;
df337714
KW
251 UV val;
252
253 if (*s >= end) {
254 goto croak;
255 }
256 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 257 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714
KW
258 if (retlen == (STRLEN) -1)
259 croak:
f337b084
TH
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 262 if (val >= 0x100) {
a2a5de95
NC
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
266 val &= 0xff;
267 }
268 *s += retlen;
fe2774ed 269 return (U8)val;
08ca2aa3
TH
270}
271
f337b084 272#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
274 *(U8 *)(s)++)
275
276STATIC bool
e1b825c1 277S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
08ca2aa3
TH
278{
279 UV val;
280 STRLEN retlen;
f7fe979e 281 const char *from = *s;
08ca2aa3 282 int bad = 0;
f7fe979e 283 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
285 const bool needs_swap = NEEDS_SWAP(datumtype);
286
228e69a7 287 if (UNLIKELY(needs_swap))
20aa3a38
NC
288 buf += buf_len;
289
08ca2aa3
TH
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
f337b084 292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
df337714 293 if (retlen == (STRLEN) -1) {
18c47def 294 from += UTF8_SAFE_SKIP(from, end);
08ca2aa3
TH
295 bad |= 1;
296 } else from += retlen;
297 if (val >= 0x100) {
298 bad |= 2;
299 val &= 0xff;
300 }
228e69a7 301 if (UNLIKELY(needs_swap))
20aa3a38
NC
302 *(U8 *)--buf = (U8)val;
303 else
304 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
305 }
306 /* We have enough characters for the buffer. Did we have problems ? */
307 if (bad) {
308 if (bad & 1) {
309 /* Rewalk the string fragment while warning */
f7fe979e 310 const char *ptr;
e1b825c1 311 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
c80e42f3 314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
315 }
316 if (from > end) from = end;
317 }
a2a5de95
NC
318 if ((bad & 2))
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 320 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
324 }
325 *s = from;
326 return TRUE;
327}
328
64844641 329STATIC char *
d21d7215
KW
330S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 332
228e69a7 333 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
334 const U8 *p = start + len;
335 while (p-- > start) {
55d09dc8 336 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
337 }
338 } else {
339 const U8 * const end = start + len;
340 while (start < end) {
55d09dc8 341 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
342 start++;
343 }
f337b084 344 }
64844641 345 return dest;
f337b084
TH
346}
347
3a88beaa 348#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 349STMT_START { \
228e69a7 350 if (UNLIKELY(utf8)) \
d21d7215 351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 352 else { \
228e69a7 353 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
354 S_reverse_copy((char *)(buf), cur, len); \
355 else \
356 Copy(buf, cur, len, char); \
230e1fce
NC
357 (cur) += (len); \
358 } \
f337b084
TH
359} STMT_END
360
f17fed50
TC
361#define SAFE_UTF8_EXPAND(var) \
362STMT_START { \
363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365 (var) = (var) * UTF8_EXPAND; \
366} STMT_END
367
368#define GROWING2(utf8, cat, start, cur, item_size, item_count) \
369STMT_START { \
370 if (SSize_t_MAX / (item_size) < (item_count)) \
371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
373} STMT_END
374
f337b084
TH
375#define GROWING(utf8, cat, start, cur, in_len) \
376STMT_START { \
377 STRLEN glen = (in_len); \
f17fed50
TC
378 STRLEN catcur = (STRLEN)((cur) - (start)); \
379 if (utf8) SAFE_UTF8_EXPAND(glen); \
380 if (SSize_t_MAX - glen < catcur) \
381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382 if (catcur + glen >= SvLEN(cat)) { \
0bd48802 383 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
384 (cur) = (start) + SvCUR(cat); \
385 } \
386} STMT_END
387
388#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
389STMT_START { \
f7fe979e 390 const STRLEN glen = (in_len); \
f337b084 391 STRLEN gl = glen; \
f17fed50 392 if (utf8) SAFE_UTF8_EXPAND(gl); \
f337b084
TH
393 if ((cur) + gl >= (start) + SvLEN(cat)) { \
394 *cur = '\0'; \
b162af07 395 SvCUR_set((cat), (cur) - (start)); \
0bd48802 396 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
397 (cur) = (start) + SvCUR(cat); \
398 } \
3a88beaa 399 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
400} STMT_END
401
402#define PUSH_BYTE(utf8, s, byte) \
403STMT_START { \
404 if (utf8) { \
f7fe979e 405 const U8 au8 = (byte); \
d21d7215 406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
407 } else *(U8 *)(s)++ = (byte); \
408} STMT_END
409
410/* Only to be used inside a loop (see the break) */
411#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
412STMT_START { \
413 STRLEN retlen; \
414 if (str >= end) break; \
415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
df337714 416 if (retlen == (STRLEN) -1) { \
f337b084
TH
417 *cur = '\0'; \
418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
419 } \
420 str += retlen; \
421} STMT_END
422
f7fe979e
AL
423static const char *_action( const tempsym_t* symptr )
424{
10edeb5d 425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
426}
427
206947d2 428/* Returns the sizeof() struct described by pat */
e1b825c1 429STATIC SSize_t
f337b084 430S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 431{
e1b825c1 432 SSize_t total = 0;
206947d2 433
7918f24d
NC
434 PERL_ARGS_ASSERT_MEASURE_STRUCT;
435
49704364 436 while (next_symbol(symptr)) {
e1b825c1 437 SSize_t len, size;
f337b084
TH
438
439 switch (symptr->howlen) {
fc241834 440 case e_star:
49704364 441 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 442 _action( symptr ) );
81d52ecd 443
f337b084
TH
444 default:
445 /* e_no_len and e_number */
446 len = symptr->length;
447 break;
49704364
WL
448 }
449
a7a3cfaa 450 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 451 if (!size) {
e1b825c1 452 SSize_t star;
80a13697
NC
453 /* endianness doesn't influence the size of a type */
454 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
455 default:
456 Perl_croak(aTHX_ "Invalid type '%c' in %s",
457 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 458 _action( symptr ) );
28be1210
TH
459 case '.' | TYPE_IS_SHRIEKING:
460 case '@' | TYPE_IS_SHRIEKING:
80a13697 461 case '@':
28be1210 462 case '.':
80a13697
NC
463 case '/':
464 case 'U': /* XXXX Is it correct? */
465 case 'w':
466 case 'u':
467 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 468 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 469 _action( symptr ) );
80a13697
NC
470 case '%':
471 size = 0;
472 break;
473 case '(':
fc241834
RGS
474 {
475 tempsym_t savsym = *symptr;
476 symptr->patptr = savsym.grpbeg;
477 symptr->patend = savsym.grpend;
478 /* XXXX Theoretically, we need to measure many times at
479 different positions, since the subexpression may contain
480 alignment commands, but be not of aligned length.
481 Need to detect this and croak(). */
482 size = measure_struct(symptr);
483 *symptr = savsym;
484 break;
485 }
80a13697
NC
486 case 'X' | TYPE_IS_SHRIEKING:
487 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
488 */
489 if (!len) /* Avoid division by 0 */
490 len = 1;
491 len = total % len; /* Assumed: the start is aligned. */
924ba076 492 /* FALLTHROUGH */
80a13697
NC
493 case 'X':
494 size = -1;
495 if (total < len)
f7fe979e 496 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
497 break;
498 case 'x' | TYPE_IS_SHRIEKING:
499 if (!len) /* Avoid division by 0 */
500 len = 1;
501 star = total % len; /* Assumed: the start is aligned. */
502 if (star) /* Other portable ways? */
503 len = len - star;
504 else
505 len = 0;
924ba076 506 /* FALLTHROUGH */
80a13697
NC
507 case 'x':
508 case 'A':
509 case 'Z':
510 case 'a':
80a13697
NC
511 size = 1;
512 break;
513 case 'B':
514 case 'b':
515 len = (len + 7)/8;
516 size = 1;
517 break;
518 case 'H':
519 case 'h':
520 len = (len + 1)/2;
521 size = 1;
522 break;
78d46eaa 523
80a13697
NC
524 case 'P':
525 len = 1;
526 size = sizeof(char*);
78d46eaa
NC
527 break;
528 }
206947d2
IZ
529 }
530 total += len * size;
531 }
532 return total;
533}
534
49704364
WL
535
536/* locate matching closing parenthesis or bracket
537 * returns char pointer to char after match, or NULL
538 */
f7fe979e 539STATIC const char *
5aaab254 540S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 541{
7918f24d
NC
542 PERL_ARGS_ASSERT_GROUP_END;
543
49704364 544 while (patptr < patend) {
f7fe979e 545 const char c = *patptr++;
49704364
WL
546
547 if (isSPACE(c))
548 continue;
549 else if (c == ender)
550 return patptr-1;
551 else if (c == '#') {
552 while (patptr < patend && *patptr != '\n')
553 patptr++;
554 continue;
555 } else if (c == '(')
556 patptr = group_end(patptr, patend, ')') + 1;
557 else if (c == '[')
558 patptr = group_end(patptr, patend, ']') + 1;
18529408 559 }
49704364
WL
560 Perl_croak(aTHX_ "No group ending character '%c' found in template",
561 ender);
a25b5927 562 NOT_REACHED; /* NOTREACHED */
18529408
IZ
563}
564
49704364
WL
565
566/* Convert unsigned decimal number to binary.
567 * Expects a pointer to the first digit and address of length variable
568 * Advances char pointer to 1st non-digit char and returns number
fc241834 569 */
f7fe979e 570STATIC const char *
e1b825c1 571S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
49704364 572{
e1b825c1 573 SSize_t len = *patptr++ - '0';
7918f24d
NC
574
575 PERL_ARGS_ASSERT_GET_NUM;
576
49704364 577 while (isDIGIT(*patptr)) {
e1b825c1
Z
578 SSize_t nlen = (len * 10) + (*patptr++ - '0');
579 if (nlen < 0 || nlen/10 != len)
49704364 580 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
e1b825c1 581 len = nlen;
49704364
WL
582 }
583 *lenptr = len;
584 return patptr;
585}
586
587/* The marvellous template parsing routine: Using state stored in *symptr,
588 * locates next template code and count
589 */
590STATIC bool
f337b084 591S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 592{
f7fe979e 593 const char* patptr = symptr->patptr;
0bcc34c2 594 const char* const patend = symptr->patend;
49704364 595
7918f24d
NC
596 PERL_ARGS_ASSERT_NEXT_SYMBOL;
597
49704364
WL
598 symptr->flags &= ~FLAG_SLASH;
599
600 while (patptr < patend) {
601 if (isSPACE(*patptr))
602 patptr++;
603 else if (*patptr == '#') {
604 patptr++;
605 while (patptr < patend && *patptr != '\n')
606 patptr++;
607 if (patptr < patend)
608 patptr++;
609 } else {
fc241834 610 /* We should have found a template code */
49704364 611 I32 code = *patptr++ & 0xFF;
66c611c5 612 U32 inherited_modifiers = 0;
49704364
WL
613
614 if (code == ','){ /* grandfather in commas but with a warning */
615 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
616 symptr->flags |= FLAG_COMMA;
617 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 618 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
619 }
620 continue;
621 }
fc241834 622
49704364 623 /* for '(', skip to ')' */
fc241834 624 if (code == '(') {
49704364
WL
625 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
626 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 627 _action( symptr ) );
49704364
WL
628 symptr->grpbeg = patptr;
629 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
630 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
631 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 632 _action( symptr ) );
49704364
WL
633 }
634
66c611c5
MHM
635 /* look for group modifiers to inherit */
636 if (TYPE_ENDIANNESS(symptr->flags)) {
637 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
638 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
639 }
640
1109a392
MHM
641 /* look for modifiers */
642 while (patptr < patend) {
b7787f18
AL
643 const char *allowed;
644 I32 modifier;
1109a392
MHM
645 switch (*patptr) {
646 case '!':
647 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 648 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
649 break;
650 case '>':
651 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 652 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
653 break;
654 case '<':
655 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 656 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
657 break;
658 default:
b7787f18
AL
659 allowed = "";
660 modifier = 0;
1109a392
MHM
661 break;
662 }
66c611c5 663
1109a392
MHM
664 if (modifier == 0)
665 break;
66c611c5 666
1109a392
MHM
667 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
668 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 669 allowed, _action( symptr ) );
66c611c5
MHM
670
671 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 672 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 673 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
674 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
675 TYPE_ENDIANNESS_MASK)
676 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 677 *patptr, _action( symptr ) );
66c611c5 678
a2a5de95
NC
679 if ((code & modifier)) {
680 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
681 "Duplicate modifier '%c' after '%c' in %s",
682 *patptr, (int) TYPE_NO_MODIFIERS(code),
683 _action( symptr ) );
1109a392 684 }
66c611c5 685
1109a392
MHM
686 code |= modifier;
687 patptr++;
49704364
WL
688 }
689
66c611c5
MHM
690 /* inherit modifiers */
691 code |= inherited_modifiers;
692
fc241834 693 /* look for count and/or / */
49704364
WL
694 if (patptr < patend) {
695 if (isDIGIT(*patptr)) {
696 patptr = get_num( patptr, &symptr->length );
697 symptr->howlen = e_number;
698
699 } else if (*patptr == '*') {
700 patptr++;
701 symptr->howlen = e_star;
702
703 } else if (*patptr == '[') {
f7fe979e 704 const char* lenptr = ++patptr;
49704364
WL
705 symptr->howlen = e_number;
706 patptr = group_end( patptr, patend, ']' ) + 1;
707 /* what kind of [] is it? */
708 if (isDIGIT(*lenptr)) {
709 lenptr = get_num( lenptr, &symptr->length );
710 if( *lenptr != ']' )
711 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 712 _action( symptr ) );
49704364
WL
713 } else {
714 tempsym_t savsym = *symptr;
715 symptr->patend = patptr-1;
716 symptr->patptr = lenptr;
717 savsym.length = measure_struct(symptr);
718 *symptr = savsym;
719 }
720 } else {
721 symptr->howlen = e_no_len;
722 symptr->length = 1;
723 }
724
725 /* try to find / */
726 while (patptr < patend) {
727 if (isSPACE(*patptr))
728 patptr++;
729 else if (*patptr == '#') {
730 patptr++;
731 while (patptr < patend && *patptr != '\n')
732 patptr++;
733 if (patptr < patend)
734 patptr++;
735 } else {
66c611c5 736 if (*patptr == '/') {
49704364
WL
737 symptr->flags |= FLAG_SLASH;
738 patptr++;
66c611c5
MHM
739 if (patptr < patend &&
740 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 741 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 742 _action( symptr ) );
49704364
WL
743 }
744 break;
745 }
18529408 746 }
49704364
WL
747 } else {
748 /* at end - no count, no / */
749 symptr->howlen = e_no_len;
750 symptr->length = 1;
751 }
752
753 symptr->code = code;
fc241834 754 symptr->patptr = patptr;
49704364 755 return TRUE;
18529408 756 }
49704364 757 }
fc241834 758 symptr->patptr = patptr;
49704364 759 return FALSE;
18529408
IZ
760}
761
18529408 762/*
fc241834 763 There is no way to cleanly handle the case where we should process the
08ca2aa3 764 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
765 (e.g. estimates like strend-s as an upper bound for the number of
766 characters left wouldn't work). So if we foresee the need of this
767 (pattern starts with U or contains U0), we want to work on the encoded
768 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
769 themselves if they need to do a lot of unpacks like this on it
770*/
fc241834 771STATIC bool
08ca2aa3
TH
772need_utf8(const char *pat, const char *patend)
773{
774 bool first = TRUE;
7918f24d
NC
775
776 PERL_ARGS_ASSERT_NEED_UTF8;
777
08ca2aa3
TH
778 while (pat < patend) {
779 if (pat[0] == '#') {
780 pat++;
f7fe979e 781 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
782 if (!pat) return FALSE;
783 } else if (pat[0] == 'U') {
784 if (first || pat[1] == '0') return TRUE;
785 } else first = FALSE;
786 pat++;
787 }
788 return FALSE;
789}
790
791STATIC char
792first_symbol(const char *pat, const char *patend) {
7918f24d
NC
793 PERL_ARGS_ASSERT_FIRST_SYMBOL;
794
08ca2aa3
TH
795 while (pat < patend) {
796 if (pat[0] != '#') return pat[0];
797 pat++;
f7fe979e 798 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
799 if (!pat) return 0;
800 pat++;
801 }
802 return 0;
803}
804
805/*
dcccc8ff 806
7accc089
JH
807=for apidoc unpackstring
808
796b6530 809The engine implementing the C<unpack()> Perl function.
21ebfc7a 810
796b6530
KW
811Using the template C<pat..patend>, this function unpacks the string
812C<s..strend> into a number of mortal SVs, which it pushes onto the perl
813argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 814C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
815pushed elements.
816
796b6530
KW
817The C<strend> and C<patend> pointers should point to the byte following the
818last character of each string.
21ebfc7a
DM
819
820Although this function returns its values on the perl argument stack, it
821doesn't take any parameters from that stack (and thus in particular
796b6530 822there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 823example).
7accc089
JH
824
825=cut */
826
e1b825c1 827SSize_t
f7fe979e 828Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 829{
f7fe979e 830 tempsym_t sym;
08ca2aa3 831
7918f24d
NC
832 PERL_ARGS_ASSERT_UNPACKSTRING;
833
f337b084 834 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
835 else if (need_utf8(pat, patend)) {
836 /* We probably should try to avoid this in case a scalar context call
837 wouldn't get to the "U0" */
838 STRLEN len = strend - s;
230e1fce 839 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
840 SAVEFREEPV(s);
841 strend = s + len;
f337b084 842 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
843 }
844
f337b084
TH
845 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
846 flags |= FLAG_PARSE_UTF8;
08ca2aa3 847
f7fe979e 848 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
849
850 return unpack_rec(&sym, s, s, strend, NULL );
851}
852
e1b825c1 853STATIC SSize_t
f7fe979e 854S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 855{
20b7effb 856 dSP;
3297d27d 857 SV *sv = NULL;
e1b825c1 858 const SSize_t start_sp_offset = SP - PL_stack_base;
49704364 859 howlen_t howlen;
e1b825c1 860 SSize_t checksum = 0;
92d41999 861 UV cuv = 0;
a6ec74c1 862 NV cdouble = 0.0;
e1b825c1 863 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 864 bool beyond = FALSE;
21c16052 865 bool explicit_length;
9e27e96a 866 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 867 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
868
869 PERL_ARGS_ASSERT_UNPACK_REC;
870
28be1210 871 symptr->strbeg = s - strbeg;
49704364 872
49704364 873 while (next_symbol(symptr)) {
a7a3cfaa 874 packprops_t props;
e1b825c1 875 SSize_t len;
f337b084 876 I32 datumtype = symptr->code;
a1219b5e 877 bool needs_swap;
206947d2 878 /* do first one only unless in list context
08ca2aa3 879 / is implemented by unpacking the count, then popping it from the
206947d2 880 stack, so must check that we're not in the middle of a / */
49704364 881 if ( unpack_only_one
206947d2 882 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 883 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 884 break;
49704364 885
f337b084 886 switch (howlen = symptr->howlen) {
fc241834
RGS
887 case e_star:
888 len = strend - strbeg; /* long enough */
49704364 889 break;
f337b084
TH
890 default:
891 /* e_no_len and e_number */
892 len = symptr->length;
893 break;
49704364 894 }
18529408 895
21c16052 896 explicit_length = TRUE;
a6ec74c1 897 redo_switch:
49704364 898 beyond = s >= strend;
a7a3cfaa
TH
899
900 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
901 if (props) {
902 /* props nonzero means we can process this letter. */
e1b825c1
Z
903 const SSize_t size = props & PACK_SIZE_MASK;
904 const SSize_t howmany = (strend - s) / size;
a7a3cfaa
TH
905 if (len > howmany)
906 len = howmany;
907
908 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
909 if (len && unpack_only_one) len = 1;
910 EXTEND(SP, len);
911 EXTEND_MORTAL(len);
78d46eaa
NC
912 }
913 }
a7a3cfaa 914
a1219b5e
NC
915 needs_swap = NEEDS_SWAP(datumtype);
916
1109a392 917 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 918 default:
1109a392 919 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 920
a6ec74c1 921 case '%':
49704364 922 if (howlen == e_no_len)
18529408 923 len = 16; /* len is not specified */
a6ec74c1 924 checksum = len;
92d41999 925 cuv = 0;
a6ec74c1 926 cdouble = 0;
18529408 927 continue;
81d52ecd 928
18529408
IZ
929 case '(':
930 {
49704364 931 tempsym_t savsym = *symptr;
9e27e96a 932 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 933 symptr->flags |= group_modifiers;
49704364 934 symptr->patend = savsym.grpend;
28be1210 935 symptr->previous = &savsym;
49704364 936 symptr->level++;
18529408 937 PUTBACK;
c6f750d1 938 if (len && unpack_only_one) len = 1;
18529408 939 while (len--) {
49704364 940 symptr->patptr = savsym.grpbeg;
f337b084
TH
941 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
942 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
943 unpack_rec(symptr, s, strbeg, strend, &s);
944 if (s == strend && savsym.howlen == e_star)
49704364 945 break; /* No way to continue */
18529408
IZ
946 }
947 SPAGAIN;
28be1210 948 savsym.flags = symptr->flags & ~group_modifiers;
49704364 949 *symptr = savsym;
18529408
IZ
950 break;
951 }
28be1210 952 case '.' | TYPE_IS_SHRIEKING:
28be1210 953 case '.': {
9e27e96a 954 const char *from;
28be1210 955 SV *sv;
9e27e96a 956 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
957 if (howlen == e_star) from = strbeg;
958 else if (len <= 0) from = s;
959 else {
960 tempsym_t *group = symptr;
961
962 while (--len && group) group = group->previous;
963 from = group ? strbeg + group->strbeg : strbeg;
964 }
965 sv = from <= s ?
00646304
CB
966 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
967 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 968 mXPUSHs(sv);
28be1210
TH
969 break;
970 }
28be1210 971 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 972 case '@':
28be1210 973 s = strbeg + symptr->strbeg;
28be1210 974 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 975 {
08ca2aa3
TH
976 while (len > 0) {
977 if (s >= strend)
978 Perl_croak(aTHX_ "'@' outside of string in unpack");
979 s += UTF8SKIP(s);
980 len--;
981 }
982 if (s > strend)
983 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
984 } else {
28be1210 985 if (strend-s < len)
fc241834 986 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 987 s += len;
08ca2aa3 988 }
a6ec74c1 989 break;
62f95557
IZ
990 case 'X' | TYPE_IS_SHRIEKING:
991 if (!len) /* Avoid division by 0 */
992 len = 1;
08ca2aa3 993 if (utf8) {
f7fe979e 994 const char *hop, *last;
e1b825c1 995 SSize_t l = len;
f337b084
TH
996 hop = last = strbeg;
997 while (hop < s) {
998 hop += UTF8SKIP(hop);
999 if (--l == 0) {
08ca2aa3 1000 last = hop;
f337b084
TH
1001 l = len;
1002 }
fc241834 1003 }
f337b084
TH
1004 if (last > s)
1005 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1006 s = last;
1007 break;
f337b084
TH
1008 }
1009 len = (s - strbeg) % len;
924ba076 1010 /* FALLTHROUGH */
a6ec74c1 1011 case 'X':
08ca2aa3
TH
1012 if (utf8) {
1013 while (len > 0) {
1014 if (s <= strbeg)
1015 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1016 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1017 if (s <= strbeg)
1018 Perl_croak(aTHX_ "'X' outside of string in unpack");
1019 }
1020 len--;
1021 }
1022 } else {
fc241834
RGS
1023 if (len > s - strbeg)
1024 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1025 s -= len;
08ca2aa3 1026 }
a6ec74c1 1027 break;
9e27e96a 1028 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 1029 SSize_t ai32;
62f95557
IZ
1030 if (!len) /* Avoid division by 0 */
1031 len = 1;
230e1fce
NC
1032 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1033 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1034 if (ai32 == 0) break;
1035 len -= ai32;
9e27e96a 1036 }
924ba076 1037 /* FALLTHROUGH */
a6ec74c1 1038 case 'x':
08ca2aa3
TH
1039 if (utf8) {
1040 while (len>0) {
1041 if (s >= strend)
1042 Perl_croak(aTHX_ "'x' outside of string in unpack");
1043 s += UTF8SKIP(s);
1044 len--;
1045 }
1046 } else {
fc241834
RGS
1047 if (len > strend - s)
1048 Perl_croak(aTHX_ "'x' outside of string in unpack");
1049 s += len;
f337b084 1050 }
a6ec74c1
JH
1051 break;
1052 case '/':
49704364 1053 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1054
a6ec74c1
JH
1055 case 'A':
1056 case 'Z':
1057 case 'a':
08ca2aa3
TH
1058 if (checksum) {
1059 /* Preliminary length estimate is assumed done in 'W' */
1060 if (len > strend - s) len = strend - s;
1061 goto W_checksum;
1062 }
1063 if (utf8) {
e1b825c1 1064 SSize_t l;
f7fe979e 1065 const char *hop;
08ca2aa3
TH
1066 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1067 if (hop >= strend) {
1068 if (hop > strend)
1069 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1070 break;
fc241834 1071 }
a6ec74c1 1072 }
08ca2aa3
TH
1073 if (hop > strend)
1074 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1075 len = hop - s;
1076 } else if (len > strend - s)
1077 len = strend - s;
1078
1079 if (datumtype == 'Z') {
1080 /* 'Z' strips stuff after first null */
f7fe979e 1081 const char *ptr, *end;
f337b084
TH
1082 end = s + len;
1083 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1084 sv = newSVpvn(s, ptr-s);
1085 if (howlen == e_star) /* exact for 'Z*' */
1086 len = ptr-s + (ptr != strend ? 1 : 0);
1087 } else if (datumtype == 'A') {
1088 /* 'A' strips both nulls and spaces */
f7fe979e 1089 const char *ptr;
18bdf90a 1090 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1091 for (ptr = s+len-1; ptr >= s; ptr--) {
1092 if ( *ptr != 0
1093 && !UTF8_IS_CONTINUATION(*ptr)
1094 && !isSPACE_utf8_safe(ptr, strend))
1095 {
1096 break;
1097 }
1098 }
18bdf90a
TH
1099 if (ptr >= s) ptr += UTF8SKIP(ptr);
1100 else ptr++;
28be1210 1101 if (ptr > s+len)
18bdf90a
TH
1102 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1103 } else {
1104 for (ptr = s+len-1; ptr >= s; ptr--)
1105 if (*ptr != 0 && !isSPACE(*ptr)) break;
1106 ptr++;
1107 }
08ca2aa3
TH
1108 sv = newSVpvn(s, ptr-s);
1109 } else sv = newSVpvn(s, len);
1110
1111 if (utf8) {
1112 SvUTF8_on(sv);
1113 /* Undo any upgrade done due to need_utf8() */
f337b084 1114 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1115 sv_utf8_downgrade(sv, 0);
a6ec74c1 1116 }
6e449a3a 1117 mXPUSHs(sv);
08ca2aa3 1118 s += len;
a6ec74c1
JH
1119 break;
1120 case 'B':
08ca2aa3
TH
1121 case 'b': {
1122 char *str;
49704364 1123 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1124 len = (strend - s) * 8;
1125 if (checksum) {
f337b084 1126 if (utf8)
08ca2aa3 1127 while (len >= 8 && s < strend) {
9df874cd 1128 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1129 len -= 8;
1130 }
f337b084 1131 else
fc241834 1132 while (len >= 8) {
08ca2aa3 1133 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1134 len -= 8;
1135 }
08ca2aa3
TH
1136 if (len && s < strend) {
1137 U8 bits;
f337b084
TH
1138 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1139 if (datumtype == 'b')
a6ec74c1 1140 while (len-- > 0) {
92d41999 1141 if (bits & 1) cuv++;
a6ec74c1
JH
1142 bits >>= 1;
1143 }
f337b084 1144 else
a6ec74c1 1145 while (len-- > 0) {
08ca2aa3 1146 if (bits & 0x80) cuv++;
a6ec74c1
JH
1147 bits <<= 1;
1148 }
fc241834 1149 }
a6ec74c1
JH
1150 break;
1151 }
08ca2aa3 1152
561b68a9 1153 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1154 SvPOK_on(sv);
1155 str = SvPVX(sv);
1156 if (datumtype == 'b') {
f337b084 1157 U8 bits = 0;
e1b825c1 1158 const SSize_t ai32 = len;
08ca2aa3
TH
1159 for (len = 0; len < ai32; len++) {
1160 if (len & 7) bits >>= 1;
1161 else if (utf8) {
1162 if (s >= strend) break;
9df874cd 1163 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1164 } else bits = *(U8 *) s++;
1165 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1166 }
08ca2aa3 1167 } else {
f337b084 1168 U8 bits = 0;
e1b825c1 1169 const SSize_t ai32 = len;
08ca2aa3
TH
1170 for (len = 0; len < ai32; len++) {
1171 if (len & 7) bits <<= 1;
1172 else if (utf8) {
1173 if (s >= strend) break;
9df874cd 1174 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1175 } else bits = *(U8 *) s++;
1176 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1177 }
1178 }
1179 *str = '\0';
aa07b2f6 1180 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1181 XPUSHs(sv);
a6ec74c1 1182 break;
08ca2aa3 1183 }
a6ec74c1 1184 case 'H':
08ca2aa3 1185 case 'h': {
3297d27d 1186 char *str = NULL;
fc241834 1187 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1188 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1189 len = (strend - s) * 2;
858fe5e1
TC
1190 if (!checksum) {
1191 sv = sv_2mortal(newSV(len ? len : 1));
1192 SvPOK_on(sv);
1193 str = SvPVX(sv);
1194 }
a6ec74c1 1195 if (datumtype == 'h') {
f337b084 1196 U8 bits = 0;
e1b825c1 1197 SSize_t ai32 = len;
fc241834
RGS
1198 for (len = 0; len < ai32; len++) {
1199 if (len & 1) bits >>= 4;
1200 else if (utf8) {
1201 if (s >= strend) break;
9df874cd 1202 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1203 } else bits = * (U8 *) s++;
858fe5e1
TC
1204 if (!checksum)
1205 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1206 }
08ca2aa3 1207 } else {
f337b084 1208 U8 bits = 0;
e1b825c1 1209 const SSize_t ai32 = len;
08ca2aa3
TH
1210 for (len = 0; len < ai32; len++) {
1211 if (len & 1) bits <<= 4;
1212 else if (utf8) {
1213 if (s >= strend) break;
9df874cd 1214 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1215 } else bits = *(U8 *) s++;
858fe5e1
TC
1216 if (!checksum)
1217 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1218 }
1219 }
858fe5e1
TC
1220 if (!checksum) {
1221 *str = '\0';
1222 SvCUR_set(sv, str - SvPVX_const(sv));
1223 XPUSHs(sv);
1224 }
a6ec74c1 1225 break;
08ca2aa3 1226 }
1651fc44
ML
1227 case 'C':
1228 if (len == 0) {
1229 if (explicit_length)
1230 /* Switch to "character" mode */
1231 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1232 break;
1233 }
924ba076 1234 /* FALLTHROUGH */
a6ec74c1 1235 case 'c':
1651fc44
ML
1236 while (len-- > 0 && s < strend) {
1237 int aint;
1238 if (utf8)
1239 {
1240 STRLEN retlen;
1241 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1242 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1243 if (retlen == (STRLEN) -1)
1651fc44
ML
1244 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1245 s += retlen;
1246 }
1247 else
1248 aint = *(U8 *)(s)++;
1249 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1250 aint -= 256;
08ca2aa3 1251 if (!checksum)
6e449a3a 1252 mPUSHi(aint);
73cb7263
NC
1253 else if (checksum > bits_in_uv)
1254 cdouble += (NV)aint;
1255 else
1256 cuv += aint;
a6ec74c1
JH
1257 }
1258 break;
08ca2aa3
TH
1259 case 'W':
1260 W_checksum:
1651fc44 1261 if (utf8) {
08ca2aa3 1262 while (len-- > 0 && s < strend) {
08ca2aa3 1263 STRLEN retlen;
f7fe979e 1264 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1265 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1266 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1267 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1268 s += retlen;
1269 if (!checksum)
6e449a3a 1270 mPUSHu(val);
08ca2aa3
TH
1271 else if (checksum > bits_in_uv)
1272 cdouble += (NV) val;
d6d3e8bd 1273 else
08ca2aa3 1274 cuv += val;
fc241834 1275 }
08ca2aa3 1276 } else if (!checksum)
a6ec74c1 1277 while (len-- > 0) {
f7fe979e 1278 const U8 ch = *(U8 *) s++;
6e449a3a 1279 mPUSHu(ch);
a6ec74c1 1280 }
08ca2aa3
TH
1281 else if (checksum > bits_in_uv)
1282 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1283 else
1284 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1285 break;
1286 case 'U':
35bcd338 1287 if (len == 0) {
c5333953 1288 if (explicit_length && howlen != e_star) {
08ca2aa3 1289 /* Switch to "bytes in UTF-8" mode */
f337b084 1290 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1291 else
1292 /* Should be impossible due to the need_utf8() test */
1293 Perl_croak(aTHX_ "U0 mode on a byte string");
1294 }
35bcd338
JH
1295 break;
1296 }
08ca2aa3 1297 if (len > strend - s) len = strend - s;
fc241834 1298 if (!checksum) {
08ca2aa3
TH
1299 if (len && unpack_only_one) len = 1;
1300 EXTEND(SP, len);
1301 EXTEND_MORTAL(len);
fc241834 1302 }
08ca2aa3
TH
1303 while (len-- > 0 && s < strend) {
1304 STRLEN retlen;
1305 UV auv;
1306 if (utf8) {
81b3f640 1307 U8 result[UTF8_MAXLEN+1];
f7fe979e 1308 const char *ptr = s;
08ca2aa3 1309 STRLEN len;
08ca2aa3
TH
1310 /* Bug: warns about bad utf8 even if we are short on bytes
1311 and will break out of the loop */
9df874cd 1312 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1313 'U'))
08ca2aa3
TH
1314 break;
1315 len = UTF8SKIP(result);
9df874cd 1316 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1317 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1318 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1319 len,
1320 &retlen,
1321 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1322 s = ptr;
1323 } else {
3ece276e
KW
1324 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1325 strend - s,
1326 &retlen,
1327 UTF8_ALLOW_DEFAULT));
df337714 1328 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1329 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1330 s += retlen;
1331 }
1332 if (!checksum)
6e449a3a 1333 mPUSHu(auv);
73cb7263 1334 else if (checksum > bits_in_uv)
08ca2aa3 1335 cdouble += (NV) auv;
73cb7263 1336 else
08ca2aa3 1337 cuv += auv;
a6ec74c1
JH
1338 }
1339 break;
49704364
WL
1340 case 's' | TYPE_IS_SHRIEKING:
1341#if SHORTSIZE != SIZE16
73cb7263 1342 while (len-- > 0) {
08ca2aa3 1343 short ashort;
aaec8192 1344 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1345 if (!checksum)
6e449a3a 1346 mPUSHi(ashort);
73cb7263
NC
1347 else if (checksum > bits_in_uv)
1348 cdouble += (NV)ashort;
1349 else
1350 cuv += ashort;
49704364
WL
1351 }
1352 break;
1353#else
924ba076 1354 /* FALLTHROUGH */
a6ec74c1 1355#endif
49704364 1356 case 's':
73cb7263 1357 while (len-- > 0) {
08ca2aa3
TH
1358 I16 ai16;
1359
1360#if U16SIZE > SIZE16
1361 ai16 = 0;
1362#endif
aaec8192 1363 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1364#if U16SIZE > SIZE16
73cb7263
NC
1365 if (ai16 > 32767)
1366 ai16 -= 65536;
a6ec74c1 1367#endif
08ca2aa3 1368 if (!checksum)
6e449a3a 1369 mPUSHi(ai16);
73cb7263
NC
1370 else if (checksum > bits_in_uv)
1371 cdouble += (NV)ai16;
1372 else
1373 cuv += ai16;
a6ec74c1
JH
1374 }
1375 break;
49704364
WL
1376 case 'S' | TYPE_IS_SHRIEKING:
1377#if SHORTSIZE != SIZE16
73cb7263 1378 while (len-- > 0) {
08ca2aa3 1379 unsigned short aushort;
c0f2b636 1380 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
08ca2aa3 1381 if (!checksum)
6e449a3a 1382 mPUSHu(aushort);
73cb7263
NC
1383 else if (checksum > bits_in_uv)
1384 cdouble += (NV)aushort;
1385 else
1386 cuv += aushort;
49704364
WL
1387 }
1388 break;
1389#else
924ba076 1390 /* FALLTHROUGH */
49704364 1391#endif
a6ec74c1
JH
1392 case 'v':
1393 case 'n':
1394 case 'S':
73cb7263 1395 while (len-- > 0) {
08ca2aa3
TH
1396 U16 au16;
1397#if U16SIZE > SIZE16
1398 au16 = 0;
1399#endif
aaec8192 1400 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1401 if (datumtype == 'n')
1402 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1403 if (datumtype == 'v')
1404 au16 = vtohs(au16);
08ca2aa3 1405 if (!checksum)
6e449a3a 1406 mPUSHu(au16);
73cb7263 1407 else if (checksum > bits_in_uv)
f337b084 1408 cdouble += (NV) au16;
73cb7263
NC
1409 else
1410 cuv += au16;
a6ec74c1
JH
1411 }
1412 break;
068bd2e7
MHM
1413 case 'v' | TYPE_IS_SHRIEKING:
1414 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1415 while (len-- > 0) {
08ca2aa3
TH
1416 I16 ai16;
1417# if U16SIZE > SIZE16
1418 ai16 = 0;
1419# endif
aaec8192 1420 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1421 /* There should never be any byte-swapping here. */
1422 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1423 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1424 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1425 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1426 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1427 if (!checksum)
6e449a3a 1428 mPUSHi(ai16);
73cb7263 1429 else if (checksum > bits_in_uv)
08ca2aa3 1430 cdouble += (NV) ai16;
73cb7263
NC
1431 else
1432 cuv += ai16;
068bd2e7
MHM
1433 }
1434 break;
a6ec74c1 1435 case 'i':
49704364 1436 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1437 while (len-- > 0) {
08ca2aa3 1438 int aint;
aaec8192 1439 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1440 if (!checksum)
6e449a3a 1441 mPUSHi(aint);
73cb7263
NC
1442 else if (checksum > bits_in_uv)
1443 cdouble += (NV)aint;
1444 else
1445 cuv += aint;
a6ec74c1
JH
1446 }
1447 break;
1448 case 'I':
49704364 1449 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1450 while (len-- > 0) {
08ca2aa3 1451 unsigned int auint;
aaec8192 1452 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1453 if (!checksum)
6e449a3a 1454 mPUSHu(auint);
73cb7263
NC
1455 else if (checksum > bits_in_uv)
1456 cdouble += (NV)auint;
1457 else
1458 cuv += auint;
a6ec74c1
JH
1459 }
1460 break;
92d41999 1461 case 'j':
73cb7263 1462 while (len-- > 0) {
08ca2aa3 1463 IV aiv;
aaec8192 1464 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1465 if (!checksum)
6e449a3a 1466 mPUSHi(aiv);
73cb7263
NC
1467 else if (checksum > bits_in_uv)
1468 cdouble += (NV)aiv;
1469 else
1470 cuv += aiv;
92d41999
JH
1471 }
1472 break;
1473 case 'J':
73cb7263 1474 while (len-- > 0) {
08ca2aa3 1475 UV auv;
aaec8192 1476 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1477 if (!checksum)
6e449a3a 1478 mPUSHu(auv);
73cb7263
NC
1479 else if (checksum > bits_in_uv)
1480 cdouble += (NV)auv;
1481 else
1482 cuv += auv;
92d41999
JH
1483 }
1484 break;
49704364
WL
1485 case 'l' | TYPE_IS_SHRIEKING:
1486#if LONGSIZE != SIZE32
73cb7263 1487 while (len-- > 0) {
08ca2aa3 1488 long along;
aaec8192 1489 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1490 if (!checksum)
6e449a3a 1491 mPUSHi(along);
73cb7263
NC
1492 else if (checksum > bits_in_uv)
1493 cdouble += (NV)along;
1494 else
1495 cuv += along;
49704364
WL
1496 }
1497 break;
1498#else
924ba076 1499 /* FALLTHROUGH */
a6ec74c1 1500#endif
49704364 1501 case 'l':
73cb7263 1502 while (len-- > 0) {
08ca2aa3
TH
1503 I32 ai32;
1504#if U32SIZE > SIZE32
1505 ai32 = 0;
1506#endif
aaec8192 1507 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1508#if U32SIZE > SIZE32
08ca2aa3 1509 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1510#endif
08ca2aa3 1511 if (!checksum)
6e449a3a 1512 mPUSHi(ai32);
73cb7263
NC
1513 else if (checksum > bits_in_uv)
1514 cdouble += (NV)ai32;
1515 else
1516 cuv += ai32;
a6ec74c1
JH
1517 }
1518 break;
49704364
WL
1519 case 'L' | TYPE_IS_SHRIEKING:
1520#if LONGSIZE != SIZE32
73cb7263 1521 while (len-- > 0) {
08ca2aa3 1522 unsigned long aulong;
aaec8192 1523 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1524 if (!checksum)
6e449a3a 1525 mPUSHu(aulong);
73cb7263
NC
1526 else if (checksum > bits_in_uv)
1527 cdouble += (NV)aulong;
1528 else
1529 cuv += aulong;
49704364
WL
1530 }
1531 break;
1532#else
924ba076 1533 /* FALLTHROUGH */
49704364 1534#endif
a6ec74c1
JH
1535 case 'V':
1536 case 'N':
1537 case 'L':
73cb7263 1538 while (len-- > 0) {
08ca2aa3
TH
1539 U32 au32;
1540#if U32SIZE > SIZE32
1541 au32 = 0;
1542#endif
aaec8192 1543 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1544 if (datumtype == 'N')
1545 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1546 if (datumtype == 'V')
1547 au32 = vtohl(au32);
08ca2aa3 1548 if (!checksum)
6e449a3a 1549 mPUSHu(au32);
fc241834
RGS
1550 else if (checksum > bits_in_uv)
1551 cdouble += (NV)au32;
1552 else
1553 cuv += au32;
a6ec74c1
JH
1554 }
1555 break;
068bd2e7
MHM
1556 case 'V' | TYPE_IS_SHRIEKING:
1557 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1558 while (len-- > 0) {
08ca2aa3 1559 I32 ai32;
f8e5a5db 1560#if U32SIZE > SIZE32
08ca2aa3 1561 ai32 = 0;
f8e5a5db 1562#endif
aaec8192 1563 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1564 /* There should never be any byte swapping here. */
1565 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1566 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1567 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1568 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1569 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1570 if (!checksum)
6e449a3a 1571 mPUSHi(ai32);
73cb7263
NC
1572 else if (checksum > bits_in_uv)
1573 cdouble += (NV)ai32;
1574 else
1575 cuv += ai32;
068bd2e7
MHM
1576 }
1577 break;
a6ec74c1 1578 case 'p':
a6ec74c1 1579 while (len-- > 0) {
f7fe979e 1580 const char *aptr;
aaec8192 1581 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1582 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1583 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1584 }
1585 break;
1586 case 'w':
a6ec74c1
JH
1587 {
1588 UV auv = 0;
e1b825c1 1589 size_t bytes = 0;
fc241834 1590
08ca2aa3
TH
1591 while (len > 0 && s < strend) {
1592 U8 ch;
f337b084 1593 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1594 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1595 /* UTF8_IS_XXXXX not right here because this is a BER, not
1596 * UTF-8 format - using constant 0x80 */
08ca2aa3 1597 if (ch < 0x80) {
a6ec74c1 1598 bytes = 0;
6e449a3a 1599 mPUSHu(auv);
a6ec74c1
JH
1600 len--;
1601 auv = 0;
08ca2aa3 1602 continue;
a6ec74c1 1603 }
08ca2aa3 1604 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1605 const char *t;
a6ec74c1 1606
147e3846
KW
1607 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1608 (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1609 while (s < strend) {
f337b084 1610 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1611 sv = mul128(sv, (U8)(ch & 0x7f));
1612 if (!(ch & 0x80)) {
a6ec74c1
JH
1613 bytes = 0;
1614 break;
1615 }
1616 }
10516c54 1617 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1618 while (*t == '0')
1619 t++;
1620 sv_chop(sv, t);
6e449a3a 1621 mPUSHs(sv);
a6ec74c1
JH
1622 len--;
1623 auv = 0;
1624 }
1625 }
1626 if ((s >= strend) && bytes)
49704364 1627 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1628 }
1629 break;
1630 case 'P':
49704364
WL
1631 if (symptr->howlen == e_star)
1632 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1633 EXTEND(SP, 1);
2d3e0934 1634 if (s + sizeof(char*) <= strend) {
08ca2aa3 1635 char *aptr;
aaec8192 1636 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1637 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1638 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1639 }
a6ec74c1 1640 break;
c174bf3b 1641#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1642 case 'q':
73cb7263 1643 while (len-- > 0) {
08ca2aa3 1644 Quad_t aquad;
aaec8192 1645 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1646 if (!checksum)
c174bf3b 1647 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1648 else if (checksum > bits_in_uv)
1649 cdouble += (NV)aquad;
1650 else
1651 cuv += aquad;
1652 }
a6ec74c1
JH
1653 break;
1654 case 'Q':
73cb7263 1655 while (len-- > 0) {
08ca2aa3 1656 Uquad_t auquad;
aaec8192 1657 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1658 if (!checksum)
c174bf3b 1659 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1660 else if (checksum > bits_in_uv)
1661 cdouble += (NV)auquad;
1662 else
1663 cuv += auquad;
a6ec74c1
JH
1664 }
1665 break;
1640b983 1666#endif
a6ec74c1
JH
1667 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1668 case 'f':
73cb7263 1669 while (len-- > 0) {
08ca2aa3 1670 float afloat;
aaec8192 1671 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1672 if (!checksum)
6e449a3a 1673 mPUSHn(afloat);
08ca2aa3 1674 else
73cb7263 1675 cdouble += afloat;
fc241834 1676 }
a6ec74c1
JH
1677 break;
1678 case 'd':
73cb7263 1679 while (len-- > 0) {
08ca2aa3 1680 double adouble;
aaec8192 1681 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1682 if (!checksum)
6e449a3a 1683 mPUSHn(adouble);
08ca2aa3 1684 else
73cb7263 1685 cdouble += adouble;
fc241834 1686 }
a6ec74c1 1687 break;
92d41999 1688 case 'F':
73cb7263 1689 while (len-- > 0) {
275663fa 1690 NV_bytes anv;
aaec8192
NC
1691 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1692 datumtype, needs_swap);
08ca2aa3 1693 if (!checksum)
275663fa 1694 mPUSHn(anv.nv);
08ca2aa3 1695 else
275663fa 1696 cdouble += anv.nv;
fc241834 1697 }
92d41999
JH
1698 break;
1699#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1700 case 'D':
73cb7263 1701 while (len-- > 0) {
275663fa 1702 ld_bytes aldouble;
aaec8192
NC
1703 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1704 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1705 /* The most common long double format, the x86 80-bit
1706 * extended precision, has either 2 or 6 unused bytes,
1707 * which may contain garbage, which may contain
1708 * unintentional data. While we do zero the bytes of
1709 * the long double data in pack(), here in unpack() we
1710 * don't, because it's really hard to envision that
1711 * reading the long double off aldouble would be
e075ae47 1712 * affected by the unused bytes.
42262fd3
JH
1713 *
1714 * Note that trying to unpack 'long doubles' of 'long
1715 * doubles' packed in another system is in the general
1716 * case doomed without having more detail. */
08ca2aa3 1717 if (!checksum)
275663fa 1718 mPUSHn(aldouble.ld);
08ca2aa3 1719 else
275663fa 1720 cdouble += aldouble.ld;
92d41999
JH
1721 }
1722 break;
1723#endif
a6ec74c1 1724 case 'u':
858fe5e1 1725 if (!checksum) {
f7fe979e 1726 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1727 sv = sv_2mortal(newSV(l));
12cad9bd
TC
1728 if (l) {
1729 SvPOK_on(sv);
1730 *SvEND(sv) = '\0';
1731 }
08ca2aa3 1732 }
08ca2aa3 1733
bee81d36
KW
1734 /* Note that all legal uuencoded strings are ASCII printables, so
1735 * have the same representation under UTF-8 vs not. This means we
1736 * can ignore UTF8ness on legal input. For illegal we stop at the
1737 * first failure, and don't report where/what that is, so again we
1738 * can ignore UTF8ness */
1739
9233b56b
KW
1740 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1741 I32 a, b, c, d;
1742 char hunk[3];
1743
1744 len = PL_uudmap[*(U8*)s++] & 077;
1745 while (len > 0) {
1746 if (s < strend && ISUUCHAR(*s))
1747 a = PL_uudmap[*(U8*)s++] & 077;
1748 else
1749 a = 0;
1750 if (s < strend && ISUUCHAR(*s))
1751 b = PL_uudmap[*(U8*)s++] & 077;
1752 else
1753 b = 0;
1754 if (s < strend && ISUUCHAR(*s))
1755 c = PL_uudmap[*(U8*)s++] & 077;
1756 else
1757 c = 0;
1758 if (s < strend && ISUUCHAR(*s))
1759 d = PL_uudmap[*(U8*)s++] & 077;
1760 else
1761 d = 0;
1762 hunk[0] = (char)((a << 2) | (b >> 4));
1763 hunk[1] = (char)((b << 4) | (c >> 2));
1764 hunk[2] = (char)((c << 6) | d);
1765 if (!checksum)
1766 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1767 len -= 3;
1768 }
1769 if (*s == '\n')
1770 s++;
1771 else /* possible checksum byte */
1772 if (s + 1 < strend && s[1] == '\n')
1773 s += 2;
1774 }
858fe5e1
TC
1775 if (!checksum)
1776 XPUSHs(sv);
a6ec74c1 1777 break;
99f862a1 1778 } /* End of switch */
49704364 1779
a6ec74c1 1780 if (checksum) {
4aada8b9 1781 if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1782 (checksum > bits_in_uv &&
4aada8b9 1783 memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
08ca2aa3 1784 NV trouble, anv;
a6ec74c1 1785
08ca2aa3 1786 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1787 while (checksum >= 16) {
1788 checksum -= 16;
08ca2aa3 1789 anv *= 65536.0;
a6ec74c1 1790 }
a6ec74c1 1791 while (cdouble < 0.0)
08ca2aa3 1792 cdouble += anv;
09b94b1f
S
1793 cdouble = Perl_modf(cdouble / anv, &trouble);
1794#ifdef LONGDOUBLE_DOUBLEDOUBLE
1795 /* Workaround for powerpc doubledouble modfl bug:
1796 * close to 1.0L and -1.0L cdouble is 0, and trouble
1797 * is cdouble / anv. */
1798 if (trouble != Perl_ceil(trouble)) {
1799 cdouble = trouble;
1800 if (cdouble > 1.0L) cdouble -= 1.0L;
1801 if (cdouble < -1.0L) cdouble += 1.0L;
1802 }
1803#endif
1804 cdouble *= anv;
c4c5f44a 1805 sv = newSVnv(cdouble);
a6ec74c1
JH
1806 }
1807 else {
fa8ec7c1 1808 if (checksum < bits_in_uv) {
d223e1ea 1809 UV mask = nBIT_MASK(checksum);
92d41999 1810 cuv &= mask;
a6ec74c1 1811 }
c4c5f44a 1812 sv = newSVuv(cuv);
a6ec74c1 1813 }
6e449a3a 1814 mXPUSHs(sv);
a6ec74c1
JH
1815 checksum = 0;
1816 }
fc241834 1817
49704364
WL
1818 if (symptr->flags & FLAG_SLASH){
1819 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1820 break;
49704364
WL
1821 if( next_symbol(symptr) ){
1822 if( symptr->howlen == e_number )
1823 Perl_croak(aTHX_ "Count after length/code in unpack" );
1824 if( beyond ){
1825 /* ...end of char buffer then no decent length available */
1826 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1827 } else {
1828 /* take top of stack (hope it's numeric) */
1829 len = POPi;
1830 if( len < 0 )
1831 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1832 }
1833 } else {
1834 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1835 }
1836 datumtype = symptr->code;
21c16052 1837 explicit_length = FALSE;
49704364
WL
1838 goto redo_switch;
1839 }
a6ec74c1 1840 }
49704364 1841
18529408
IZ
1842 if (new_s)
1843 *new_s = s;
1844 PUTBACK;
1845 return SP - PL_stack_base - start_sp_offset;
1846}
1847
1848PP(pp_unpack)
1849{
1850 dSP;
bab9c0ac 1851 dPOPPOPssrl;
1c23e2bd 1852 U8 gimme = GIMME_V;
18529408
IZ
1853 STRLEN llen;
1854 STRLEN rlen;
5c144d81
NC
1855 const char *pat = SvPV_const(left, llen);
1856 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1857 const char *strend = s + rlen;
1858 const char *patend = pat + llen;
e1b825c1 1859 SSize_t cnt;
18529408
IZ
1860
1861 PUTBACK;
7accc089 1862 cnt = unpackstring(pat, patend, s, strend,
49704364 1863 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1864 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1865
18529408
IZ
1866 SPAGAIN;
1867 if ( !cnt && gimme == G_SCALAR )
1868 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1869 RETURN;
1870}
1871
f337b084 1872STATIC U8 *
e1b825c1 1873doencodes(U8 *h, const U8 *s, SSize_t len)
a6ec74c1 1874{
f337b084 1875 *h++ = PL_uuemap[len];
a6ec74c1 1876 while (len > 2) {
f337b084
TH
1877 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1878 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1879 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1880 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1881 s += 3;
1882 len -= 3;
1883 }
1884 if (len > 0) {
e68aed92 1885 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1886 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1887 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1888 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1889 *h++ = PL_uuemap[0];
a6ec74c1 1890 }
f337b084
TH
1891 *h++ = '\n';
1892 return h;
a6ec74c1
JH
1893}
1894
1895STATIC SV *
f7fe979e 1896S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1897{
8b6e33c7
AL
1898 SV *result = newSVpvn(s, l);
1899 char *const result_c = SvPV_nolen(result); /* convenience */
1900 char *out = result_c;
1901 bool skip = 1;
1902 bool ignore = 0;
a6ec74c1 1903
7918f24d
NC
1904 PERL_ARGS_ASSERT_IS_AN_INT;
1905
a6ec74c1
JH
1906 while (*s) {
1907 switch (*s) {
1908 case ' ':
1909 break;
1910 case '+':
1911 if (!skip) {
1912 SvREFCNT_dec(result);
1913 return (NULL);
1914 }
1915 break;
1916 case '0':
1917 case '1':
1918 case '2':
1919 case '3':
1920 case '4':
1921 case '5':
1922 case '6':
1923 case '7':
1924 case '8':
1925 case '9':
1926 skip = 0;
1927 if (!ignore) {
1928 *(out++) = *s;
1929 }
1930 break;
1931 case '.':
1932 ignore = 1;
1933 break;
1934 default:
1935 SvREFCNT_dec(result);
1936 return (NULL);
1937 }
1938 s++;
1939 }
1940 *(out++) = '\0';
1941 SvCUR_set(result, out - result_c);
1942 return (result);
1943}
1944
1945/* pnum must be '\0' terminated */
1946STATIC int
1947S_div128(pTHX_ SV *pnum, bool *done)
1948{
8b6e33c7
AL
1949 STRLEN len;
1950 char * const s = SvPV(pnum, len);
1951 char *t = s;
1952 int m = 0;
1953
7918f24d
NC
1954 PERL_ARGS_ASSERT_DIV128;
1955
8b6e33c7
AL
1956 *done = 1;
1957 while (*t) {
1958 const int i = m * 10 + (*t - '0');
1959 const int r = (i >> 7); /* r < 10 */
1960 m = i & 0x7F;
1961 if (r) {
1962 *done = 0;
1963 }
1964 *(t++) = '0' + r;
a6ec74c1 1965 }
8b6e33c7
AL
1966 *(t++) = '\0';
1967 SvCUR_set(pnum, (STRLEN) (t - s));
1968 return (m);
a6ec74c1
JH
1969}
1970
18529408 1971/*
7accc089
JH
1972=for apidoc packlist
1973
796b6530 1974The engine implementing C<pack()> Perl function.
7accc089 1975
bfce84ec
AL
1976=cut
1977*/
7accc089
JH
1978
1979void
5aaab254 1980Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1981{
aadb217d
JH
1982 tempsym_t sym;
1983
7918f24d
NC
1984 PERL_ARGS_ASSERT_PACKLIST;
1985
f7fe979e 1986 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1987
f337b084
TH
1988 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1989 Also make sure any UTF8 flag is loaded */
56eb0262 1990 SvPV_force_nolen(cat);
bfce84ec
AL
1991 if (DO_UTF8(cat))
1992 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1993
49704364
WL
1994 (void)pack_rec( cat, &sym, beglist, endlist );
1995}
1996
f337b084
TH
1997/* like sv_utf8_upgrade, but also repoint the group start markers */
1998STATIC void
1999marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2000 STRLEN len;
2001 tempsym_t *group;
f7fe979e
AL
2002 const char *from_ptr, *from_start, *from_end, **marks, **m;
2003 char *to_start, *to_ptr;
f337b084
TH
2004
2005 if (SvUTF8(sv)) return;
2006
aa07b2f6 2007 from_start = SvPVX_const(sv);
f337b084
TH
2008 from_end = from_start + SvCUR(sv);
2009 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 2010 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2011 if (from_ptr == from_end) {
2012 /* Simple case: no character needs to be changed */
2013 SvUTF8_on(sv);
2014 return;
2015 }
2016
3473cf63 2017 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2018 Newx(to_start, len, char);
f337b084
TH
2019 Copy(from_start, to_start, from_ptr-from_start, char);
2020 to_ptr = to_start + (from_ptr-from_start);
2021
a02a5408 2022 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2023 for (group=sym_ptr; group; group = group->previous)
2024 marks[group->level] = from_start + group->strbeg;
2025 marks[sym_ptr->level+1] = from_end+1;
2026 for (m = marks; *m < from_ptr; m++)
2027 *m = to_start + (*m-from_start);
2028
2029 for (;from_ptr < from_end; from_ptr++) {
2030 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2031 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2032 }
2033 *to_ptr = 0;
2034
2035 while (*m == from_ptr) *m++ = to_ptr;
2036 if (m != marks + sym_ptr->level+1) {
2037 Safefree(marks);
2038 Safefree(to_start);
5637ef5b
NC
2039 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2040 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2041 }
2042 for (group=sym_ptr; group; group = group->previous)
2043 group->strbeg = marks[group->level] - to_start;
2044 Safefree(marks);
2045
2046 if (SvOOK(sv)) {
2047 if (SvIVX(sv)) {
b162af07 2048 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2049 from_start -= SvIVX(sv);
2050 SvIV_set(sv, 0);
2051 }
2052 SvFLAGS(sv) &= ~SVf_OOK;
2053 }
2054 if (SvLEN(sv) != 0)
2055 Safefree(from_start);
f880fe2f 2056 SvPV_set(sv, to_start);
b162af07
SP
2057 SvCUR_set(sv, to_ptr - to_start);
2058 SvLEN_set(sv, len);
f337b084
TH
2059 SvUTF8_on(sv);
2060}
2061
2062/* Exponential string grower. Makes string extension effectively O(n)
2063 needed says how many extra bytes we need (not counting the final '\0')
2064 Only grows the string if there is an actual lack of space
2065*/
2066STATIC char *
0bd48802 2067S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2068 const STRLEN cur = SvCUR(sv);
2069 const STRLEN len = SvLEN(sv);
f337b084 2070 STRLEN extend;
7918f24d
NC
2071
2072 PERL_ARGS_ASSERT_SV_EXP_GROW;
2073
f337b084
TH
2074 if (len - cur > needed) return SvPVX(sv);
2075 extend = needed > len ? needed : len;
2076 return SvGROW(sv, len+extend+1);
2077}
49704364 2078
93f6e112 2079static SV *
b197e565 2080S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2081{
2082 SvGETMAGIC(sv);
93f6e112
FC
2083 if (UNLIKELY(SvAMAGIC(sv)))
2084 sv = sv_2num(sv);
99f450cc 2085 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2086 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2087 const NV nv = SvNV_nomg(sv);
2088 if (c == 'w')
147e3846 2089 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
354b74ae 2090 else
147e3846 2091 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2092 }
93f6e112 2093 return sv;
354b74ae
FC
2094}
2095
93f6e112
FC
2096#define SvIV_no_inf(sv,d) \
2097 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2098#define SvUV_no_inf(sv,d) \
2099 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2100
49704364
WL
2101STATIC
2102SV **
f337b084 2103S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2104{
49704364 2105 tempsym_t lookahead;
e1b825c1 2106 SSize_t items = endlist - beglist;
f337b084
TH
2107 bool found = next_symbol(symptr);
2108 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2109 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2110 char* from;
f337b084 2111
7918f24d
NC
2112 PERL_ARGS_ASSERT_PACK_REC;
2113
f337b084
TH
2114 if (symptr->level == 0 && found && symptr->code == 'U') {
2115 marked_upgrade(aTHX_ cat, symptr);
2116 symptr->flags |= FLAG_DO_UTF8;
2117 utf8 = 0;
49704364 2118 }
f337b084 2119 symptr->strbeg = SvCUR(cat);
49704364
WL
2120
2121 while (found) {
f337b084
TH
2122 SV *fromstr;
2123 STRLEN fromlen;
e1b825c1 2124 SSize_t len;
a0714e2c 2125 SV *lengthcode = NULL;
49704364 2126 I32 datumtype = symptr->code;
f337b084
TH
2127 howlen_t howlen = symptr->howlen;
2128 char *start = SvPVX(cat);
2129 char *cur = start + SvCUR(cat);
a1219b5e 2130 bool needs_swap;
49704364 2131
fc1bb3f2 2132#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2133#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2134
2135 switch (howlen) {
fc241834 2136 case e_star:
4aada8b9 2137 len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
f337b084
TH
2138 0 : items;
2139 break;
2140 default:
2141 /* e_no_len and e_number */
2142 len = symptr->length;
49704364
WL
2143 break;
2144 }
2145
f337b084 2146 if (len) {
a7a3cfaa 2147 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2148
a7a3cfaa
TH
2149 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2150 /* We can process this letter. */
2151 STRLEN size = props & PACK_SIZE_MASK;
f17fed50 2152 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
a7a3cfaa 2153 }
f337b084
TH
2154 }
2155
49704364
WL
2156 /* Look ahead for next symbol. Do we have code/code? */
2157 lookahead = *symptr;
2158 found = next_symbol(&lookahead);
246f24af
TH
2159 if (symptr->flags & FLAG_SLASH) {
2160 IV count;
f337b084 2161 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
4aada8b9 2162 if (memCHRs("aAZ", lookahead.code)) {
246f24af
TH
2163 if (lookahead.howlen == e_number) count = lookahead.length;
2164 else {
ce399ba6 2165 if (items > 0) {
48a5da33 2166 count = sv_len_utf8(*beglist);
ce399ba6 2167 }
246f24af
TH
2168 else count = 0;
2169 if (lookahead.code == 'Z') count++;
2170 }
2171 } else {
2172 if (lookahead.howlen == e_number && lookahead.length < items)
2173 count = lookahead.length;
2174 else count = items;
2175 }
2176 lookahead.howlen = e_number;
2177 lookahead.length = count;
2178 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2179 }
49704364 2180
a1219b5e
NC
2181 needs_swap = NEEDS_SWAP(datumtype);
2182
fc241834
RGS
2183 /* Code inside the switch must take care to properly update
2184 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2185 doesn't simply leave using break */
0c7df902 2186 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2187 default:
f337b084
TH
2188 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2189 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2190 case '%':
49704364 2191 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2192
28be1210 2193 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2194 case '.':
2195 if (howlen == e_star) from = start;
2196 else if (len == 0) from = cur;
2197 else {
2198 tempsym_t *group = symptr;
2199
2200 while (--len && group) group = group->previous;
2201 from = group ? start + group->strbeg : start;
2202 }
2203 fromstr = NEXTFROM;
354b74ae 2204 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2205 goto resize;
28be1210 2206 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2207 case '@':
28be1210
TH
2208 from = start + symptr->strbeg;
2209 resize:
28be1210 2210 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2211 if (len >= 0) {
2212 while (len && from < cur) {
2213 from += UTF8SKIP(from);
2214 len--;
2215 }
2216 if (from > cur)
2217 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2218 if (len) {
2219 /* Here we know from == cur */
2220 grow:
2221 GROWING(0, cat, start, cur, len);
2222 Zero(cur, len, char);
2223 cur += len;
2224 } else if (from < cur) {
2225 len = cur - from;
2226 goto shrink;
2227 } else goto no_change;
2228 } else {
2229 cur = from;
2230 len = -len;
2231 goto utf8_shrink;
f337b084 2232 }
28be1210
TH
2233 else {
2234 len -= cur - from;
f337b084 2235 if (len > 0) goto grow;
28be1210 2236 if (len == 0) goto no_change;
fc241834 2237 len = -len;
28be1210 2238 goto shrink;
f337b084 2239 }
a6ec74c1 2240 break;
81d52ecd 2241
fc241834 2242 case '(': {
49704364 2243 tempsym_t savsym = *symptr;
66c611c5
MHM
2244 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2245 symptr->flags |= group_modifiers;
49704364
WL
2246 symptr->patend = savsym.grpend;
2247 symptr->level++;
f337b084 2248 symptr->previous = &lookahead;
18529408 2249 while (len--) {
f337b084
TH
2250 U32 was_utf8;
2251 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2252 else symptr->flags &= ~FLAG_PARSE_UTF8;
2253 was_utf8 = SvUTF8(cat);
49704364 2254 symptr->patptr = savsym.grpbeg;
f337b084
TH
2255 beglist = pack_rec(cat, symptr, beglist, endlist);
2256 if (SvUTF8(cat) != was_utf8)
2257 /* This had better be an upgrade while in utf8==0 mode */
2258 utf8 = 1;
2259
49704364 2260 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2261 break; /* No way to continue */
2262 }
ee790063 2263 items = endlist - beglist;
f337b084
TH
2264 lookahead.flags = symptr->flags & ~group_modifiers;
2265 goto no_change;
18529408 2266 }
62f95557
IZ
2267 case 'X' | TYPE_IS_SHRIEKING:
2268 if (!len) /* Avoid division by 0 */
2269 len = 1;
f337b084
TH
2270 if (utf8) {
2271 char *hop, *last;
e1b825c1 2272 SSize_t l = len;
f337b084
TH
2273 hop = last = start;
2274 while (hop < cur) {
2275 hop += UTF8SKIP(hop);
2276 if (--l == 0) {
2277 last = hop;
2278 l = len;
2279 }
2280 }
2281 if (last > cur)
2282 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2283 cur = last;
2284 break;
2285 }
2286 len = (cur-start) % len;
924ba076 2287 /* FALLTHROUGH */
a6ec74c1 2288 case 'X':
f337b084
TH
2289 if (utf8) {
2290 if (len < 1) goto no_change;
28be1210 2291 utf8_shrink:
f337b084
TH
2292 while (len > 0) {
2293 if (cur <= start)
28be1210
TH
2294 Perl_croak(aTHX_ "'%c' outside of string in pack",
2295 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2296 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2297 if (cur <= start)
28be1210
TH
2298 Perl_croak(aTHX_ "'%c' outside of string in pack",
2299 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2300 }
2301 len--;
2302 }
2303 } else {
fc241834 2304 shrink:
f337b084 2305 if (cur - start < len)
28be1210
TH
2306 Perl_croak(aTHX_ "'%c' outside of string in pack",
2307 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2308 cur -= len;
2309 }
2310 if (cur < start+symptr->strbeg) {
2311 /* Make sure group starts don't point into the void */
2312 tempsym_t *group;
9e27e96a 2313 const STRLEN length = cur-start;
f337b084
TH
2314 for (group = symptr;
2315 group && length < group->strbeg;
2316 group = group->previous) group->strbeg = length;
2317 lookahead.strbeg = length;
2318 }
a6ec74c1 2319 break;
fc241834 2320 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 2321 SSize_t ai32;
62f95557
IZ
2322 if (!len) /* Avoid division by 0 */
2323 len = 1;
230e1fce 2324 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2325 else ai32 = (cur - start) % len;
2326 if (ai32 == 0) goto no_change;
2327 len -= ai32;
2328 }
924ba076 2329 /* FALLTHROUGH */
a6ec74c1 2330 case 'x':
f337b084 2331 goto grow;
a6ec74c1
JH
2332 case 'A':
2333 case 'Z':
f337b084 2334 case 'a': {
f7fe979e 2335 const char *aptr;
f337b084 2336
a6ec74c1 2337 fromstr = NEXTFROM;
e62f0680 2338 aptr = SvPV_const(fromstr, fromlen);
f337b084 2339 if (DO_UTF8(fromstr)) {
f7fe979e 2340 const char *end, *s;
f337b084
TH
2341
2342 if (!utf8 && !SvUTF8(cat)) {
2343 marked_upgrade(aTHX_ cat, symptr);
2344 lookahead.flags |= FLAG_DO_UTF8;
2345 lookahead.strbeg = symptr->strbeg;
2346 utf8 = 1;
2347 start = SvPVX(cat);
2348 cur = start + SvCUR(cat);
2349 }
fc241834 2350 if (howlen == e_star) {
f337b084
TH
2351 if (utf8) goto string_copy;
2352 len = fromlen+1;
2353 }
2354 s = aptr;
2355 end = aptr + fromlen;
2356 fromlen = datumtype == 'Z' ? len-1 : len;
e1b825c1 2357 while ((SSize_t) fromlen > 0 && s < end) {
f337b084
TH
2358 s += UTF8SKIP(s);
2359 fromlen--;
2360 }
2361 if (s > end)
2362 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2363 if (utf8) {
fc241834 2364 len = fromlen;
f337b084
TH
2365 if (datumtype == 'Z') len++;
2366 fromlen = s-aptr;
2367 len += fromlen;
fc241834 2368
f337b084 2369 goto string_copy;
fc241834 2370 }
f337b084
TH
2371 fromlen = len - fromlen;
2372 if (datumtype == 'Z') fromlen--;
2373 if (howlen == e_star) {
2374 len = fromlen;
2375 if (datumtype == 'Z') len++;
fc241834 2376 }
f337b084 2377 GROWING(0, cat, start, cur, len);
9df874cd 2378 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2379 datumtype | TYPE_IS_PACK))
5637ef5b 2380 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
e1b825c1
Z
2381 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2382 (int)datumtype, aptr, end, cur, fromlen);
f337b084 2383 cur += fromlen;
a6ec74c1 2384 len -= fromlen;
f337b084
TH
2385 } else if (utf8) {
2386 if (howlen == e_star) {
2387 len = fromlen;
2388 if (datumtype == 'Z') len++;
a6ec74c1 2389 }
e1b825c1 2390 if (len <= (SSize_t) fromlen) {
f337b084
TH
2391 fromlen = len;
2392 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2393 }
fc241834 2394 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2395 upgrade, so:
2396 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2397 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2398 len -= fromlen;
2399 while (fromlen > 0) {
230e1fce 2400 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2401 aptr++;
2402 fromlen--;
fc241834 2403 }
f337b084
TH
2404 } else {
2405 string_copy:
2406 if (howlen == e_star) {
2407 len = fromlen;
2408 if (datumtype == 'Z') len++;
2409 }
e1b825c1 2410 if (len <= (SSize_t) fromlen) {
f337b084
TH
2411 fromlen = len;
2412 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2413 }
f337b084
TH
2414 GROWING(0, cat, start, cur, len);
2415 Copy(aptr, cur, fromlen, char);
2416 cur += fromlen;
2417 len -= fromlen;
a6ec74c1 2418 }
f337b084
TH
2419 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2420 cur += len;
3c4fb04a 2421 SvTAINT(cat);
a6ec74c1 2422 break;
f337b084 2423 }
a6ec74c1 2424 case 'B':
f337b084 2425 case 'b': {
b83604b4 2426 const char *str, *end;
e1b825c1 2427 SSize_t l, field_len;
f337b084
TH
2428 U8 bits;
2429 bool utf8_source;
2430 U32 utf8_flags;
a6ec74c1 2431
fc241834 2432 fromstr = NEXTFROM;
b83604b4 2433 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2434 end = str + fromlen;
2435 if (DO_UTF8(fromstr)) {
2436 utf8_source = TRUE;
041457d9 2437 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2438 } else {
2439 utf8_source = FALSE;
2440 utf8_flags = 0; /* Unused, but keep compilers happy */
2441 }
2442 if (howlen == e_star) len = fromlen;
2443 field_len = (len+7)/8;
2444 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2445 if (len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2446 bits = 0;
2447 l = 0;
2448 if (datumtype == 'B')
2449 while (l++ < len) {
2450 if (utf8_source) {
95b63a38 2451 UV val = 0;
f337b084
TH
2452 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2453 bits |= val & 1;
2454 } else bits |= *str++ & 1;
2455 if (l & 7) bits <<= 1;
fc241834 2456 else {
f337b084
TH
2457 PUSH_BYTE(utf8, cur, bits);
2458 bits = 0;
a6ec74c1
JH
2459 }
2460 }
f337b084
TH
2461 else
2462 /* datumtype == 'b' */
2463 while (l++ < len) {
2464 if (utf8_source) {
95b63a38 2465 UV val = 0;
f337b084
TH
2466 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2467 if (val & 1) bits |= 0x80;
2468 } else if (*str++ & 1)
2469 bits |= 0x80;
2470 if (l & 7) bits >>= 1;
fc241834 2471 else {
f337b084
TH
2472 PUSH_BYTE(utf8, cur, bits);
2473 bits = 0;
a6ec74c1
JH
2474 }
2475 }
f337b084
TH
2476 l--;
2477 if (l & 7) {
fc241834 2478 if (datumtype == 'B')
f337b084 2479 bits <<= 7 - (l & 7);
fc241834 2480 else
f337b084
TH
2481 bits >>= 7 - (l & 7);
2482 PUSH_BYTE(utf8, cur, bits);
2483 l += 7;
a6ec74c1 2484 }
f337b084
TH
2485 /* Determine how many chars are left in the requested field */
2486 l /= 8;
2487 if (howlen == e_star) field_len = 0;
2488 else field_len -= l;
2489 Zero(cur, field_len, char);
2490 cur += field_len;
a6ec74c1 2491 break;
f337b084 2492 }
a6ec74c1 2493 case 'H':
f337b084 2494 case 'h': {
10516c54 2495 const char *str, *end;
e1b825c1 2496 SSize_t l, field_len;
f337b084
TH
2497 U8 bits;
2498 bool utf8_source;
2499 U32 utf8_flags;
a6ec74c1 2500
fc241834 2501 fromstr = NEXTFROM;
10516c54 2502 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2503 end = str + fromlen;
2504 if (DO_UTF8(fromstr)) {
2505 utf8_source = TRUE;
041457d9 2506 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2507 } else {
2508 utf8_source = FALSE;
2509 utf8_flags = 0; /* Unused, but keep compilers happy */
2510 }
2511 if (howlen == e_star) len = fromlen;
2512 field_len = (len+1)/2;
2513 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2514 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2515 bits = 0;
2516 l = 0;
2517 if (datumtype == 'H')
2518 while (l++ < len) {
2519 if (utf8_source) {
95b63a38 2520 UV val = 0;
f337b084
TH
2521 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2522 if (val < 256 && isALPHA(val))
2523 bits |= (val + 9) & 0xf;
a6ec74c1 2524 else
f337b084
TH
2525 bits |= val & 0xf;
2526 } else if (isALPHA(*str))
2527 bits |= (*str++ + 9) & 0xf;
2528 else
2529 bits |= *str++ & 0xf;
2530 if (l & 1) bits <<= 4;
fc241834 2531 else {
f337b084
TH
2532 PUSH_BYTE(utf8, cur, bits);
2533 bits = 0;
a6ec74c1
JH
2534 }
2535 }
f337b084
TH
2536 else
2537 while (l++ < len) {
2538 if (utf8_source) {
95b63a38 2539 UV val = 0;
f337b084
TH
2540 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2541 if (val < 256 && isALPHA(val))
2542 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2543 else
f337b084
TH
2544 bits |= (val & 0xf) << 4;
2545 } else if (isALPHA(*str))
2546 bits |= ((*str++ + 9) & 0xf) << 4;
2547 else
2548 bits |= (*str++ & 0xf) << 4;
2549 if (l & 1) bits >>= 4;
fc241834 2550 else {
f337b084
TH
2551 PUSH_BYTE(utf8, cur, bits);
2552 bits = 0;
a6ec74c1 2553 }
fc241834 2554 }
f337b084
TH
2555 l--;
2556 if (l & 1) {
2557 PUSH_BYTE(utf8, cur, bits);
2558 l++;
2559 }
2560 /* Determine how many chars are left in the requested field */
2561 l /= 2;
2562 if (howlen == e_star) field_len = 0;
2563 else field_len -= l;
2564 Zero(cur, field_len, char);
2565 cur += field_len;
2566 break;
fc241834
RGS
2567 }
2568 case 'c':
f337b084
TH
2569 while (len-- > 0) {
2570 IV aiv;
2571 fromstr = NEXTFROM;
354b74ae 2572 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2573 if ((-128 > aiv || aiv > 127))
2574 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2575 "Character in 'c' format wrapped in pack");
585ec06d 2576 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2577 }
2578 break;
2579 case 'C':
f337b084
TH
2580 if (len == 0) {
2581 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2582 break;
2583 }
a6ec74c1 2584 while (len-- > 0) {
f337b084 2585 IV aiv;
a6ec74c1 2586 fromstr = NEXTFROM;
354b74ae 2587 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2588 if ((0 > aiv || aiv > 0xff))
2589 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2590 "Character in 'C' format wrapped in pack");
1651fc44 2591 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2592 }
fc241834
RGS
2593 break;
2594 case 'W': {
2595 char *end;
670f1322 2596 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2597
2598 end = start+SvLEN(cat)-1;
2599 if (utf8) end -= UTF8_MAXLEN-1;
2600 while (len-- > 0) {
2601 UV auv;
2602 fromstr = NEXTFROM;
354b74ae 2603 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2604 if (in_bytes) auv = auv % 0x100;
2605 if (utf8) {
2606 W_utf8:
bf4a926a 2607 if (cur >= end) {
fc241834 2608 *cur = '\0';
b162af07 2609 SvCUR_set(cat, cur - start);
fc241834
RGS
2610
2611 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2612 end = start+SvLEN(cat)-UTF8_MAXLEN;
2613 }
05fefba9 2614 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
fc241834
RGS
2615 } else {
2616 if (auv >= 0x100) {
2617 if (!SvUTF8(cat)) {
2618 *cur = '\0';
b162af07 2619 SvCUR_set(cat, cur - start);
fc241834
RGS
2620 marked_upgrade(aTHX_ cat, symptr);
2621 lookahead.flags |= FLAG_DO_UTF8;
2622 lookahead.strbeg = symptr->strbeg;
2623 utf8 = 1;
2624 start = SvPVX(cat);
2625 cur = start + SvCUR(cat);
2626 end = start+SvLEN(cat)-UTF8_MAXLEN;
2627 goto W_utf8;
2628 }
a2a5de95
NC
2629 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2630 "Character in 'W' format wrapped in pack");
fc241834
RGS
2631 auv &= 0xff;
2632 }
2633 if (cur >= end) {
2634 *cur = '\0';
b162af07 2635 SvCUR_set(cat, cur - start);
fc241834
RGS
2636 GROWING(0, cat, start, cur, len+1);
2637 end = start+SvLEN(cat)-1;
2638 }
fe2774ed 2639 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2640 }
2641 }
2642 break;
fc241834
RGS
2643 }
2644 case 'U': {
2645 char *end;
2646
2647 if (len == 0) {
2648 if (!(symptr->flags & FLAG_DO_UTF8)) {
2649 marked_upgrade(aTHX_ cat, symptr);
2650 lookahead.flags |= FLAG_DO_UTF8;
2651 lookahead.strbeg = symptr->strbeg;
2652 }
2653 utf8 = 0;
2654 goto no_change;
2655 }
2656
2657 end = start+SvLEN(cat);
2658 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2659 while (len-- > 0) {
fc241834 2660 UV auv;
a6ec74c1 2661 fromstr = NEXTFROM;
354b74ae 2662 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2663 if (utf8) {
81b3f640 2664 U8 buffer[UTF8_MAXLEN+1], *endb;
05fefba9 2665 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
fc241834
RGS
2666 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2667 *cur = '\0';
b162af07 2668 SvCUR_set(cat, cur - start);
fc241834
RGS
2669 GROWING(0, cat, start, cur,
2670 len+(endb-buffer)*UTF8_EXPAND);
2671 end = start+SvLEN(cat);
2672 }
d21d7215 2673 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2674 } else {
2675 if (cur >= end) {
2676 *cur = '\0';
b162af07 2677 SvCUR_set(cat, cur - start);
fc241834
RGS
2678 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2679 end = start+SvLEN(cat)-UTF8_MAXLEN;
2680 }
05fefba9
KW
2681 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2682 UNI_TO_NATIVE(auv),
2683 0);
fc241834 2684 }
a6ec74c1 2685 }
a6ec74c1 2686 break;
fc241834 2687 }
a6ec74c1
JH
2688 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2689 case 'f':
a6ec74c1 2690 while (len-- > 0) {
f337b084
TH
2691 float afloat;
2692 NV anv;
a6ec74c1 2693 fromstr = NEXTFROM;
f337b084 2694 anv = SvNV(fromstr);
a7157111 2695# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2696 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2697 * on Alpha; fake it if we don't have them.
2698 */
f337b084 2699 if (anv > FLT_MAX)
fc241834 2700 afloat = FLT_MAX;
f337b084 2701 else if (anv < -FLT_MAX)
fc241834 2702 afloat = -FLT_MAX;
f337b084 2703 else afloat = (float)anv;
baf3cf9c 2704# else
a7157111 2705# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
d22fb6ca
DD
2706 if(Perl_isnan(anv))
2707 afloat = (float)NV_NAN;
2708 else
a7157111
JH
2709# endif
2710# ifdef NV_INF
919894b7
DM
2711 /* a simple cast to float is undefined if outside
2712 * the range of values that can be represented */
2713 afloat = (float)(anv > FLT_MAX ? NV_INF :
2714 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2715# endif
baf3cf9c 2716# endif
3a88beaa 2717 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2718 }
2719 break;
2720 case 'd':
a6ec74c1 2721 while (len-- > 0) {
f337b084
TH
2722 double adouble;
2723 NV anv;
a6ec74c1 2724 fromstr = NEXTFROM;
f337b084 2725 anv = SvNV(fromstr);
a7157111 2726# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2727 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2728 * on Alpha; fake it if we don't have them.
2729 */
f337b084 2730 if (anv > DBL_MAX)
fc241834 2731 adouble = DBL_MAX;
f337b084 2732 else if (anv < -DBL_MAX)
fc241834 2733 adouble = -DBL_MAX;
f337b084 2734 else adouble = (double)anv;
baf3cf9c 2735# else
f337b084 2736 adouble = (double)anv;
baf3cf9c 2737# endif
3a88beaa 2738 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2739 }
2740 break;
fc241834 2741 case 'F': {
275663fa 2742 NV_bytes anv;
1109a392 2743 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2744 while (len-- > 0) {
2745 fromstr = NEXTFROM;
cd07c537
DM
2746#ifdef __GNUC__
2747 /* to work round a gcc/x86 bug; don't use SvNV */
2748 anv.nv = sv_2nv(fromstr);
070e2677
TC
2749# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2750 && LONG_DOUBLESIZE > 10
2751 /* GCC sometimes overwrites the padding in the
2752 assignment above */
2753 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2754# endif
cd07c537 2755#else
275663fa 2756 anv.nv = SvNV(fromstr);
cd07c537 2757#endif
3a88beaa 2758 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2759 }
2760 break;
fc241834 2761 }
92d41999 2762#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2763 case 'D': {
275663fa 2764 ld_bytes aldouble;
1109a392
MHM
2765 /* long doubles can have unused bits, which may be nonzero */
2766 Zero(&aldouble, 1, long double);
92d41999
JH
2767 while (len-- > 0) {
2768 fromstr = NEXTFROM;
cd07c537
DM
2769# ifdef __GNUC__
2770 /* to work round a gcc/x86 bug; don't use SvNV */
2771 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2772# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2773 /* GCC sometimes overwrites the padding in the
2774 assignment above */
2775 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2776# endif
cd07c537 2777# else
275663fa 2778 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2779# endif
3a88beaa
NC
2780 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2781 needs_swap);
92d41999
JH
2782 }
2783 break;
fc241834 2784 }
92d41999 2785#endif
068bd2e7 2786 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2787 case 'n':
2788 while (len-- > 0) {
f337b084 2789 I16 ai16;
a6ec74c1 2790 fromstr = NEXTFROM;
354b74ae 2791 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2792 ai16 = PerlSock_htons(ai16);
3a88beaa 2793 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2794 }
2795 break;
068bd2e7 2796 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2797 case 'v':
2798 while (len-- > 0) {
f337b084 2799 I16 ai16;
a6ec74c1 2800 fromstr = NEXTFROM;
354b74ae 2801 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2802 ai16 = htovs(ai16);
3a88beaa 2803 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2804 }
2805 break;
49704364 2806 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2807#if SHORTSIZE != SIZE16
fc241834 2808 while (len-- > 0) {
f337b084 2809 unsigned short aushort;
fc241834 2810 fromstr = NEXTFROM;
354b74ae 2811 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2812 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2813 }
49704364
WL
2814 break;
2815#else
924ba076 2816 /* FALLTHROUGH */
a6ec74c1 2817#endif
49704364 2818 case 'S':
fc241834 2819 while (len-- > 0) {
f337b084 2820 U16 au16;
fc241834 2821 fromstr = NEXTFROM;
354b74ae 2822 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2823 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2824 }
2825 break;
49704364 2826 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2827#if SHORTSIZE != SIZE16
fc241834 2828 while (len-- > 0) {
f337b084 2829 short ashort;
fc241834 2830 fromstr = NEXTFROM;
354b74ae 2831 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2832 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2833 }
49704364
WL
2834 break;
2835#else
924ba076 2836 /* FALLTHROUGH */
a6ec74c1 2837#endif
49704364
WL
2838 case 's':
2839 while (len-- > 0) {
f337b084 2840 I16 ai16;
49704364 2841 fromstr = NEXTFROM;
354b74ae 2842 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2843 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2844 }
2845 break;
2846 case 'I':
49704364 2847 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2848 while (len-- > 0) {
f337b084 2849 unsigned int auint;
a6ec74c1 2850 fromstr = NEXTFROM;
354b74ae 2851 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2852 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2853 }
2854 break;
92d41999
JH
2855 case 'j':
2856 while (len-- > 0) {
f337b084 2857 IV aiv;
92d41999 2858 fromstr = NEXTFROM;
354b74ae 2859 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2860 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2861 }
2862 break;
2863 case 'J':
2864 while (len-- > 0) {
f337b084 2865 UV auv;
92d41999 2866 fromstr = NEXTFROM;
354b74ae 2867 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2868 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2869 }
2870 break;
a6ec74c1
JH
2871 case 'w':
2872 while (len-- > 0) {
f337b084 2873 NV anv;
a6ec74c1 2874 fromstr = NEXTFROM;
83388ae2 2875 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2876 anv = SvNV_nomg(fromstr);
a6ec74c1 2877
f337b084
TH
2878 if (anv < 0) {
2879 *cur = '\0';
b162af07 2880 SvCUR_set(cat, cur - start);
49704364 2881 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2882 }
a6ec74c1 2883
196b62db
NC
2884 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2885 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2886 any negative IVs will have already been got by the croak()
2887 above. IOK is untrue for fractions, so we test them
2888 against UV_MAX_P1. */
f337b084
TH
2889 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2890 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2891 char *in = buf + sizeof(buf);
1e9a122e 2892 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2893
2894 do {
eb160463 2895 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2896 auv >>= 7;
2897 } while (auv);
2898 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2899 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2900 in, (buf + sizeof(buf)) - in);
2901 } else if (SvPOKp(fromstr))
2902 goto w_string;
a6ec74c1 2903 else if (SvNOKp(fromstr)) {
0258719b 2904 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2905 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2906 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2907 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2908 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2909 Some C compilers are strict about integral constant
2910 expressions so we conservatively divide by a slightly
2911 smaller integer instead of multiplying by the exact
2912 floating-point value.
0258719b
NC
2913 */
2914#ifdef NV_MAX_10_EXP
f337b084 2915 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2916 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2917#else
f337b084 2918 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2919 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2920#endif
a6ec74c1
JH
2921 char *in = buf + sizeof(buf);
2922
8b6e33c7 2923 anv = Perl_floor(anv);
a6ec74c1 2924 do {
8b6e33c7 2925 const NV next = Perl_floor(anv / 128);
a6ec74c1 2926 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2927 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2928 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2929 anv = next;
2930 } while (anv > 0);
a6ec74c1 2931 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2932 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2933 in, (buf + sizeof(buf)) - in);
2934 } else {
8b6e33c7
AL
2935 const char *from;
2936 char *result, *in;
735b914b
JH
2937 SV *norm;
2938 STRLEN len;
2939 bool done;
2940
f337b084 2941 w_string:
735b914b 2942 /* Copy string and check for compliance */
1e9a122e 2943 from = SvPV_nomg_const(fromstr, len);
735b914b 2944 if ((norm = is_an_int(from, len)) == NULL)
49704364 2945 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2946
a02a5408 2947 Newx(result, len, char);
735b914b
JH
2948 in = result + len;
2949 done = FALSE;
f337b084 2950 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2951 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2952 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2953 in, (result + len) - in);
735b914b
JH
2954 Safefree(result);
2955 SvREFCNT_dec(norm); /* free norm */
fc241834 2956 }
a6ec74c1
JH
2957 }
2958 break;
2959 case 'i':
49704364 2960 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2961 while (len-- > 0) {
f337b084 2962 int aint;
a6ec74c1 2963 fromstr = NEXTFROM;
354b74ae 2964 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2965 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2966 }
2967 break;
068bd2e7 2968 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2969 case 'N':
2970 while (len-- > 0) {
f337b084 2971 U32 au32;
a6ec74c1 2972 fromstr = NEXTFROM;
354b74ae 2973 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2974 au32 = PerlSock_htonl(au32);
3a88beaa 2975 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2976 }
2977 break;
068bd2e7 2978 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2979 case 'V':
2980 while (len-- > 0) {
f337b084 2981 U32 au32;
a6ec74c1 2982 fromstr = NEXTFROM;
354b74ae 2983 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2984 au32 = htovl(au32);
3a88beaa 2985 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2986 }
2987 break;
49704364 2988 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2989#if LONGSIZE != SIZE32
fc241834 2990 while (len-- > 0) {
f337b084 2991 unsigned long aulong;
fc241834 2992 fromstr = NEXTFROM;
354b74ae 2993 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2994 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2995 }
49704364
WL
2996 break;
2997#else
2998 /* Fall though! */
a6ec74c1 2999#endif
49704364 3000 case 'L':
fc241834 3001 while (len-- > 0) {
f337b084 3002 U32 au32;
fc241834 3003 fromstr = NEXTFROM;
354b74ae 3004 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 3005 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
3006 }
3007 break;
49704364 3008 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3009#if LONGSIZE != SIZE32
fc241834 3010 while (len-- > 0) {
f337b084 3011 long along;
fc241834 3012 fromstr = NEXTFROM;
354b74ae 3013 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3014 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 3015 }
49704364
WL
3016 break;
3017#else
3018 /* Fall though! */
a6ec74c1 3019#endif
49704364
WL
3020 case 'l':
3021 while (len-- > 0) {
f337b084 3022 I32 ai32;
49704364 3023 fromstr = NEXTFROM;
354b74ae 3024 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3025 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3026 }
3027 break;
c174bf3b 3028#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3029 case 'Q':
3030 while (len-- > 0) {
f337b084 3031 Uquad_t auquad;
a6ec74c1 3032 fromstr = NEXTFROM;
354b74ae 3033 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3034 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3035 }
3036 break;
3037 case 'q':
3038 while (len-- > 0) {
f337b084 3039 Quad_t aquad;
a6ec74c1 3040 fromstr = NEXTFROM;
354b74ae 3041 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3042 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3043 }
3044 break;
1640b983 3045#endif
a6ec74c1
JH
3046 case 'P':
3047 len = 1; /* assume SV is correct length */
f337b084 3048 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3049 /* FALLTHROUGH */
a6ec74c1
JH
3050 case 'p':
3051 while (len-- > 0) {
83003860 3052 const char *aptr;
f337b084 3053
a6ec74c1 3054 fromstr = NEXTFROM;
28a4f200
TH
3055 SvGETMAGIC(fromstr);
3056 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3057 else {
a6ec74c1
JH
3058 /* XXX better yet, could spirit away the string to
3059 * a safe spot and hang on to it until the result
3060 * of pack() (and all copies of the result) are
3061 * gone.
3062 */
b199a398
FC
3063 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3064 || (SvPADTMP(fromstr) &&
a2a5de95
NC
3065 !SvREADONLY(fromstr)))) {
3066 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3067 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3068 }
3069 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3070 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3071 else
2596d9fe 3072 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3073 }
3a88beaa 3074 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3075 }
3076 break;
fc241834 3077 case 'u': {
f7fe979e 3078 const char *aptr, *aend;
fc241834 3079 bool from_utf8;
f337b084 3080
a6ec74c1 3081 fromstr = NEXTFROM;
fc241834
RGS
3082 if (len <= 2) len = 45;
3083 else len = len / 3 * 3;
3084 if (len >= 64) {
a2a5de95
NC
3085 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3086 "Field too wide in 'u' format in pack");
fc241834
RGS
3087 len = 63;
3088 }
83003860 3089 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3090 from_utf8 = DO_UTF8(fromstr);
3091 if (from_utf8) {
3092 aend = aptr + fromlen;
3f63b0e5 3093 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3094 } else aend = NULL; /* Unused, but keep compilers happy */
3095 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3096 while (fromlen > 0) {
fc241834 3097 U8 *end;
e1b825c1 3098 SSize_t todo;
fc241834 3099 U8 hunk[1+63/3*4+1];
a6ec74c1 3100
e1b825c1 3101 if ((SSize_t)fromlen > len)
a6ec74c1
JH
3102 todo = len;
3103 else
3104 todo = fromlen;
fc241834
RGS
3105 if (from_utf8) {
3106 char buffer[64];
9df874cd 3107 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3108 'u' | TYPE_IS_PACK)) {
3109 *cur = '\0';
b162af07 3110 SvCUR_set(cat, cur - start);
5637ef5b 3111 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
e1b825c1
Z
3112 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3113 aptr, aend, buffer, todo);
fc241834 3114 }
e68aed92 3115 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3116 } else {
e68aed92 3117 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3118 aptr += todo;
3119 }
3a88beaa 3120 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3121 fromlen -= todo;
3122 }
a6ec74c1
JH
3123 break;
3124 }
f337b084
TH
3125 }
3126 *cur = '\0';
b162af07 3127 SvCUR_set(cat, cur - start);
f337b084 3128 no_change:
49704364 3129 *symptr = lookahead;
a6ec74c1 3130 }
49704364 3131 return beglist;
18529408
IZ
3132}
3133#undef NEXTFROM
3134
3135
3136PP(pp_pack)
3137{
20b7effb 3138 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3139 SV *cat = TARG;
18529408 3140 STRLEN fromlen;
349d4f2f 3141 SV *pat_sv = *++MARK;
eb578fdb
KW
3142 const char *pat = SvPV_const(pat_sv, fromlen);
3143 const char *patend = pat + fromlen;
18529408
IZ
3144
3145 MARK++;
e8f7c79f 3146 SvPVCLEAR(cat);
f337b084 3147 SvUTF8_off(cat);
18529408 3148
7accc089 3149 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3150
fd879d93
KW
3151 if (SvUTF8(cat)) {
3152 STRLEN result_len;
3153 const char * result = SvPV_nomg(cat, result_len);
3154 const U8 * error_pos;
3155
3156 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3157 _force_out_malformed_utf8_message(error_pos,
3158 (U8 *) result + result_len,
3159 0, /* no flags */
3160 1 /* Die */
3161 );
3162 NOT_REACHED; /* NOTREACHED */
3163 }
3164 }
3165
a6ec74c1
JH
3166 SvSETMAGIC(cat);
3167 SP = ORIGMARK;
3168 PUSHs(cat);
3169 RETURN;
3170}
a6ec74c1 3171
73cb7263 3172/*
14d04a33 3173 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3174 */