This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
HACKERS: Add info about Hints and Warnings
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
f7fe979e
AL
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
e1b825c1
Z
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
f7fe979e
AL
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
41f35ffd 115#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
08ca2aa3
TH
116# define OFF16(p) ((char *) (p))
117# define OFF32(p) ((char *) (p))
41f35ffd
AC
118#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119# define OFF16(p) ((char*)(p))
120# define OFF32(p) ((char*)(p))
121#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124#else
125# error "bad cray byte order"
a6ec74c1
JH
126#endif
127
3a88beaa
NC
128#define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130#define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 132
a1219b5e
NC
133#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137#else
138# error "Unsupported byteorder"
20aa3a38
NC
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
142 logic adding to deal with any mixed-endian transformations needed.
143 */
a1219b5e
NC
144#endif
145
08ca2aa3 146/* Only to be used inside a loop (see the break) */
aaec8192 147#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 148STMT_START { \
228e69a7 149 if (UNLIKELY(utf8)) { \
9df874cd 150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 151 (char *) (buf), len, datumtype)) break; \
08ca2aa3 152 } else { \
228e69a7 153 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
154 S_reverse_copy(s, (char *) (buf), len); \
155 else \
156 Copy(s, (char *) (buf), len, char); \
275663fa 157 s += len; \
08ca2aa3 158 } \
08ca2aa3
TH
159} STMT_END
160
aaec8192
NC
161#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 163
aaec8192
NC
164#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 166
aaec8192
NC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 169
3a88beaa
NC
170#define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 172
49704364
WL
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
0f12654f 194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
e6006968
KW
215#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
216 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
a6ec74c1 217
66c611c5 218/* type modifiers */
62f95557 219#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
220#define TYPE_IS_BIG_ENDIAN 0x200
221#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 222#define TYPE_IS_PACK 0x800
1109a392 223#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 224#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
225#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
226
7212898e
NC
227# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
228# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
229
230# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
231
78d46eaa 232#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 233#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
234#define PACK_SIZE_MASK 0x3F
235
4ad176ef 236#include "packsizetables.inc"
78d46eaa 237
20aa3a38
NC
238static void
239S_reverse_copy(const char *src, char *dest, STRLEN len)
240{
241 dest += len;
242 while (len--)
243 *--dest = *src++;
244}
245
08ca2aa3 246STATIC U8
9df874cd 247utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 248{
08ca2aa3 249 STRLEN retlen;
df337714
KW
250 UV val;
251
252 if (*s >= end) {
253 goto croak;
254 }
255 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 256 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714
KW
257 if (retlen == (STRLEN) -1)
258 croak:
f337b084
TH
259 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
260 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 261 if (val >= 0x100) {
a2a5de95
NC
262 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
263 "Character in '%c' format wrapped in unpack",
264 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
265 val &= 0xff;
266 }
267 *s += retlen;
fe2774ed 268 return (U8)val;
08ca2aa3
TH
269}
270
f337b084 271#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 272 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
273 *(U8 *)(s)++)
274
275STATIC bool
e1b825c1 276S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
08ca2aa3
TH
277{
278 UV val;
279 STRLEN retlen;
f7fe979e 280 const char *from = *s;
08ca2aa3 281 int bad = 0;
f7fe979e 282 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 283 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
284 const bool needs_swap = NEEDS_SWAP(datumtype);
285
228e69a7 286 if (UNLIKELY(needs_swap))
20aa3a38
NC
287 buf += buf_len;
288
08ca2aa3
TH
289 for (;buf_len > 0; buf_len--) {
290 if (from >= end) return FALSE;
f337b084 291 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
df337714 292 if (retlen == (STRLEN) -1) {
18c47def 293 from += UTF8_SAFE_SKIP(from, end);
08ca2aa3
TH
294 bad |= 1;
295 } else from += retlen;
296 if (val >= 0x100) {
297 bad |= 2;
298 val &= 0xff;
299 }
228e69a7 300 if (UNLIKELY(needs_swap))
20aa3a38
NC
301 *(U8 *)--buf = (U8)val;
302 else
303 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
304 }
305 /* We have enough characters for the buffer. Did we have problems ? */
306 if (bad) {
307 if (bad & 1) {
308 /* Rewalk the string fragment while warning */
f7fe979e 309 const char *ptr;
e1b825c1 310 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
311 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
312 if (ptr >= end) break;
c80e42f3 313 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
314 }
315 if (from > end) from = end;
316 }
a2a5de95
NC
317 if ((bad & 2))
318 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 319 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
320 "Character(s) in '%c' format wrapped in %s",
321 (int) TYPE_NO_MODIFIERS(datumtype),
322 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
323 }
324 *s = from;
325 return TRUE;
326}
327
64844641 328STATIC char *
d21d7215
KW
329S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
330 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 331
228e69a7 332 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
333 const U8 *p = start + len;
334 while (p-- > start) {
55d09dc8 335 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
336 }
337 } else {
338 const U8 * const end = start + len;
339 while (start < end) {
55d09dc8 340 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
341 start++;
342 }
f337b084 343 }
64844641 344 return dest;
f337b084
TH
345}
346
3a88beaa 347#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 348STMT_START { \
228e69a7 349 if (UNLIKELY(utf8)) \
d21d7215 350 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 351 else { \
228e69a7 352 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
353 S_reverse_copy((char *)(buf), cur, len); \
354 else \
355 Copy(buf, cur, len, char); \
230e1fce
NC
356 (cur) += (len); \
357 } \
f337b084
TH
358} STMT_END
359
f17fed50
TC
360#define SAFE_UTF8_EXPAND(var) \
361STMT_START { \
362 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
363 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
364 (var) = (var) * UTF8_EXPAND; \
365} STMT_END
366
367#define GROWING2(utf8, cat, start, cur, item_size, item_count) \
368STMT_START { \
369 if (SSize_t_MAX / (item_size) < (item_count)) \
370 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
371 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
372} STMT_END
373
f337b084
TH
374#define GROWING(utf8, cat, start, cur, in_len) \
375STMT_START { \
376 STRLEN glen = (in_len); \
f17fed50
TC
377 STRLEN catcur = (STRLEN)((cur) - (start)); \
378 if (utf8) SAFE_UTF8_EXPAND(glen); \
379 if (SSize_t_MAX - glen < catcur) \
380 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
381 if (catcur + glen >= SvLEN(cat)) { \
0bd48802 382 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
383 (cur) = (start) + SvCUR(cat); \
384 } \
385} STMT_END
386
387#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
388STMT_START { \
f7fe979e 389 const STRLEN glen = (in_len); \
f337b084 390 STRLEN gl = glen; \
f17fed50 391 if (utf8) SAFE_UTF8_EXPAND(gl); \
f337b084
TH
392 if ((cur) + gl >= (start) + SvLEN(cat)) { \
393 *cur = '\0'; \
b162af07 394 SvCUR_set((cat), (cur) - (start)); \
0bd48802 395 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
396 (cur) = (start) + SvCUR(cat); \
397 } \
3a88beaa 398 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
399} STMT_END
400
401#define PUSH_BYTE(utf8, s, byte) \
402STMT_START { \
403 if (utf8) { \
f7fe979e 404 const U8 au8 = (byte); \
d21d7215 405 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
406 } else *(U8 *)(s)++ = (byte); \
407} STMT_END
408
409/* Only to be used inside a loop (see the break) */
410#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
411STMT_START { \
412 STRLEN retlen; \
413 if (str >= end) break; \
414 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
df337714 415 if (retlen == (STRLEN) -1) { \
f337b084
TH
416 *cur = '\0'; \
417 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
418 } \
419 str += retlen; \
420} STMT_END
421
f7fe979e
AL
422static const char *_action( const tempsym_t* symptr )
423{
10edeb5d 424 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
425}
426
206947d2 427/* Returns the sizeof() struct described by pat */
e1b825c1 428STATIC SSize_t
f337b084 429S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 430{
e1b825c1 431 SSize_t total = 0;
206947d2 432
7918f24d
NC
433 PERL_ARGS_ASSERT_MEASURE_STRUCT;
434
49704364 435 while (next_symbol(symptr)) {
e1b825c1 436 SSize_t len, size;
f337b084
TH
437
438 switch (symptr->howlen) {
fc241834 439 case e_star:
49704364 440 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 441 _action( symptr ) );
81d52ecd 442
f337b084
TH
443 default:
444 /* e_no_len and e_number */
445 len = symptr->length;
446 break;
49704364
WL
447 }
448
a7a3cfaa 449 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 450 if (!size) {
e1b825c1 451 SSize_t star;
80a13697
NC
452 /* endianness doesn't influence the size of a type */
453 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
454 default:
455 Perl_croak(aTHX_ "Invalid type '%c' in %s",
456 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 457 _action( symptr ) );
28be1210
TH
458 case '.' | TYPE_IS_SHRIEKING:
459 case '@' | TYPE_IS_SHRIEKING:
80a13697 460 case '@':
28be1210 461 case '.':
80a13697
NC
462 case '/':
463 case 'U': /* XXXX Is it correct? */
464 case 'w':
465 case 'u':
466 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 467 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 468 _action( symptr ) );
80a13697
NC
469 case '%':
470 size = 0;
471 break;
472 case '(':
fc241834
RGS
473 {
474 tempsym_t savsym = *symptr;
475 symptr->patptr = savsym.grpbeg;
476 symptr->patend = savsym.grpend;
477 /* XXXX Theoretically, we need to measure many times at
478 different positions, since the subexpression may contain
479 alignment commands, but be not of aligned length.
480 Need to detect this and croak(). */
481 size = measure_struct(symptr);
482 *symptr = savsym;
483 break;
484 }
80a13697
NC
485 case 'X' | TYPE_IS_SHRIEKING:
486 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
487 */
488 if (!len) /* Avoid division by 0 */
489 len = 1;
490 len = total % len; /* Assumed: the start is aligned. */
924ba076 491 /* FALLTHROUGH */
80a13697
NC
492 case 'X':
493 size = -1;
494 if (total < len)
f7fe979e 495 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
496 break;
497 case 'x' | TYPE_IS_SHRIEKING:
498 if (!len) /* Avoid division by 0 */
499 len = 1;
500 star = total % len; /* Assumed: the start is aligned. */
501 if (star) /* Other portable ways? */
502 len = len - star;
503 else
504 len = 0;
924ba076 505 /* FALLTHROUGH */
80a13697
NC
506 case 'x':
507 case 'A':
508 case 'Z':
509 case 'a':
80a13697
NC
510 size = 1;
511 break;
512 case 'B':
513 case 'b':
514 len = (len + 7)/8;
515 size = 1;
516 break;
517 case 'H':
518 case 'h':
519 len = (len + 1)/2;
520 size = 1;
521 break;
78d46eaa 522
80a13697
NC
523 case 'P':
524 len = 1;
525 size = sizeof(char*);
78d46eaa
NC
526 break;
527 }
206947d2
IZ
528 }
529 total += len * size;
530 }
531 return total;
532}
533
49704364
WL
534
535/* locate matching closing parenthesis or bracket
536 * returns char pointer to char after match, or NULL
537 */
f7fe979e 538STATIC const char *
5aaab254 539S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 540{
7918f24d
NC
541 PERL_ARGS_ASSERT_GROUP_END;
542
49704364 543 while (patptr < patend) {
f7fe979e 544 const char c = *patptr++;
49704364
WL
545
546 if (isSPACE(c))
547 continue;
548 else if (c == ender)
549 return patptr-1;
550 else if (c == '#') {
551 while (patptr < patend && *patptr != '\n')
552 patptr++;
553 continue;
554 } else if (c == '(')
555 patptr = group_end(patptr, patend, ')') + 1;
556 else if (c == '[')
557 patptr = group_end(patptr, patend, ']') + 1;
18529408 558 }
49704364
WL
559 Perl_croak(aTHX_ "No group ending character '%c' found in template",
560 ender);
a25b5927 561 NOT_REACHED; /* NOTREACHED */
18529408
IZ
562}
563
49704364
WL
564
565/* Convert unsigned decimal number to binary.
566 * Expects a pointer to the first digit and address of length variable
567 * Advances char pointer to 1st non-digit char and returns number
fc241834 568 */
f7fe979e 569STATIC const char *
e1b825c1 570S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
49704364 571{
e1b825c1 572 SSize_t len = *patptr++ - '0';
7918f24d
NC
573
574 PERL_ARGS_ASSERT_GET_NUM;
575
49704364 576 while (isDIGIT(*patptr)) {
e1b825c1
Z
577 SSize_t nlen = (len * 10) + (*patptr++ - '0');
578 if (nlen < 0 || nlen/10 != len)
49704364 579 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
e1b825c1 580 len = nlen;
49704364
WL
581 }
582 *lenptr = len;
583 return patptr;
584}
585
586/* The marvellous template parsing routine: Using state stored in *symptr,
587 * locates next template code and count
588 */
589STATIC bool
f337b084 590S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 591{
f7fe979e 592 const char* patptr = symptr->patptr;
0bcc34c2 593 const char* const patend = symptr->patend;
49704364 594
7918f24d
NC
595 PERL_ARGS_ASSERT_NEXT_SYMBOL;
596
49704364
WL
597 symptr->flags &= ~FLAG_SLASH;
598
599 while (patptr < patend) {
600 if (isSPACE(*patptr))
601 patptr++;
602 else if (*patptr == '#') {
603 patptr++;
604 while (patptr < patend && *patptr != '\n')
605 patptr++;
606 if (patptr < patend)
607 patptr++;
608 } else {
fc241834 609 /* We should have found a template code */
49704364 610 I32 code = *patptr++ & 0xFF;
66c611c5 611 U32 inherited_modifiers = 0;
49704364
WL
612
613 if (code == ','){ /* grandfather in commas but with a warning */
614 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
615 symptr->flags |= FLAG_COMMA;
616 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 617 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
618 }
619 continue;
620 }
fc241834 621
49704364 622 /* for '(', skip to ')' */
fc241834 623 if (code == '(') {
49704364
WL
624 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
625 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 626 _action( symptr ) );
49704364
WL
627 symptr->grpbeg = patptr;
628 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
629 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
630 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 631 _action( symptr ) );
49704364
WL
632 }
633
66c611c5
MHM
634 /* look for group modifiers to inherit */
635 if (TYPE_ENDIANNESS(symptr->flags)) {
636 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
637 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
638 }
639
1109a392
MHM
640 /* look for modifiers */
641 while (patptr < patend) {
b7787f18
AL
642 const char *allowed;
643 I32 modifier;
1109a392
MHM
644 switch (*patptr) {
645 case '!':
646 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 647 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
648 break;
649 case '>':
650 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 651 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
652 break;
653 case '<':
654 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 655 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
656 break;
657 default:
b7787f18
AL
658 allowed = "";
659 modifier = 0;
1109a392
MHM
660 break;
661 }
66c611c5 662
1109a392
MHM
663 if (modifier == 0)
664 break;
66c611c5 665
1109a392
MHM
666 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
667 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 668 allowed, _action( symptr ) );
66c611c5
MHM
669
670 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 671 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 672 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
673 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
674 TYPE_ENDIANNESS_MASK)
675 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 676 *patptr, _action( symptr ) );
66c611c5 677
a2a5de95
NC
678 if ((code & modifier)) {
679 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
680 "Duplicate modifier '%c' after '%c' in %s",
681 *patptr, (int) TYPE_NO_MODIFIERS(code),
682 _action( symptr ) );
1109a392 683 }
66c611c5 684
1109a392
MHM
685 code |= modifier;
686 patptr++;
49704364
WL
687 }
688
66c611c5
MHM
689 /* inherit modifiers */
690 code |= inherited_modifiers;
691
fc241834 692 /* look for count and/or / */
49704364
WL
693 if (patptr < patend) {
694 if (isDIGIT(*patptr)) {
695 patptr = get_num( patptr, &symptr->length );
696 symptr->howlen = e_number;
697
698 } else if (*patptr == '*') {
699 patptr++;
700 symptr->howlen = e_star;
701
702 } else if (*patptr == '[') {
f7fe979e 703 const char* lenptr = ++patptr;
49704364
WL
704 symptr->howlen = e_number;
705 patptr = group_end( patptr, patend, ']' ) + 1;
706 /* what kind of [] is it? */
707 if (isDIGIT(*lenptr)) {
708 lenptr = get_num( lenptr, &symptr->length );
709 if( *lenptr != ']' )
710 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 711 _action( symptr ) );
49704364
WL
712 } else {
713 tempsym_t savsym = *symptr;
714 symptr->patend = patptr-1;
715 symptr->patptr = lenptr;
716 savsym.length = measure_struct(symptr);
717 *symptr = savsym;
718 }
719 } else {
720 symptr->howlen = e_no_len;
721 symptr->length = 1;
722 }
723
724 /* try to find / */
725 while (patptr < patend) {
726 if (isSPACE(*patptr))
727 patptr++;
728 else if (*patptr == '#') {
729 patptr++;
730 while (patptr < patend && *patptr != '\n')
731 patptr++;
732 if (patptr < patend)
733 patptr++;
734 } else {
66c611c5 735 if (*patptr == '/') {
49704364
WL
736 symptr->flags |= FLAG_SLASH;
737 patptr++;
66c611c5
MHM
738 if (patptr < patend &&
739 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 740 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 741 _action( symptr ) );
49704364
WL
742 }
743 break;
744 }
18529408 745 }
49704364
WL
746 } else {
747 /* at end - no count, no / */
748 symptr->howlen = e_no_len;
749 symptr->length = 1;
750 }
751
752 symptr->code = code;
fc241834 753 symptr->patptr = patptr;
49704364 754 return TRUE;
18529408 755 }
49704364 756 }
fc241834 757 symptr->patptr = patptr;
49704364 758 return FALSE;
18529408
IZ
759}
760
18529408 761/*
fc241834 762 There is no way to cleanly handle the case where we should process the
08ca2aa3 763 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
764 (e.g. estimates like strend-s as an upper bound for the number of
765 characters left wouldn't work). So if we foresee the need of this
766 (pattern starts with U or contains U0), we want to work on the encoded
767 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
768 themselves if they need to do a lot of unpacks like this on it
769*/
fc241834 770STATIC bool
08ca2aa3
TH
771need_utf8(const char *pat, const char *patend)
772{
773 bool first = TRUE;
7918f24d
NC
774
775 PERL_ARGS_ASSERT_NEED_UTF8;
776
08ca2aa3
TH
777 while (pat < patend) {
778 if (pat[0] == '#') {
779 pat++;
f7fe979e 780 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
781 if (!pat) return FALSE;
782 } else if (pat[0] == 'U') {
783 if (first || pat[1] == '0') return TRUE;
784 } else first = FALSE;
785 pat++;
786 }
787 return FALSE;
788}
789
790STATIC char
791first_symbol(const char *pat, const char *patend) {
7918f24d
NC
792 PERL_ARGS_ASSERT_FIRST_SYMBOL;
793
08ca2aa3
TH
794 while (pat < patend) {
795 if (pat[0] != '#') return pat[0];
796 pat++;
f7fe979e 797 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
798 if (!pat) return 0;
799 pat++;
800 }
801 return 0;
802}
803
804/*
dcccc8ff
KW
805
806=head1 Pack and Unpack
807
7accc089
JH
808=for apidoc unpackstring
809
796b6530 810The engine implementing the C<unpack()> Perl function.
21ebfc7a 811
796b6530
KW
812Using the template C<pat..patend>, this function unpacks the string
813C<s..strend> into a number of mortal SVs, which it pushes onto the perl
814argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 815C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
816pushed elements.
817
796b6530
KW
818The C<strend> and C<patend> pointers should point to the byte following the
819last character of each string.
21ebfc7a
DM
820
821Although this function returns its values on the perl argument stack, it
822doesn't take any parameters from that stack (and thus in particular
796b6530 823there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 824example).
7accc089
JH
825
826=cut */
827
e1b825c1 828SSize_t
f7fe979e 829Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 830{
f7fe979e 831 tempsym_t sym;
08ca2aa3 832
7918f24d
NC
833 PERL_ARGS_ASSERT_UNPACKSTRING;
834
f337b084 835 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
836 else if (need_utf8(pat, patend)) {
837 /* We probably should try to avoid this in case a scalar context call
838 wouldn't get to the "U0" */
839 STRLEN len = strend - s;
230e1fce 840 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
841 SAVEFREEPV(s);
842 strend = s + len;
f337b084 843 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
844 }
845
f337b084
TH
846 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
847 flags |= FLAG_PARSE_UTF8;
08ca2aa3 848
f7fe979e 849 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
850
851 return unpack_rec(&sym, s, s, strend, NULL );
852}
853
e1b825c1 854STATIC SSize_t
f7fe979e 855S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 856{
20b7effb 857 dSP;
3297d27d 858 SV *sv = NULL;
e1b825c1 859 const SSize_t start_sp_offset = SP - PL_stack_base;
49704364 860 howlen_t howlen;
e1b825c1 861 SSize_t checksum = 0;
92d41999 862 UV cuv = 0;
a6ec74c1 863 NV cdouble = 0.0;
e1b825c1 864 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 865 bool beyond = FALSE;
21c16052 866 bool explicit_length;
9e27e96a 867 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 868 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
869
870 PERL_ARGS_ASSERT_UNPACK_REC;
871
28be1210 872 symptr->strbeg = s - strbeg;
49704364 873
49704364 874 while (next_symbol(symptr)) {
a7a3cfaa 875 packprops_t props;
e1b825c1 876 SSize_t len;
f337b084 877 I32 datumtype = symptr->code;
a1219b5e 878 bool needs_swap;
206947d2 879 /* do first one only unless in list context
08ca2aa3 880 / is implemented by unpacking the count, then popping it from the
206947d2 881 stack, so must check that we're not in the middle of a / */
49704364 882 if ( unpack_only_one
206947d2 883 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 884 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 885 break;
49704364 886
f337b084 887 switch (howlen = symptr->howlen) {
fc241834
RGS
888 case e_star:
889 len = strend - strbeg; /* long enough */
49704364 890 break;
f337b084
TH
891 default:
892 /* e_no_len and e_number */
893 len = symptr->length;
894 break;
49704364 895 }
18529408 896
21c16052 897 explicit_length = TRUE;
a6ec74c1 898 redo_switch:
49704364 899 beyond = s >= strend;
a7a3cfaa
TH
900
901 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
902 if (props) {
903 /* props nonzero means we can process this letter. */
e1b825c1
Z
904 const SSize_t size = props & PACK_SIZE_MASK;
905 const SSize_t howmany = (strend - s) / size;
a7a3cfaa
TH
906 if (len > howmany)
907 len = howmany;
908
909 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
910 if (len && unpack_only_one) len = 1;
911 EXTEND(SP, len);
912 EXTEND_MORTAL(len);
78d46eaa
NC
913 }
914 }
a7a3cfaa 915
a1219b5e
NC
916 needs_swap = NEEDS_SWAP(datumtype);
917
1109a392 918 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 919 default:
1109a392 920 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 921
a6ec74c1 922 case '%':
49704364 923 if (howlen == e_no_len)
18529408 924 len = 16; /* len is not specified */
a6ec74c1 925 checksum = len;
92d41999 926 cuv = 0;
a6ec74c1 927 cdouble = 0;
18529408 928 continue;
81d52ecd 929
18529408
IZ
930 case '(':
931 {
49704364 932 tempsym_t savsym = *symptr;
9e27e96a 933 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 934 symptr->flags |= group_modifiers;
49704364 935 symptr->patend = savsym.grpend;
28be1210 936 symptr->previous = &savsym;
49704364 937 symptr->level++;
18529408 938 PUTBACK;
c6f750d1 939 if (len && unpack_only_one) len = 1;
18529408 940 while (len--) {
49704364 941 symptr->patptr = savsym.grpbeg;
f337b084
TH
942 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
943 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
944 unpack_rec(symptr, s, strbeg, strend, &s);
945 if (s == strend && savsym.howlen == e_star)
49704364 946 break; /* No way to continue */
18529408
IZ
947 }
948 SPAGAIN;
28be1210 949 savsym.flags = symptr->flags & ~group_modifiers;
49704364 950 *symptr = savsym;
18529408
IZ
951 break;
952 }
28be1210 953 case '.' | TYPE_IS_SHRIEKING:
28be1210 954 case '.': {
9e27e96a 955 const char *from;
28be1210 956 SV *sv;
9e27e96a 957 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
958 if (howlen == e_star) from = strbeg;
959 else if (len <= 0) from = s;
960 else {
961 tempsym_t *group = symptr;
962
963 while (--len && group) group = group->previous;
964 from = group ? strbeg + group->strbeg : strbeg;
965 }
966 sv = from <= s ?
00646304
CB
967 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
968 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 969 mXPUSHs(sv);
28be1210
TH
970 break;
971 }
28be1210 972 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 973 case '@':
28be1210 974 s = strbeg + symptr->strbeg;
28be1210 975 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 976 {
08ca2aa3
TH
977 while (len > 0) {
978 if (s >= strend)
979 Perl_croak(aTHX_ "'@' outside of string in unpack");
980 s += UTF8SKIP(s);
981 len--;
982 }
983 if (s > strend)
984 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
985 } else {
28be1210 986 if (strend-s < len)
fc241834 987 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 988 s += len;
08ca2aa3 989 }
a6ec74c1 990 break;
62f95557
IZ
991 case 'X' | TYPE_IS_SHRIEKING:
992 if (!len) /* Avoid division by 0 */
993 len = 1;
08ca2aa3 994 if (utf8) {
f7fe979e 995 const char *hop, *last;
e1b825c1 996 SSize_t l = len;
f337b084
TH
997 hop = last = strbeg;
998 while (hop < s) {
999 hop += UTF8SKIP(hop);
1000 if (--l == 0) {
08ca2aa3 1001 last = hop;
f337b084
TH
1002 l = len;
1003 }
fc241834 1004 }
f337b084
TH
1005 if (last > s)
1006 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1007 s = last;
1008 break;
f337b084
TH
1009 }
1010 len = (s - strbeg) % len;
924ba076 1011 /* FALLTHROUGH */
a6ec74c1 1012 case 'X':
08ca2aa3
TH
1013 if (utf8) {
1014 while (len > 0) {
1015 if (s <= strbeg)
1016 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1017 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1018 if (s <= strbeg)
1019 Perl_croak(aTHX_ "'X' outside of string in unpack");
1020 }
1021 len--;
1022 }
1023 } else {
fc241834
RGS
1024 if (len > s - strbeg)
1025 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1026 s -= len;
08ca2aa3 1027 }
a6ec74c1 1028 break;
9e27e96a 1029 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 1030 SSize_t ai32;
62f95557
IZ
1031 if (!len) /* Avoid division by 0 */
1032 len = 1;
230e1fce
NC
1033 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1034 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1035 if (ai32 == 0) break;
1036 len -= ai32;
9e27e96a 1037 }
924ba076 1038 /* FALLTHROUGH */
a6ec74c1 1039 case 'x':
08ca2aa3
TH
1040 if (utf8) {
1041 while (len>0) {
1042 if (s >= strend)
1043 Perl_croak(aTHX_ "'x' outside of string in unpack");
1044 s += UTF8SKIP(s);
1045 len--;
1046 }
1047 } else {
fc241834
RGS
1048 if (len > strend - s)
1049 Perl_croak(aTHX_ "'x' outside of string in unpack");
1050 s += len;
f337b084 1051 }
a6ec74c1
JH
1052 break;
1053 case '/':
49704364 1054 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1055
a6ec74c1
JH
1056 case 'A':
1057 case 'Z':
1058 case 'a':
08ca2aa3
TH
1059 if (checksum) {
1060 /* Preliminary length estimate is assumed done in 'W' */
1061 if (len > strend - s) len = strend - s;
1062 goto W_checksum;
1063 }
1064 if (utf8) {
e1b825c1 1065 SSize_t l;
f7fe979e 1066 const char *hop;
08ca2aa3
TH
1067 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1068 if (hop >= strend) {
1069 if (hop > strend)
1070 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1071 break;
fc241834 1072 }
a6ec74c1 1073 }
08ca2aa3
TH
1074 if (hop > strend)
1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1076 len = hop - s;
1077 } else if (len > strend - s)
1078 len = strend - s;
1079
1080 if (datumtype == 'Z') {
1081 /* 'Z' strips stuff after first null */
f7fe979e 1082 const char *ptr, *end;
f337b084
TH
1083 end = s + len;
1084 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1085 sv = newSVpvn(s, ptr-s);
1086 if (howlen == e_star) /* exact for 'Z*' */
1087 len = ptr-s + (ptr != strend ? 1 : 0);
1088 } else if (datumtype == 'A') {
1089 /* 'A' strips both nulls and spaces */
f7fe979e 1090 const char *ptr;
18bdf90a 1091 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1092 for (ptr = s+len-1; ptr >= s; ptr--) {
1093 if ( *ptr != 0
1094 && !UTF8_IS_CONTINUATION(*ptr)
1095 && !isSPACE_utf8_safe(ptr, strend))
1096 {
1097 break;
1098 }
1099 }
18bdf90a
TH
1100 if (ptr >= s) ptr += UTF8SKIP(ptr);
1101 else ptr++;
28be1210 1102 if (ptr > s+len)
18bdf90a
TH
1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1104 } else {
1105 for (ptr = s+len-1; ptr >= s; ptr--)
1106 if (*ptr != 0 && !isSPACE(*ptr)) break;
1107 ptr++;
1108 }
08ca2aa3
TH
1109 sv = newSVpvn(s, ptr-s);
1110 } else sv = newSVpvn(s, len);
1111
1112 if (utf8) {
1113 SvUTF8_on(sv);
1114 /* Undo any upgrade done due to need_utf8() */
f337b084 1115 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1116 sv_utf8_downgrade(sv, 0);
a6ec74c1 1117 }
6e449a3a 1118 mXPUSHs(sv);
08ca2aa3 1119 s += len;
a6ec74c1
JH
1120 break;
1121 case 'B':
08ca2aa3
TH
1122 case 'b': {
1123 char *str;
49704364 1124 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1125 len = (strend - s) * 8;
1126 if (checksum) {
f337b084 1127 if (utf8)
08ca2aa3 1128 while (len >= 8 && s < strend) {
9df874cd 1129 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1130 len -= 8;
1131 }
f337b084 1132 else
fc241834 1133 while (len >= 8) {
08ca2aa3 1134 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1135 len -= 8;
1136 }
08ca2aa3
TH
1137 if (len && s < strend) {
1138 U8 bits;
f337b084
TH
1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 if (datumtype == 'b')
a6ec74c1 1141 while (len-- > 0) {
92d41999 1142 if (bits & 1) cuv++;
a6ec74c1
JH
1143 bits >>= 1;
1144 }
f337b084 1145 else
a6ec74c1 1146 while (len-- > 0) {
08ca2aa3 1147 if (bits & 0x80) cuv++;
a6ec74c1
JH
1148 bits <<= 1;
1149 }
fc241834 1150 }
a6ec74c1
JH
1151 break;
1152 }
08ca2aa3 1153
561b68a9 1154 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1155 SvPOK_on(sv);
1156 str = SvPVX(sv);
1157 if (datumtype == 'b') {
f337b084 1158 U8 bits = 0;
e1b825c1 1159 const SSize_t ai32 = len;
08ca2aa3
TH
1160 for (len = 0; len < ai32; len++) {
1161 if (len & 7) bits >>= 1;
1162 else if (utf8) {
1163 if (s >= strend) break;
9df874cd 1164 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1165 } else bits = *(U8 *) s++;
1166 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1167 }
08ca2aa3 1168 } else {
f337b084 1169 U8 bits = 0;
e1b825c1 1170 const SSize_t ai32 = len;
08ca2aa3
TH
1171 for (len = 0; len < ai32; len++) {
1172 if (len & 7) bits <<= 1;
1173 else if (utf8) {
1174 if (s >= strend) break;
9df874cd 1175 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1176 } else bits = *(U8 *) s++;
1177 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1178 }
1179 }
1180 *str = '\0';
aa07b2f6 1181 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1182 XPUSHs(sv);
a6ec74c1 1183 break;
08ca2aa3 1184 }
a6ec74c1 1185 case 'H':
08ca2aa3 1186 case 'h': {
3297d27d 1187 char *str = NULL;
fc241834 1188 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1189 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1190 len = (strend - s) * 2;
858fe5e1
TC
1191 if (!checksum) {
1192 sv = sv_2mortal(newSV(len ? len : 1));
1193 SvPOK_on(sv);
1194 str = SvPVX(sv);
1195 }
a6ec74c1 1196 if (datumtype == 'h') {
f337b084 1197 U8 bits = 0;
e1b825c1 1198 SSize_t ai32 = len;
fc241834
RGS
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 1) bits >>= 4;
1201 else if (utf8) {
1202 if (s >= strend) break;
9df874cd 1203 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1204 } else bits = * (U8 *) s++;
858fe5e1
TC
1205 if (!checksum)
1206 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1207 }
08ca2aa3 1208 } else {
f337b084 1209 U8 bits = 0;
e1b825c1 1210 const SSize_t ai32 = len;
08ca2aa3
TH
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits <<= 4;
1213 else if (utf8) {
1214 if (s >= strend) break;
9df874cd 1215 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1216 } else bits = *(U8 *) s++;
858fe5e1
TC
1217 if (!checksum)
1218 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1219 }
1220 }
858fe5e1
TC
1221 if (!checksum) {
1222 *str = '\0';
1223 SvCUR_set(sv, str - SvPVX_const(sv));
1224 XPUSHs(sv);
1225 }
a6ec74c1 1226 break;
08ca2aa3 1227 }
1651fc44
ML
1228 case 'C':
1229 if (len == 0) {
1230 if (explicit_length)
1231 /* Switch to "character" mode */
1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1233 break;
1234 }
924ba076 1235 /* FALLTHROUGH */
a6ec74c1 1236 case 'c':
1651fc44
ML
1237 while (len-- > 0 && s < strend) {
1238 int aint;
1239 if (utf8)
1240 {
1241 STRLEN retlen;
1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1244 if (retlen == (STRLEN) -1)
1651fc44
ML
1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1246 s += retlen;
1247 }
1248 else
1249 aint = *(U8 *)(s)++;
1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1251 aint -= 256;
08ca2aa3 1252 if (!checksum)
6e449a3a 1253 mPUSHi(aint);
73cb7263
NC
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)aint;
1256 else
1257 cuv += aint;
a6ec74c1
JH
1258 }
1259 break;
08ca2aa3
TH
1260 case 'W':
1261 W_checksum:
1651fc44 1262 if (utf8) {
08ca2aa3 1263 while (len-- > 0 && s < strend) {
08ca2aa3 1264 STRLEN retlen;
f7fe979e 1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1267 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1269 s += retlen;
1270 if (!checksum)
6e449a3a 1271 mPUSHu(val);
08ca2aa3
TH
1272 else if (checksum > bits_in_uv)
1273 cdouble += (NV) val;
d6d3e8bd 1274 else
08ca2aa3 1275 cuv += val;
fc241834 1276 }
08ca2aa3 1277 } else if (!checksum)
a6ec74c1 1278 while (len-- > 0) {
f7fe979e 1279 const U8 ch = *(U8 *) s++;
6e449a3a 1280 mPUSHu(ch);
a6ec74c1 1281 }
08ca2aa3
TH
1282 else if (checksum > bits_in_uv)
1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1284 else
1285 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1286 break;
1287 case 'U':
35bcd338 1288 if (len == 0) {
c5333953 1289 if (explicit_length && howlen != e_star) {
08ca2aa3 1290 /* Switch to "bytes in UTF-8" mode */
f337b084 1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1292 else
1293 /* Should be impossible due to the need_utf8() test */
1294 Perl_croak(aTHX_ "U0 mode on a byte string");
1295 }
35bcd338
JH
1296 break;
1297 }
08ca2aa3 1298 if (len > strend - s) len = strend - s;
fc241834 1299 if (!checksum) {
08ca2aa3
TH
1300 if (len && unpack_only_one) len = 1;
1301 EXTEND(SP, len);
1302 EXTEND_MORTAL(len);
fc241834 1303 }
08ca2aa3
TH
1304 while (len-- > 0 && s < strend) {
1305 STRLEN retlen;
1306 UV auv;
1307 if (utf8) {
81b3f640 1308 U8 result[UTF8_MAXLEN+1];
f7fe979e 1309 const char *ptr = s;
08ca2aa3 1310 STRLEN len;
08ca2aa3
TH
1311 /* Bug: warns about bad utf8 even if we are short on bytes
1312 and will break out of the loop */
9df874cd 1313 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1314 'U'))
08ca2aa3
TH
1315 break;
1316 len = UTF8SKIP(result);
9df874cd 1317 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1318 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1319 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1320 len,
1321 &retlen,
1322 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1323 s = ptr;
1324 } else {
3ece276e
KW
1325 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1326 strend - s,
1327 &retlen,
1328 UTF8_ALLOW_DEFAULT));
df337714 1329 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1330 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1331 s += retlen;
1332 }
1333 if (!checksum)
6e449a3a 1334 mPUSHu(auv);
73cb7263 1335 else if (checksum > bits_in_uv)
08ca2aa3 1336 cdouble += (NV) auv;
73cb7263 1337 else
08ca2aa3 1338 cuv += auv;
a6ec74c1
JH
1339 }
1340 break;
49704364
WL
1341 case 's' | TYPE_IS_SHRIEKING:
1342#if SHORTSIZE != SIZE16
73cb7263 1343 while (len-- > 0) {
08ca2aa3 1344 short ashort;
aaec8192 1345 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1346 if (!checksum)
6e449a3a 1347 mPUSHi(ashort);
73cb7263
NC
1348 else if (checksum > bits_in_uv)
1349 cdouble += (NV)ashort;
1350 else
1351 cuv += ashort;
49704364
WL
1352 }
1353 break;
1354#else
924ba076 1355 /* FALLTHROUGH */
a6ec74c1 1356#endif
49704364 1357 case 's':
73cb7263 1358 while (len-- > 0) {
08ca2aa3
TH
1359 I16 ai16;
1360
1361#if U16SIZE > SIZE16
1362 ai16 = 0;
1363#endif
aaec8192 1364 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1365#if U16SIZE > SIZE16
73cb7263
NC
1366 if (ai16 > 32767)
1367 ai16 -= 65536;
a6ec74c1 1368#endif
08ca2aa3 1369 if (!checksum)
6e449a3a 1370 mPUSHi(ai16);
73cb7263
NC
1371 else if (checksum > bits_in_uv)
1372 cdouble += (NV)ai16;
1373 else
1374 cuv += ai16;
a6ec74c1
JH
1375 }
1376 break;
49704364
WL
1377 case 'S' | TYPE_IS_SHRIEKING:
1378#if SHORTSIZE != SIZE16
73cb7263 1379 while (len-- > 0) {
08ca2aa3 1380 unsigned short aushort;
c0f2b636 1381 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
08ca2aa3 1382 if (!checksum)
6e449a3a 1383 mPUSHu(aushort);
73cb7263
NC
1384 else if (checksum > bits_in_uv)
1385 cdouble += (NV)aushort;
1386 else
1387 cuv += aushort;
49704364
WL
1388 }
1389 break;
1390#else
924ba076 1391 /* FALLTHROUGH */
49704364 1392#endif
a6ec74c1
JH
1393 case 'v':
1394 case 'n':
1395 case 'S':
73cb7263 1396 while (len-- > 0) {
08ca2aa3
TH
1397 U16 au16;
1398#if U16SIZE > SIZE16
1399 au16 = 0;
1400#endif
aaec8192 1401 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1402 if (datumtype == 'n')
1403 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1404 if (datumtype == 'v')
1405 au16 = vtohs(au16);
08ca2aa3 1406 if (!checksum)
6e449a3a 1407 mPUSHu(au16);
73cb7263 1408 else if (checksum > bits_in_uv)
f337b084 1409 cdouble += (NV) au16;
73cb7263
NC
1410 else
1411 cuv += au16;
a6ec74c1
JH
1412 }
1413 break;
068bd2e7
MHM
1414 case 'v' | TYPE_IS_SHRIEKING:
1415 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1416 while (len-- > 0) {
08ca2aa3
TH
1417 I16 ai16;
1418# if U16SIZE > SIZE16
1419 ai16 = 0;
1420# endif
aaec8192 1421 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1422 /* There should never be any byte-swapping here. */
1423 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1424 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1425 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1426 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1427 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1428 if (!checksum)
6e449a3a 1429 mPUSHi(ai16);
73cb7263 1430 else if (checksum > bits_in_uv)
08ca2aa3 1431 cdouble += (NV) ai16;
73cb7263
NC
1432 else
1433 cuv += ai16;
068bd2e7
MHM
1434 }
1435 break;
a6ec74c1 1436 case 'i':
49704364 1437 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1438 while (len-- > 0) {
08ca2aa3 1439 int aint;
aaec8192 1440 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1441 if (!checksum)
6e449a3a 1442 mPUSHi(aint);
73cb7263
NC
1443 else if (checksum > bits_in_uv)
1444 cdouble += (NV)aint;
1445 else
1446 cuv += aint;
a6ec74c1
JH
1447 }
1448 break;
1449 case 'I':
49704364 1450 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1451 while (len-- > 0) {
08ca2aa3 1452 unsigned int auint;
aaec8192 1453 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1454 if (!checksum)
6e449a3a 1455 mPUSHu(auint);
73cb7263
NC
1456 else if (checksum > bits_in_uv)
1457 cdouble += (NV)auint;
1458 else
1459 cuv += auint;
a6ec74c1
JH
1460 }
1461 break;
92d41999 1462 case 'j':
73cb7263 1463 while (len-- > 0) {
08ca2aa3 1464 IV aiv;
aaec8192 1465 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1466 if (!checksum)
6e449a3a 1467 mPUSHi(aiv);
73cb7263
NC
1468 else if (checksum > bits_in_uv)
1469 cdouble += (NV)aiv;
1470 else
1471 cuv += aiv;
92d41999
JH
1472 }
1473 break;
1474 case 'J':
73cb7263 1475 while (len-- > 0) {
08ca2aa3 1476 UV auv;
aaec8192 1477 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1478 if (!checksum)
6e449a3a 1479 mPUSHu(auv);
73cb7263
NC
1480 else if (checksum > bits_in_uv)
1481 cdouble += (NV)auv;
1482 else
1483 cuv += auv;
92d41999
JH
1484 }
1485 break;
49704364
WL
1486 case 'l' | TYPE_IS_SHRIEKING:
1487#if LONGSIZE != SIZE32
73cb7263 1488 while (len-- > 0) {
08ca2aa3 1489 long along;
aaec8192 1490 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1491 if (!checksum)
6e449a3a 1492 mPUSHi(along);
73cb7263
NC
1493 else if (checksum > bits_in_uv)
1494 cdouble += (NV)along;
1495 else
1496 cuv += along;
49704364
WL
1497 }
1498 break;
1499#else
924ba076 1500 /* FALLTHROUGH */
a6ec74c1 1501#endif
49704364 1502 case 'l':
73cb7263 1503 while (len-- > 0) {
08ca2aa3
TH
1504 I32 ai32;
1505#if U32SIZE > SIZE32
1506 ai32 = 0;
1507#endif
aaec8192 1508 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1509#if U32SIZE > SIZE32
08ca2aa3 1510 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1511#endif
08ca2aa3 1512 if (!checksum)
6e449a3a 1513 mPUSHi(ai32);
73cb7263
NC
1514 else if (checksum > bits_in_uv)
1515 cdouble += (NV)ai32;
1516 else
1517 cuv += ai32;
a6ec74c1
JH
1518 }
1519 break;
49704364
WL
1520 case 'L' | TYPE_IS_SHRIEKING:
1521#if LONGSIZE != SIZE32
73cb7263 1522 while (len-- > 0) {
08ca2aa3 1523 unsigned long aulong;
aaec8192 1524 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1525 if (!checksum)
6e449a3a 1526 mPUSHu(aulong);
73cb7263
NC
1527 else if (checksum > bits_in_uv)
1528 cdouble += (NV)aulong;
1529 else
1530 cuv += aulong;
49704364
WL
1531 }
1532 break;
1533#else
924ba076 1534 /* FALLTHROUGH */
49704364 1535#endif
a6ec74c1
JH
1536 case 'V':
1537 case 'N':
1538 case 'L':
73cb7263 1539 while (len-- > 0) {
08ca2aa3
TH
1540 U32 au32;
1541#if U32SIZE > SIZE32
1542 au32 = 0;
1543#endif
aaec8192 1544 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1545 if (datumtype == 'N')
1546 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1547 if (datumtype == 'V')
1548 au32 = vtohl(au32);
08ca2aa3 1549 if (!checksum)
6e449a3a 1550 mPUSHu(au32);
fc241834
RGS
1551 else if (checksum > bits_in_uv)
1552 cdouble += (NV)au32;
1553 else
1554 cuv += au32;
a6ec74c1
JH
1555 }
1556 break;
068bd2e7
MHM
1557 case 'V' | TYPE_IS_SHRIEKING:
1558 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1559 while (len-- > 0) {
08ca2aa3 1560 I32 ai32;
f8e5a5db 1561#if U32SIZE > SIZE32
08ca2aa3 1562 ai32 = 0;
f8e5a5db 1563#endif
aaec8192 1564 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1565 /* There should never be any byte swapping here. */
1566 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1567 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1568 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1569 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1570 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1571 if (!checksum)
6e449a3a 1572 mPUSHi(ai32);
73cb7263
NC
1573 else if (checksum > bits_in_uv)
1574 cdouble += (NV)ai32;
1575 else
1576 cuv += ai32;
068bd2e7
MHM
1577 }
1578 break;
a6ec74c1 1579 case 'p':
a6ec74c1 1580 while (len-- > 0) {
f7fe979e 1581 const char *aptr;
aaec8192 1582 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1583 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1584 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1585 }
1586 break;
1587 case 'w':
a6ec74c1
JH
1588 {
1589 UV auv = 0;
e1b825c1 1590 size_t bytes = 0;
fc241834 1591
08ca2aa3
TH
1592 while (len > 0 && s < strend) {
1593 U8 ch;
f337b084 1594 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1595 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1596 /* UTF8_IS_XXXXX not right here because this is a BER, not
1597 * UTF-8 format - using constant 0x80 */
08ca2aa3 1598 if (ch < 0x80) {
a6ec74c1 1599 bytes = 0;
6e449a3a 1600 mPUSHu(auv);
a6ec74c1
JH
1601 len--;
1602 auv = 0;
08ca2aa3 1603 continue;
a6ec74c1 1604 }
08ca2aa3 1605 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1606 const char *t;
a6ec74c1 1607
147e3846
KW
1608 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1609 (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1610 while (s < strend) {
f337b084 1611 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1612 sv = mul128(sv, (U8)(ch & 0x7f));
1613 if (!(ch & 0x80)) {
a6ec74c1
JH
1614 bytes = 0;
1615 break;
1616 }
1617 }
10516c54 1618 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1619 while (*t == '0')
1620 t++;
1621 sv_chop(sv, t);
6e449a3a 1622 mPUSHs(sv);
a6ec74c1
JH
1623 len--;
1624 auv = 0;
1625 }
1626 }
1627 if ((s >= strend) && bytes)
49704364 1628 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1629 }
1630 break;
1631 case 'P':
49704364
WL
1632 if (symptr->howlen == e_star)
1633 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1634 EXTEND(SP, 1);
2d3e0934 1635 if (s + sizeof(char*) <= strend) {
08ca2aa3 1636 char *aptr;
aaec8192 1637 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1638 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1639 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1640 }
a6ec74c1 1641 break;
c174bf3b 1642#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1643 case 'q':
73cb7263 1644 while (len-- > 0) {
08ca2aa3 1645 Quad_t aquad;
aaec8192 1646 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1647 if (!checksum)
c174bf3b 1648 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1649 else if (checksum > bits_in_uv)
1650 cdouble += (NV)aquad;
1651 else
1652 cuv += aquad;
1653 }
a6ec74c1
JH
1654 break;
1655 case 'Q':
73cb7263 1656 while (len-- > 0) {
08ca2aa3 1657 Uquad_t auquad;
aaec8192 1658 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1659 if (!checksum)
c174bf3b 1660 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1661 else if (checksum > bits_in_uv)
1662 cdouble += (NV)auquad;
1663 else
1664 cuv += auquad;
a6ec74c1
JH
1665 }
1666 break;
1640b983 1667#endif
a6ec74c1
JH
1668 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1669 case 'f':
73cb7263 1670 while (len-- > 0) {
08ca2aa3 1671 float afloat;
aaec8192 1672 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1673 if (!checksum)
6e449a3a 1674 mPUSHn(afloat);
08ca2aa3 1675 else
73cb7263 1676 cdouble += afloat;
fc241834 1677 }
a6ec74c1
JH
1678 break;
1679 case 'd':
73cb7263 1680 while (len-- > 0) {
08ca2aa3 1681 double adouble;
aaec8192 1682 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1683 if (!checksum)
6e449a3a 1684 mPUSHn(adouble);
08ca2aa3 1685 else
73cb7263 1686 cdouble += adouble;
fc241834 1687 }
a6ec74c1 1688 break;
92d41999 1689 case 'F':
73cb7263 1690 while (len-- > 0) {
275663fa 1691 NV_bytes anv;
aaec8192
NC
1692 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1693 datumtype, needs_swap);
08ca2aa3 1694 if (!checksum)
275663fa 1695 mPUSHn(anv.nv);
08ca2aa3 1696 else
275663fa 1697 cdouble += anv.nv;
fc241834 1698 }
92d41999
JH
1699 break;
1700#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1701 case 'D':
73cb7263 1702 while (len-- > 0) {
275663fa 1703 ld_bytes aldouble;
aaec8192
NC
1704 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1705 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1706 /* The most common long double format, the x86 80-bit
1707 * extended precision, has either 2 or 6 unused bytes,
1708 * which may contain garbage, which may contain
1709 * unintentional data. While we do zero the bytes of
1710 * the long double data in pack(), here in unpack() we
1711 * don't, because it's really hard to envision that
1712 * reading the long double off aldouble would be
e075ae47 1713 * affected by the unused bytes.
42262fd3
JH
1714 *
1715 * Note that trying to unpack 'long doubles' of 'long
1716 * doubles' packed in another system is in the general
1717 * case doomed without having more detail. */
08ca2aa3 1718 if (!checksum)
275663fa 1719 mPUSHn(aldouble.ld);
08ca2aa3 1720 else
275663fa 1721 cdouble += aldouble.ld;
92d41999
JH
1722 }
1723 break;
1724#endif
a6ec74c1 1725 case 'u':
858fe5e1 1726 if (!checksum) {
f7fe979e 1727 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1728 sv = sv_2mortal(newSV(l));
12cad9bd
TC
1729 if (l) {
1730 SvPOK_on(sv);
1731 *SvEND(sv) = '\0';
1732 }
08ca2aa3 1733 }
08ca2aa3 1734
bee81d36
KW
1735 /* Note that all legal uuencoded strings are ASCII printables, so
1736 * have the same representation under UTF-8 vs not. This means we
1737 * can ignore UTF8ness on legal input. For illegal we stop at the
1738 * first failure, and don't report where/what that is, so again we
1739 * can ignore UTF8ness */
1740
9233b56b
KW
1741 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1742 I32 a, b, c, d;
1743 char hunk[3];
1744
1745 len = PL_uudmap[*(U8*)s++] & 077;
1746 while (len > 0) {
1747 if (s < strend && ISUUCHAR(*s))
1748 a = PL_uudmap[*(U8*)s++] & 077;
1749 else
1750 a = 0;
1751 if (s < strend && ISUUCHAR(*s))
1752 b = PL_uudmap[*(U8*)s++] & 077;
1753 else
1754 b = 0;
1755 if (s < strend && ISUUCHAR(*s))
1756 c = PL_uudmap[*(U8*)s++] & 077;
1757 else
1758 c = 0;
1759 if (s < strend && ISUUCHAR(*s))
1760 d = PL_uudmap[*(U8*)s++] & 077;
1761 else
1762 d = 0;
1763 hunk[0] = (char)((a << 2) | (b >> 4));
1764 hunk[1] = (char)((b << 4) | (c >> 2));
1765 hunk[2] = (char)((c << 6) | d);
1766 if (!checksum)
1767 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1768 len -= 3;
1769 }
1770 if (*s == '\n')
1771 s++;
1772 else /* possible checksum byte */
1773 if (s + 1 < strend && s[1] == '\n')
1774 s += 2;
1775 }
858fe5e1
TC
1776 if (!checksum)
1777 XPUSHs(sv);
a6ec74c1 1778 break;
99f862a1 1779 } /* End of switch */
49704364 1780
a6ec74c1 1781 if (checksum) {
1109a392 1782 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1783 (checksum > bits_in_uv &&
08ca2aa3
TH
1784 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1785 NV trouble, anv;
a6ec74c1 1786
08ca2aa3 1787 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1788 while (checksum >= 16) {
1789 checksum -= 16;
08ca2aa3 1790 anv *= 65536.0;
a6ec74c1 1791 }
a6ec74c1 1792 while (cdouble < 0.0)
08ca2aa3 1793 cdouble += anv;
09b94b1f
S
1794 cdouble = Perl_modf(cdouble / anv, &trouble);
1795#ifdef LONGDOUBLE_DOUBLEDOUBLE
1796 /* Workaround for powerpc doubledouble modfl bug:
1797 * close to 1.0L and -1.0L cdouble is 0, and trouble
1798 * is cdouble / anv. */
1799 if (trouble != Perl_ceil(trouble)) {
1800 cdouble = trouble;
1801 if (cdouble > 1.0L) cdouble -= 1.0L;
1802 if (cdouble < -1.0L) cdouble += 1.0L;
1803 }
1804#endif
1805 cdouble *= anv;
c4c5f44a 1806 sv = newSVnv(cdouble);
a6ec74c1
JH
1807 }
1808 else {
fa8ec7c1
NC
1809 if (checksum < bits_in_uv) {
1810 UV mask = ((UV)1 << checksum) - 1;
92d41999 1811 cuv &= mask;
a6ec74c1 1812 }
c4c5f44a 1813 sv = newSVuv(cuv);
a6ec74c1 1814 }
6e449a3a 1815 mXPUSHs(sv);
a6ec74c1
JH
1816 checksum = 0;
1817 }
fc241834 1818
49704364
WL
1819 if (symptr->flags & FLAG_SLASH){
1820 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1821 break;
49704364
WL
1822 if( next_symbol(symptr) ){
1823 if( symptr->howlen == e_number )
1824 Perl_croak(aTHX_ "Count after length/code in unpack" );
1825 if( beyond ){
1826 /* ...end of char buffer then no decent length available */
1827 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1828 } else {
1829 /* take top of stack (hope it's numeric) */
1830 len = POPi;
1831 if( len < 0 )
1832 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1833 }
1834 } else {
1835 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1836 }
1837 datumtype = symptr->code;
21c16052 1838 explicit_length = FALSE;
49704364
WL
1839 goto redo_switch;
1840 }
a6ec74c1 1841 }
49704364 1842
18529408
IZ
1843 if (new_s)
1844 *new_s = s;
1845 PUTBACK;
1846 return SP - PL_stack_base - start_sp_offset;
1847}
1848
1849PP(pp_unpack)
1850{
1851 dSP;
bab9c0ac 1852 dPOPPOPssrl;
1c23e2bd 1853 U8 gimme = GIMME_V;
18529408
IZ
1854 STRLEN llen;
1855 STRLEN rlen;
5c144d81
NC
1856 const char *pat = SvPV_const(left, llen);
1857 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1858 const char *strend = s + rlen;
1859 const char *patend = pat + llen;
e1b825c1 1860 SSize_t cnt;
18529408
IZ
1861
1862 PUTBACK;
7accc089 1863 cnt = unpackstring(pat, patend, s, strend,
49704364 1864 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1865 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1866
18529408
IZ
1867 SPAGAIN;
1868 if ( !cnt && gimme == G_SCALAR )
1869 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1870 RETURN;
1871}
1872
f337b084 1873STATIC U8 *
e1b825c1 1874doencodes(U8 *h, const U8 *s, SSize_t len)
a6ec74c1 1875{
f337b084 1876 *h++ = PL_uuemap[len];
a6ec74c1 1877 while (len > 2) {
f337b084
TH
1878 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1879 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1880 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1881 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1882 s += 3;
1883 len -= 3;
1884 }
1885 if (len > 0) {
e68aed92 1886 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1887 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1888 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1889 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1890 *h++ = PL_uuemap[0];
a6ec74c1 1891 }
f337b084
TH
1892 *h++ = '\n';
1893 return h;
a6ec74c1
JH
1894}
1895
1896STATIC SV *
f7fe979e 1897S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1898{
8b6e33c7
AL
1899 SV *result = newSVpvn(s, l);
1900 char *const result_c = SvPV_nolen(result); /* convenience */
1901 char *out = result_c;
1902 bool skip = 1;
1903 bool ignore = 0;
a6ec74c1 1904
7918f24d
NC
1905 PERL_ARGS_ASSERT_IS_AN_INT;
1906
a6ec74c1
JH
1907 while (*s) {
1908 switch (*s) {
1909 case ' ':
1910 break;
1911 case '+':
1912 if (!skip) {
1913 SvREFCNT_dec(result);
1914 return (NULL);
1915 }
1916 break;
1917 case '0':
1918 case '1':
1919 case '2':
1920 case '3':
1921 case '4':
1922 case '5':
1923 case '6':
1924 case '7':
1925 case '8':
1926 case '9':
1927 skip = 0;
1928 if (!ignore) {
1929 *(out++) = *s;
1930 }
1931 break;
1932 case '.':
1933 ignore = 1;
1934 break;
1935 default:
1936 SvREFCNT_dec(result);
1937 return (NULL);
1938 }
1939 s++;
1940 }
1941 *(out++) = '\0';
1942 SvCUR_set(result, out - result_c);
1943 return (result);
1944}
1945
1946/* pnum must be '\0' terminated */
1947STATIC int
1948S_div128(pTHX_ SV *pnum, bool *done)
1949{
8b6e33c7
AL
1950 STRLEN len;
1951 char * const s = SvPV(pnum, len);
1952 char *t = s;
1953 int m = 0;
1954
7918f24d
NC
1955 PERL_ARGS_ASSERT_DIV128;
1956
8b6e33c7
AL
1957 *done = 1;
1958 while (*t) {
1959 const int i = m * 10 + (*t - '0');
1960 const int r = (i >> 7); /* r < 10 */
1961 m = i & 0x7F;
1962 if (r) {
1963 *done = 0;
1964 }
1965 *(t++) = '0' + r;
a6ec74c1 1966 }
8b6e33c7
AL
1967 *(t++) = '\0';
1968 SvCUR_set(pnum, (STRLEN) (t - s));
1969 return (m);
a6ec74c1
JH
1970}
1971
18529408 1972/*
7accc089
JH
1973=for apidoc packlist
1974
796b6530 1975The engine implementing C<pack()> Perl function.
7accc089 1976
bfce84ec
AL
1977=cut
1978*/
7accc089
JH
1979
1980void
5aaab254 1981Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1982{
aadb217d
JH
1983 tempsym_t sym;
1984
7918f24d
NC
1985 PERL_ARGS_ASSERT_PACKLIST;
1986
f7fe979e 1987 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1988
f337b084
TH
1989 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1990 Also make sure any UTF8 flag is loaded */
56eb0262 1991 SvPV_force_nolen(cat);
bfce84ec
AL
1992 if (DO_UTF8(cat))
1993 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1994
49704364
WL
1995 (void)pack_rec( cat, &sym, beglist, endlist );
1996}
1997
f337b084
TH
1998/* like sv_utf8_upgrade, but also repoint the group start markers */
1999STATIC void
2000marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2001 STRLEN len;
2002 tempsym_t *group;
f7fe979e
AL
2003 const char *from_ptr, *from_start, *from_end, **marks, **m;
2004 char *to_start, *to_ptr;
f337b084
TH
2005
2006 if (SvUTF8(sv)) return;
2007
aa07b2f6 2008 from_start = SvPVX_const(sv);
f337b084
TH
2009 from_end = from_start + SvCUR(sv);
2010 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 2011 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2012 if (from_ptr == from_end) {
2013 /* Simple case: no character needs to be changed */
2014 SvUTF8_on(sv);
2015 return;
2016 }
2017
3473cf63 2018 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2019 Newx(to_start, len, char);
f337b084
TH
2020 Copy(from_start, to_start, from_ptr-from_start, char);
2021 to_ptr = to_start + (from_ptr-from_start);
2022
a02a5408 2023 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2024 for (group=sym_ptr; group; group = group->previous)
2025 marks[group->level] = from_start + group->strbeg;
2026 marks[sym_ptr->level+1] = from_end+1;
2027 for (m = marks; *m < from_ptr; m++)
2028 *m = to_start + (*m-from_start);
2029
2030 for (;from_ptr < from_end; from_ptr++) {
2031 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2032 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2033 }
2034 *to_ptr = 0;
2035
2036 while (*m == from_ptr) *m++ = to_ptr;
2037 if (m != marks + sym_ptr->level+1) {
2038 Safefree(marks);
2039 Safefree(to_start);
5637ef5b
NC
2040 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2041 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2042 }
2043 for (group=sym_ptr; group; group = group->previous)
2044 group->strbeg = marks[group->level] - to_start;
2045 Safefree(marks);
2046
2047 if (SvOOK(sv)) {
2048 if (SvIVX(sv)) {
b162af07 2049 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2050 from_start -= SvIVX(sv);
2051 SvIV_set(sv, 0);
2052 }
2053 SvFLAGS(sv) &= ~SVf_OOK;
2054 }
2055 if (SvLEN(sv) != 0)
2056 Safefree(from_start);
f880fe2f 2057 SvPV_set(sv, to_start);
b162af07
SP
2058 SvCUR_set(sv, to_ptr - to_start);
2059 SvLEN_set(sv, len);
f337b084
TH
2060 SvUTF8_on(sv);
2061}
2062
2063/* Exponential string grower. Makes string extension effectively O(n)
2064 needed says how many extra bytes we need (not counting the final '\0')
2065 Only grows the string if there is an actual lack of space
2066*/
2067STATIC char *
0bd48802 2068S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2069 const STRLEN cur = SvCUR(sv);
2070 const STRLEN len = SvLEN(sv);
f337b084 2071 STRLEN extend;
7918f24d
NC
2072
2073 PERL_ARGS_ASSERT_SV_EXP_GROW;
2074
f337b084
TH
2075 if (len - cur > needed) return SvPVX(sv);
2076 extend = needed > len ? needed : len;
2077 return SvGROW(sv, len+extend+1);
2078}
49704364 2079
93f6e112 2080static SV *
b197e565 2081S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2082{
2083 SvGETMAGIC(sv);
93f6e112
FC
2084 if (UNLIKELY(SvAMAGIC(sv)))
2085 sv = sv_2num(sv);
99f450cc 2086 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2087 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2088 const NV nv = SvNV_nomg(sv);
2089 if (c == 'w')
147e3846 2090 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
354b74ae 2091 else
147e3846 2092 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2093 }
93f6e112 2094 return sv;
354b74ae
FC
2095}
2096
93f6e112
FC
2097#define SvIV_no_inf(sv,d) \
2098 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2099#define SvUV_no_inf(sv,d) \
2100 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2101
49704364
WL
2102STATIC
2103SV **
f337b084 2104S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2105{
49704364 2106 tempsym_t lookahead;
e1b825c1 2107 SSize_t items = endlist - beglist;
f337b084
TH
2108 bool found = next_symbol(symptr);
2109 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2110 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2111 char* from;
f337b084 2112
7918f24d
NC
2113 PERL_ARGS_ASSERT_PACK_REC;
2114
f337b084
TH
2115 if (symptr->level == 0 && found && symptr->code == 'U') {
2116 marked_upgrade(aTHX_ cat, symptr);
2117 symptr->flags |= FLAG_DO_UTF8;
2118 utf8 = 0;
49704364 2119 }
f337b084 2120 symptr->strbeg = SvCUR(cat);
49704364
WL
2121
2122 while (found) {
f337b084
TH
2123 SV *fromstr;
2124 STRLEN fromlen;
e1b825c1 2125 SSize_t len;
a0714e2c 2126 SV *lengthcode = NULL;
49704364 2127 I32 datumtype = symptr->code;
f337b084
TH
2128 howlen_t howlen = symptr->howlen;
2129 char *start = SvPVX(cat);
2130 char *cur = start + SvCUR(cat);
a1219b5e 2131 bool needs_swap;
49704364 2132
fc1bb3f2 2133#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2134#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2135
2136 switch (howlen) {
fc241834 2137 case e_star:
f337b084
TH
2138 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2139 0 : items;
2140 break;
2141 default:
2142 /* e_no_len and e_number */
2143 len = symptr->length;
49704364
WL
2144 break;
2145 }
2146
f337b084 2147 if (len) {
a7a3cfaa 2148 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2149
a7a3cfaa
TH
2150 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2151 /* We can process this letter. */
2152 STRLEN size = props & PACK_SIZE_MASK;
f17fed50 2153 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
a7a3cfaa 2154 }
f337b084
TH
2155 }
2156
49704364
WL
2157 /* Look ahead for next symbol. Do we have code/code? */
2158 lookahead = *symptr;
2159 found = next_symbol(&lookahead);
246f24af
TH
2160 if (symptr->flags & FLAG_SLASH) {
2161 IV count;
f337b084 2162 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2163 if (strchr("aAZ", lookahead.code)) {
2164 if (lookahead.howlen == e_number) count = lookahead.length;
2165 else {
ce399ba6 2166 if (items > 0) {
48a5da33 2167 count = sv_len_utf8(*beglist);
ce399ba6 2168 }
246f24af
TH
2169 else count = 0;
2170 if (lookahead.code == 'Z') count++;
2171 }
2172 } else {
2173 if (lookahead.howlen == e_number && lookahead.length < items)
2174 count = lookahead.length;
2175 else count = items;
2176 }
2177 lookahead.howlen = e_number;
2178 lookahead.length = count;
2179 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2180 }
49704364 2181
a1219b5e
NC
2182 needs_swap = NEEDS_SWAP(datumtype);
2183
fc241834
RGS
2184 /* Code inside the switch must take care to properly update
2185 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2186 doesn't simply leave using break */
0c7df902 2187 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2188 default:
f337b084
TH
2189 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2190 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2191 case '%':
49704364 2192 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2193
28be1210 2194 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2195 case '.':
2196 if (howlen == e_star) from = start;
2197 else if (len == 0) from = cur;
2198 else {
2199 tempsym_t *group = symptr;
2200
2201 while (--len && group) group = group->previous;
2202 from = group ? start + group->strbeg : start;
2203 }
2204 fromstr = NEXTFROM;
354b74ae 2205 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2206 goto resize;
28be1210 2207 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2208 case '@':
28be1210
TH
2209 from = start + symptr->strbeg;
2210 resize:
28be1210 2211 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2212 if (len >= 0) {
2213 while (len && from < cur) {
2214 from += UTF8SKIP(from);
2215 len--;
2216 }
2217 if (from > cur)
2218 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2219 if (len) {
2220 /* Here we know from == cur */
2221 grow:
2222 GROWING(0, cat, start, cur, len);
2223 Zero(cur, len, char);
2224 cur += len;
2225 } else if (from < cur) {
2226 len = cur - from;
2227 goto shrink;
2228 } else goto no_change;
2229 } else {
2230 cur = from;
2231 len = -len;
2232 goto utf8_shrink;
f337b084 2233 }
28be1210
TH
2234 else {
2235 len -= cur - from;
f337b084 2236 if (len > 0) goto grow;
28be1210 2237 if (len == 0) goto no_change;
fc241834 2238 len = -len;
28be1210 2239 goto shrink;
f337b084 2240 }
a6ec74c1 2241 break;
81d52ecd 2242
fc241834 2243 case '(': {
49704364 2244 tempsym_t savsym = *symptr;
66c611c5
MHM
2245 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2246 symptr->flags |= group_modifiers;
49704364
WL
2247 symptr->patend = savsym.grpend;
2248 symptr->level++;
f337b084 2249 symptr->previous = &lookahead;
18529408 2250 while (len--) {
f337b084
TH
2251 U32 was_utf8;
2252 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2253 else symptr->flags &= ~FLAG_PARSE_UTF8;
2254 was_utf8 = SvUTF8(cat);
49704364 2255 symptr->patptr = savsym.grpbeg;
f337b084
TH
2256 beglist = pack_rec(cat, symptr, beglist, endlist);
2257 if (SvUTF8(cat) != was_utf8)
2258 /* This had better be an upgrade while in utf8==0 mode */
2259 utf8 = 1;
2260
49704364 2261 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2262 break; /* No way to continue */
2263 }
ee790063 2264 items = endlist - beglist;
f337b084
TH
2265 lookahead.flags = symptr->flags & ~group_modifiers;
2266 goto no_change;
18529408 2267 }
62f95557
IZ
2268 case 'X' | TYPE_IS_SHRIEKING:
2269 if (!len) /* Avoid division by 0 */
2270 len = 1;
f337b084
TH
2271 if (utf8) {
2272 char *hop, *last;
e1b825c1 2273 SSize_t l = len;
f337b084
TH
2274 hop = last = start;
2275 while (hop < cur) {
2276 hop += UTF8SKIP(hop);
2277 if (--l == 0) {
2278 last = hop;
2279 l = len;
2280 }
2281 }
2282 if (last > cur)
2283 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2284 cur = last;
2285 break;
2286 }
2287 len = (cur-start) % len;
924ba076 2288 /* FALLTHROUGH */
a6ec74c1 2289 case 'X':
f337b084
TH
2290 if (utf8) {
2291 if (len < 1) goto no_change;
28be1210 2292 utf8_shrink:
f337b084
TH
2293 while (len > 0) {
2294 if (cur <= start)
28be1210
TH
2295 Perl_croak(aTHX_ "'%c' outside of string in pack",
2296 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2297 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2298 if (cur <= start)
28be1210
TH
2299 Perl_croak(aTHX_ "'%c' outside of string in pack",
2300 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2301 }
2302 len--;
2303 }
2304 } else {
fc241834 2305 shrink:
f337b084 2306 if (cur - start < len)
28be1210
TH
2307 Perl_croak(aTHX_ "'%c' outside of string in pack",
2308 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2309 cur -= len;
2310 }
2311 if (cur < start+symptr->strbeg) {
2312 /* Make sure group starts don't point into the void */
2313 tempsym_t *group;
9e27e96a 2314 const STRLEN length = cur-start;
f337b084
TH
2315 for (group = symptr;
2316 group && length < group->strbeg;
2317 group = group->previous) group->strbeg = length;
2318 lookahead.strbeg = length;
2319 }
a6ec74c1 2320 break;
fc241834 2321 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 2322 SSize_t ai32;
62f95557
IZ
2323 if (!len) /* Avoid division by 0 */
2324 len = 1;
230e1fce 2325 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2326 else ai32 = (cur - start) % len;
2327 if (ai32 == 0) goto no_change;
2328 len -= ai32;
2329 }
924ba076 2330 /* FALLTHROUGH */
a6ec74c1 2331 case 'x':
f337b084 2332 goto grow;
a6ec74c1
JH
2333 case 'A':
2334 case 'Z':
f337b084 2335 case 'a': {
f7fe979e 2336 const char *aptr;
f337b084 2337
a6ec74c1 2338 fromstr = NEXTFROM;
e62f0680 2339 aptr = SvPV_const(fromstr, fromlen);
f337b084 2340 if (DO_UTF8(fromstr)) {
f7fe979e 2341 const char *end, *s;
f337b084
TH
2342
2343 if (!utf8 && !SvUTF8(cat)) {
2344 marked_upgrade(aTHX_ cat, symptr);
2345 lookahead.flags |= FLAG_DO_UTF8;
2346 lookahead.strbeg = symptr->strbeg;
2347 utf8 = 1;
2348 start = SvPVX(cat);
2349 cur = start + SvCUR(cat);
2350 }
fc241834 2351 if (howlen == e_star) {
f337b084
TH
2352 if (utf8) goto string_copy;
2353 len = fromlen+1;
2354 }
2355 s = aptr;
2356 end = aptr + fromlen;
2357 fromlen = datumtype == 'Z' ? len-1 : len;
e1b825c1 2358 while ((SSize_t) fromlen > 0 && s < end) {
f337b084
TH
2359 s += UTF8SKIP(s);
2360 fromlen--;
2361 }
2362 if (s > end)
2363 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2364 if (utf8) {
fc241834 2365 len = fromlen;
f337b084
TH
2366 if (datumtype == 'Z') len++;
2367 fromlen = s-aptr;
2368 len += fromlen;
fc241834 2369
f337b084 2370 goto string_copy;
fc241834 2371 }
f337b084
TH
2372 fromlen = len - fromlen;
2373 if (datumtype == 'Z') fromlen--;
2374 if (howlen == e_star) {
2375 len = fromlen;
2376 if (datumtype == 'Z') len++;
fc241834 2377 }
f337b084 2378 GROWING(0, cat, start, cur, len);
9df874cd 2379 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2380 datumtype | TYPE_IS_PACK))
5637ef5b 2381 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
e1b825c1
Z
2382 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2383 (int)datumtype, aptr, end, cur, fromlen);
f337b084 2384 cur += fromlen;
a6ec74c1 2385 len -= fromlen;
f337b084
TH
2386 } else if (utf8) {
2387 if (howlen == e_star) {
2388 len = fromlen;
2389 if (datumtype == 'Z') len++;
a6ec74c1 2390 }
e1b825c1 2391 if (len <= (SSize_t) fromlen) {
f337b084
TH
2392 fromlen = len;
2393 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2394 }
fc241834 2395 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2396 upgrade, so:
2397 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2398 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2399 len -= fromlen;
2400 while (fromlen > 0) {
230e1fce 2401 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2402 aptr++;
2403 fromlen--;
fc241834 2404 }
f337b084
TH
2405 } else {
2406 string_copy:
2407 if (howlen == e_star) {
2408 len = fromlen;
2409 if (datumtype == 'Z') len++;
2410 }
e1b825c1 2411 if (len <= (SSize_t) fromlen) {
f337b084
TH
2412 fromlen = len;
2413 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2414 }
f337b084
TH
2415 GROWING(0, cat, start, cur, len);
2416 Copy(aptr, cur, fromlen, char);
2417 cur += fromlen;
2418 len -= fromlen;
a6ec74c1 2419 }
f337b084
TH
2420 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2421 cur += len;
3c4fb04a 2422 SvTAINT(cat);
a6ec74c1 2423 break;
f337b084 2424 }
a6ec74c1 2425 case 'B':
f337b084 2426 case 'b': {
b83604b4 2427 const char *str, *end;
e1b825c1 2428 SSize_t l, field_len;
f337b084
TH
2429 U8 bits;
2430 bool utf8_source;
2431 U32 utf8_flags;
a6ec74c1 2432
fc241834 2433 fromstr = NEXTFROM;
b83604b4 2434 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2435 end = str + fromlen;
2436 if (DO_UTF8(fromstr)) {
2437 utf8_source = TRUE;
041457d9 2438 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2439 } else {
2440 utf8_source = FALSE;
2441 utf8_flags = 0; /* Unused, but keep compilers happy */
2442 }
2443 if (howlen == e_star) len = fromlen;
2444 field_len = (len+7)/8;
2445 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2446 if (len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2447 bits = 0;
2448 l = 0;
2449 if (datumtype == 'B')
2450 while (l++ < len) {
2451 if (utf8_source) {
95b63a38 2452 UV val = 0;
f337b084
TH
2453 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2454 bits |= val & 1;
2455 } else bits |= *str++ & 1;
2456 if (l & 7) bits <<= 1;
fc241834 2457 else {
f337b084
TH
2458 PUSH_BYTE(utf8, cur, bits);
2459 bits = 0;
a6ec74c1
JH
2460 }
2461 }
f337b084
TH
2462 else
2463 /* datumtype == 'b' */
2464 while (l++ < len) {
2465 if (utf8_source) {
95b63a38 2466 UV val = 0;
f337b084
TH
2467 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2468 if (val & 1) bits |= 0x80;
2469 } else if (*str++ & 1)
2470 bits |= 0x80;
2471 if (l & 7) bits >>= 1;
fc241834 2472 else {
f337b084
TH
2473 PUSH_BYTE(utf8, cur, bits);
2474 bits = 0;
a6ec74c1
JH
2475 }
2476 }
f337b084
TH
2477 l--;
2478 if (l & 7) {
fc241834 2479 if (datumtype == 'B')
f337b084 2480 bits <<= 7 - (l & 7);
fc241834 2481 else
f337b084
TH
2482 bits >>= 7 - (l & 7);
2483 PUSH_BYTE(utf8, cur, bits);
2484 l += 7;
a6ec74c1 2485 }
f337b084
TH
2486 /* Determine how many chars are left in the requested field */
2487 l /= 8;
2488 if (howlen == e_star) field_len = 0;
2489 else field_len -= l;
2490 Zero(cur, field_len, char);
2491 cur += field_len;
a6ec74c1 2492 break;
f337b084 2493 }
a6ec74c1 2494 case 'H':
f337b084 2495 case 'h': {
10516c54 2496 const char *str, *end;
e1b825c1 2497 SSize_t l, field_len;
f337b084
TH
2498 U8 bits;
2499 bool utf8_source;
2500 U32 utf8_flags;
a6ec74c1 2501
fc241834 2502 fromstr = NEXTFROM;
10516c54 2503 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2504 end = str + fromlen;
2505 if (DO_UTF8(fromstr)) {
2506 utf8_source = TRUE;
041457d9 2507 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2508 } else {
2509 utf8_source = FALSE;
2510 utf8_flags = 0; /* Unused, but keep compilers happy */
2511 }
2512 if (howlen == e_star) len = fromlen;
2513 field_len = (len+1)/2;
2514 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2515 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2516 bits = 0;
2517 l = 0;
2518 if (datumtype == 'H')
2519 while (l++ < len) {
2520 if (utf8_source) {
95b63a38 2521 UV val = 0;
f337b084
TH
2522 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2523 if (val < 256 && isALPHA(val))
2524 bits |= (val + 9) & 0xf;
a6ec74c1 2525 else
f337b084
TH
2526 bits |= val & 0xf;
2527 } else if (isALPHA(*str))
2528 bits |= (*str++ + 9) & 0xf;
2529 else
2530 bits |= *str++ & 0xf;
2531 if (l & 1) bits <<= 4;
fc241834 2532 else {
f337b084
TH
2533 PUSH_BYTE(utf8, cur, bits);
2534 bits = 0;
a6ec74c1
JH
2535 }
2536 }
f337b084
TH
2537 else
2538 while (l++ < len) {
2539 if (utf8_source) {
95b63a38 2540 UV val = 0;
f337b084
TH
2541 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2542 if (val < 256 && isALPHA(val))
2543 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2544 else
f337b084
TH
2545 bits |= (val & 0xf) << 4;
2546 } else if (isALPHA(*str))
2547 bits |= ((*str++ + 9) & 0xf) << 4;
2548 else
2549 bits |= (*str++ & 0xf) << 4;
2550 if (l & 1) bits >>= 4;
fc241834 2551 else {
f337b084
TH
2552 PUSH_BYTE(utf8, cur, bits);
2553 bits = 0;
a6ec74c1 2554 }
fc241834 2555 }
f337b084
TH
2556 l--;
2557 if (l & 1) {
2558 PUSH_BYTE(utf8, cur, bits);
2559 l++;
2560 }
2561 /* Determine how many chars are left in the requested field */
2562 l /= 2;
2563 if (howlen == e_star) field_len = 0;
2564 else field_len -= l;
2565 Zero(cur, field_len, char);
2566 cur += field_len;
2567 break;
fc241834
RGS
2568 }
2569 case 'c':
f337b084
TH
2570 while (len-- > 0) {
2571 IV aiv;
2572 fromstr = NEXTFROM;
354b74ae 2573 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2574 if ((-128 > aiv || aiv > 127))
2575 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2576 "Character in 'c' format wrapped in pack");
585ec06d 2577 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2578 }
2579 break;
2580 case 'C':
f337b084
TH
2581 if (len == 0) {
2582 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2583 break;
2584 }
a6ec74c1 2585 while (len-- > 0) {
f337b084 2586 IV aiv;
a6ec74c1 2587 fromstr = NEXTFROM;
354b74ae 2588 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2589 if ((0 > aiv || aiv > 0xff))
2590 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2591 "Character in 'C' format wrapped in pack");
1651fc44 2592 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2593 }
fc241834
RGS
2594 break;
2595 case 'W': {
2596 char *end;
670f1322 2597 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2598
2599 end = start+SvLEN(cat)-1;
2600 if (utf8) end -= UTF8_MAXLEN-1;
2601 while (len-- > 0) {
2602 UV auv;
2603 fromstr = NEXTFROM;
354b74ae 2604 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2605 if (in_bytes) auv = auv % 0x100;
2606 if (utf8) {
2607 W_utf8:
bf4a926a 2608 if (cur >= end) {
fc241834 2609 *cur = '\0';
b162af07 2610 SvCUR_set(cat, cur - start);
fc241834
RGS
2611
2612 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2613 end = start+SvLEN(cat)-UTF8_MAXLEN;
2614 }
05fefba9 2615 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
fc241834
RGS
2616 } else {
2617 if (auv >= 0x100) {
2618 if (!SvUTF8(cat)) {
2619 *cur = '\0';
b162af07 2620 SvCUR_set(cat, cur - start);
fc241834
RGS
2621 marked_upgrade(aTHX_ cat, symptr);
2622 lookahead.flags |= FLAG_DO_UTF8;
2623 lookahead.strbeg = symptr->strbeg;
2624 utf8 = 1;
2625 start = SvPVX(cat);
2626 cur = start + SvCUR(cat);
2627 end = start+SvLEN(cat)-UTF8_MAXLEN;
2628 goto W_utf8;
2629 }
a2a5de95
NC
2630 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2631 "Character in 'W' format wrapped in pack");
fc241834
RGS
2632 auv &= 0xff;
2633 }
2634 if (cur >= end) {
2635 *cur = '\0';
b162af07 2636 SvCUR_set(cat, cur - start);
fc241834
RGS
2637 GROWING(0, cat, start, cur, len+1);
2638 end = start+SvLEN(cat)-1;
2639 }
fe2774ed 2640 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2641 }
2642 }
2643 break;
fc241834
RGS
2644 }
2645 case 'U': {
2646 char *end;
2647
2648 if (len == 0) {
2649 if (!(symptr->flags & FLAG_DO_UTF8)) {
2650 marked_upgrade(aTHX_ cat, symptr);
2651 lookahead.flags |= FLAG_DO_UTF8;
2652 lookahead.strbeg = symptr->strbeg;
2653 }
2654 utf8 = 0;
2655 goto no_change;
2656 }
2657
2658 end = start+SvLEN(cat);
2659 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2660 while (len-- > 0) {
fc241834 2661 UV auv;
a6ec74c1 2662 fromstr = NEXTFROM;
354b74ae 2663 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2664 if (utf8) {
81b3f640 2665 U8 buffer[UTF8_MAXLEN+1], *endb;
05fefba9 2666 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
fc241834
RGS
2667 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2668 *cur = '\0';
b162af07 2669 SvCUR_set(cat, cur - start);
fc241834
RGS
2670 GROWING(0, cat, start, cur,
2671 len+(endb-buffer)*UTF8_EXPAND);
2672 end = start+SvLEN(cat);
2673 }
d21d7215 2674 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2675 } else {
2676 if (cur >= end) {
2677 *cur = '\0';
b162af07 2678 SvCUR_set(cat, cur - start);
fc241834
RGS
2679 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2680 end = start+SvLEN(cat)-UTF8_MAXLEN;
2681 }
05fefba9
KW
2682 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2683 UNI_TO_NATIVE(auv),
2684 0);
fc241834 2685 }
a6ec74c1 2686 }
a6ec74c1 2687 break;
fc241834 2688 }
a6ec74c1
JH
2689 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2690 case 'f':
a6ec74c1 2691 while (len-- > 0) {
f337b084
TH
2692 float afloat;
2693 NV anv;
a6ec74c1 2694 fromstr = NEXTFROM;
f337b084 2695 anv = SvNV(fromstr);
a7157111 2696# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2697 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2698 * on Alpha; fake it if we don't have them.
2699 */
f337b084 2700 if (anv > FLT_MAX)
fc241834 2701 afloat = FLT_MAX;
f337b084 2702 else if (anv < -FLT_MAX)
fc241834 2703 afloat = -FLT_MAX;
f337b084 2704 else afloat = (float)anv;
baf3cf9c 2705# else
a7157111 2706# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
d22fb6ca
DD
2707 if(Perl_isnan(anv))
2708 afloat = (float)NV_NAN;
2709 else
a7157111
JH
2710# endif
2711# ifdef NV_INF
919894b7
DM
2712 /* a simple cast to float is undefined if outside
2713 * the range of values that can be represented */
2714 afloat = (float)(anv > FLT_MAX ? NV_INF :
2715 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2716# endif
baf3cf9c 2717# endif
3a88beaa 2718 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2719 }
2720 break;
2721 case 'd':
a6ec74c1 2722 while (len-- > 0) {
f337b084
TH
2723 double adouble;
2724 NV anv;
a6ec74c1 2725 fromstr = NEXTFROM;
f337b084 2726 anv = SvNV(fromstr);
a7157111 2727# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2728 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2729 * on Alpha; fake it if we don't have them.
2730 */
f337b084 2731 if (anv > DBL_MAX)
fc241834 2732 adouble = DBL_MAX;
f337b084 2733 else if (anv < -DBL_MAX)
fc241834 2734 adouble = -DBL_MAX;
f337b084 2735 else adouble = (double)anv;
baf3cf9c 2736# else
f337b084 2737 adouble = (double)anv;
baf3cf9c 2738# endif
3a88beaa 2739 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2740 }
2741 break;
fc241834 2742 case 'F': {
275663fa 2743 NV_bytes anv;
1109a392 2744 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2745 while (len-- > 0) {
2746 fromstr = NEXTFROM;
cd07c537
DM
2747#ifdef __GNUC__
2748 /* to work round a gcc/x86 bug; don't use SvNV */
2749 anv.nv = sv_2nv(fromstr);
070e2677
TC
2750# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2751 && LONG_DOUBLESIZE > 10
2752 /* GCC sometimes overwrites the padding in the
2753 assignment above */
2754 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2755# endif
cd07c537 2756#else
275663fa 2757 anv.nv = SvNV(fromstr);
cd07c537 2758#endif
3a88beaa 2759 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2760 }
2761 break;
fc241834 2762 }
92d41999 2763#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2764 case 'D': {
275663fa 2765 ld_bytes aldouble;
1109a392
MHM
2766 /* long doubles can have unused bits, which may be nonzero */
2767 Zero(&aldouble, 1, long double);
92d41999
JH
2768 while (len-- > 0) {
2769 fromstr = NEXTFROM;
cd07c537
DM
2770# ifdef __GNUC__
2771 /* to work round a gcc/x86 bug; don't use SvNV */
2772 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2773# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2774 /* GCC sometimes overwrites the padding in the
2775 assignment above */
2776 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2777# endif
cd07c537 2778# else
275663fa 2779 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2780# endif
3a88beaa
NC
2781 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2782 needs_swap);
92d41999
JH
2783 }
2784 break;
fc241834 2785 }
92d41999 2786#endif
068bd2e7 2787 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2788 case 'n':
2789 while (len-- > 0) {
f337b084 2790 I16 ai16;
a6ec74c1 2791 fromstr = NEXTFROM;
354b74ae 2792 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2793 ai16 = PerlSock_htons(ai16);
3a88beaa 2794 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2795 }
2796 break;
068bd2e7 2797 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2798 case 'v':
2799 while (len-- > 0) {
f337b084 2800 I16 ai16;
a6ec74c1 2801 fromstr = NEXTFROM;
354b74ae 2802 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2803 ai16 = htovs(ai16);
3a88beaa 2804 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2805 }
2806 break;
49704364 2807 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2808#if SHORTSIZE != SIZE16
fc241834 2809 while (len-- > 0) {
f337b084 2810 unsigned short aushort;
fc241834 2811 fromstr = NEXTFROM;
354b74ae 2812 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2813 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2814 }
49704364
WL
2815 break;
2816#else
924ba076 2817 /* FALLTHROUGH */
a6ec74c1 2818#endif
49704364 2819 case 'S':
fc241834 2820 while (len-- > 0) {
f337b084 2821 U16 au16;
fc241834 2822 fromstr = NEXTFROM;
354b74ae 2823 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2824 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2825 }
2826 break;
49704364 2827 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2828#if SHORTSIZE != SIZE16
fc241834 2829 while (len-- > 0) {
f337b084 2830 short ashort;
fc241834 2831 fromstr = NEXTFROM;
354b74ae 2832 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2833 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2834 }
49704364
WL
2835 break;
2836#else
924ba076 2837 /* FALLTHROUGH */
a6ec74c1 2838#endif
49704364
WL
2839 case 's':
2840 while (len-- > 0) {
f337b084 2841 I16 ai16;
49704364 2842 fromstr = NEXTFROM;
354b74ae 2843 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2844 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2845 }
2846 break;
2847 case 'I':
49704364 2848 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2849 while (len-- > 0) {
f337b084 2850 unsigned int auint;
a6ec74c1 2851 fromstr = NEXTFROM;
354b74ae 2852 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2853 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2854 }
2855 break;
92d41999
JH
2856 case 'j':
2857 while (len-- > 0) {
f337b084 2858 IV aiv;
92d41999 2859 fromstr = NEXTFROM;
354b74ae 2860 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2861 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2862 }
2863 break;
2864 case 'J':
2865 while (len-- > 0) {
f337b084 2866 UV auv;
92d41999 2867 fromstr = NEXTFROM;
354b74ae 2868 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2869 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2870 }
2871 break;
a6ec74c1
JH
2872 case 'w':
2873 while (len-- > 0) {
f337b084 2874 NV anv;
a6ec74c1 2875 fromstr = NEXTFROM;
83388ae2 2876 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2877 anv = SvNV_nomg(fromstr);
a6ec74c1 2878
f337b084
TH
2879 if (anv < 0) {
2880 *cur = '\0';
b162af07 2881 SvCUR_set(cat, cur - start);
49704364 2882 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2883 }
a6ec74c1 2884
196b62db
NC
2885 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2886 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2887 any negative IVs will have already been got by the croak()
2888 above. IOK is untrue for fractions, so we test them
2889 against UV_MAX_P1. */
f337b084
TH
2890 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2891 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2892 char *in = buf + sizeof(buf);
1e9a122e 2893 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2894
2895 do {
eb160463 2896 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2897 auv >>= 7;
2898 } while (auv);
2899 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2900 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2901 in, (buf + sizeof(buf)) - in);
2902 } else if (SvPOKp(fromstr))
2903 goto w_string;
a6ec74c1 2904 else if (SvNOKp(fromstr)) {
0258719b 2905 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2906 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2907 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2908 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2909 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2910 Some C compilers are strict about integral constant
2911 expressions so we conservatively divide by a slightly
2912 smaller integer instead of multiplying by the exact
2913 floating-point value.
0258719b
NC
2914 */
2915#ifdef NV_MAX_10_EXP
f337b084 2916 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2917 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2918#else
f337b084 2919 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2920 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2921#endif
a6ec74c1
JH
2922 char *in = buf + sizeof(buf);
2923
8b6e33c7 2924 anv = Perl_floor(anv);
a6ec74c1 2925 do {
8b6e33c7 2926 const NV next = Perl_floor(anv / 128);
a6ec74c1 2927 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2928 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2929 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2930 anv = next;
2931 } while (anv > 0);
a6ec74c1 2932 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2933 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2934 in, (buf + sizeof(buf)) - in);
2935 } else {
8b6e33c7
AL
2936 const char *from;
2937 char *result, *in;
735b914b
JH
2938 SV *norm;
2939 STRLEN len;
2940 bool done;
2941
f337b084 2942 w_string:
735b914b 2943 /* Copy string and check for compliance */
1e9a122e 2944 from = SvPV_nomg_const(fromstr, len);
735b914b 2945 if ((norm = is_an_int(from, len)) == NULL)
49704364 2946 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2947
a02a5408 2948 Newx(result, len, char);
735b914b
JH
2949 in = result + len;
2950 done = FALSE;
f337b084 2951 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2952 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2953 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2954 in, (result + len) - in);
735b914b
JH
2955 Safefree(result);
2956 SvREFCNT_dec(norm); /* free norm */
fc241834 2957 }
a6ec74c1
JH
2958 }
2959 break;
2960 case 'i':
49704364 2961 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2962 while (len-- > 0) {
f337b084 2963 int aint;
a6ec74c1 2964 fromstr = NEXTFROM;
354b74ae 2965 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2966 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2967 }
2968 break;
068bd2e7 2969 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2970 case 'N':
2971 while (len-- > 0) {
f337b084 2972 U32 au32;
a6ec74c1 2973 fromstr = NEXTFROM;
354b74ae 2974 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2975 au32 = PerlSock_htonl(au32);
3a88beaa 2976 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2977 }
2978 break;
068bd2e7 2979 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2980 case 'V':
2981 while (len-- > 0) {
f337b084 2982 U32 au32;
a6ec74c1 2983 fromstr = NEXTFROM;
354b74ae 2984 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2985 au32 = htovl(au32);
3a88beaa 2986 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2987 }
2988 break;
49704364 2989 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2990#if LONGSIZE != SIZE32
fc241834 2991 while (len-- > 0) {
f337b084 2992 unsigned long aulong;
fc241834 2993 fromstr = NEXTFROM;
354b74ae 2994 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2995 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2996 }
49704364
WL
2997 break;
2998#else
2999 /* Fall though! */
a6ec74c1 3000#endif
49704364 3001 case 'L':
fc241834 3002 while (len-- > 0) {
f337b084 3003 U32 au32;
fc241834 3004 fromstr = NEXTFROM;
354b74ae 3005 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 3006 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
3007 }
3008 break;
49704364 3009 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3010#if LONGSIZE != SIZE32
fc241834 3011 while (len-- > 0) {
f337b084 3012 long along;
fc241834 3013 fromstr = NEXTFROM;
354b74ae 3014 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3015 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 3016 }
49704364
WL
3017 break;
3018#else
3019 /* Fall though! */
a6ec74c1 3020#endif
49704364
WL
3021 case 'l':
3022 while (len-- > 0) {
f337b084 3023 I32 ai32;
49704364 3024 fromstr = NEXTFROM;
354b74ae 3025 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3026 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3027 }
3028 break;
c174bf3b 3029#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3030 case 'Q':
3031 while (len-- > 0) {
f337b084 3032 Uquad_t auquad;
a6ec74c1 3033 fromstr = NEXTFROM;
354b74ae 3034 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3035 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3036 }
3037 break;
3038 case 'q':
3039 while (len-- > 0) {
f337b084 3040 Quad_t aquad;
a6ec74c1 3041 fromstr = NEXTFROM;
354b74ae 3042 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3043 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3044 }
3045 break;
1640b983 3046#endif
a6ec74c1
JH
3047 case 'P':
3048 len = 1; /* assume SV is correct length */
f337b084 3049 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3050 /* FALLTHROUGH */
a6ec74c1
JH
3051 case 'p':
3052 while (len-- > 0) {
83003860 3053 const char *aptr;
f337b084 3054
a6ec74c1 3055 fromstr = NEXTFROM;
28a4f200
TH
3056 SvGETMAGIC(fromstr);
3057 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3058 else {
a6ec74c1
JH
3059 /* XXX better yet, could spirit away the string to
3060 * a safe spot and hang on to it until the result
3061 * of pack() (and all copies of the result) are
3062 * gone.
3063 */
b199a398
FC
3064 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3065 || (SvPADTMP(fromstr) &&
a2a5de95
NC
3066 !SvREADONLY(fromstr)))) {
3067 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3068 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3069 }
3070 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3071 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3072 else
2596d9fe 3073 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3074 }
3a88beaa 3075 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3076 }
3077 break;
fc241834 3078 case 'u': {
f7fe979e 3079 const char *aptr, *aend;
fc241834 3080 bool from_utf8;
f337b084 3081
a6ec74c1 3082 fromstr = NEXTFROM;
fc241834
RGS
3083 if (len <= 2) len = 45;
3084 else len = len / 3 * 3;
3085 if (len >= 64) {
a2a5de95
NC
3086 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3087 "Field too wide in 'u' format in pack");
fc241834
RGS
3088 len = 63;
3089 }
83003860 3090 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3091 from_utf8 = DO_UTF8(fromstr);
3092 if (from_utf8) {
3093 aend = aptr + fromlen;
3f63b0e5 3094 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3095 } else aend = NULL; /* Unused, but keep compilers happy */
3096 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3097 while (fromlen > 0) {
fc241834 3098 U8 *end;
e1b825c1 3099 SSize_t todo;
fc241834 3100 U8 hunk[1+63/3*4+1];
a6ec74c1 3101
e1b825c1 3102 if ((SSize_t)fromlen > len)
a6ec74c1
JH
3103 todo = len;
3104 else
3105 todo = fromlen;
fc241834
RGS
3106 if (from_utf8) {
3107 char buffer[64];
9df874cd 3108 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3109 'u' | TYPE_IS_PACK)) {
3110 *cur = '\0';
b162af07 3111 SvCUR_set(cat, cur - start);
5637ef5b 3112 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
e1b825c1
Z
3113 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3114 aptr, aend, buffer, todo);
fc241834 3115 }
e68aed92 3116 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3117 } else {
e68aed92 3118 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3119 aptr += todo;
3120 }
3a88beaa 3121 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3122 fromlen -= todo;
3123 }
a6ec74c1
JH
3124 break;
3125 }
f337b084
TH
3126 }
3127 *cur = '\0';
b162af07 3128 SvCUR_set(cat, cur - start);
f337b084 3129 no_change:
49704364 3130 *symptr = lookahead;
a6ec74c1 3131 }
49704364 3132 return beglist;
18529408
IZ
3133}
3134#undef NEXTFROM
3135
3136
3137PP(pp_pack)
3138{
20b7effb 3139 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3140 SV *cat = TARG;
18529408 3141 STRLEN fromlen;
349d4f2f 3142 SV *pat_sv = *++MARK;
eb578fdb
KW
3143 const char *pat = SvPV_const(pat_sv, fromlen);
3144 const char *patend = pat + fromlen;
18529408
IZ
3145
3146 MARK++;
e8f7c79f 3147 SvPVCLEAR(cat);
f337b084 3148 SvUTF8_off(cat);
18529408 3149
7accc089 3150 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3151
fd879d93
KW
3152 if (SvUTF8(cat)) {
3153 STRLEN result_len;
3154 const char * result = SvPV_nomg(cat, result_len);
3155 const U8 * error_pos;
3156
3157 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3158 _force_out_malformed_utf8_message(error_pos,
3159 (U8 *) result + result_len,
3160 0, /* no flags */
3161 1 /* Die */
3162 );
3163 NOT_REACHED; /* NOTREACHED */
3164 }
3165 }
3166
a6ec74c1
JH
3167 SvSETMAGIC(cat);
3168 SP = ORIGMARK;
3169 PUSHs(cat);
3170 RETURN;
3171}
a6ec74c1 3172
73cb7263 3173/*
14d04a33 3174 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3175 */