This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
pack(): avoid << of negative values
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
f337b084 154 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
LW
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
219#if 'I' == 73 && 'J' == 74
220/* On an ASCII/ISO kind of system */
221#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222#else
223/*
224 Some other sort of character set - use memchr() so we don't match
225 the null byte.
226 */
227#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
228#endif
229
66c611c5 230/* type modifiers */
62f95557 231#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
232#define TYPE_IS_BIG_ENDIAN 0x200
233#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 234#define TYPE_IS_PACK 0x800
1109a392 235#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 236#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
237#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
238
7212898e
NC
239# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
241
242# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
243
78d46eaa 244#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 245#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
246#define PACK_SIZE_MASK 0x3F
247
298bc19c 248#include "packsizetables.c"
78d46eaa 249
20aa3a38
NC
250static void
251S_reverse_copy(const char *src, char *dest, STRLEN len)
252{
253 dest += len;
254 while (len--)
255 *--dest = *src++;
256}
257
08ca2aa3 258STATIC U8
f7fe979e 259uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 260{
08ca2aa3 261 STRLEN retlen;
0bcc34c2 262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 264 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 270 if (val >= 0x100) {
a2a5de95
NC
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
274 val &= 0xff;
275 }
276 *s += retlen;
fe2774ed 277 return (U8)val;
08ca2aa3
TH
278}
279
f337b084
TH
280#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
282 *(U8 *)(s)++)
283
284STATIC bool
f7fe979e 285uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
286{
287 UV val;
288 STRLEN retlen;
f7fe979e 289 const char *from = *s;
08ca2aa3 290 int bad = 0;
f7fe979e 291 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
293 const bool needs_swap = NEEDS_SWAP(datumtype);
294
228e69a7 295 if (UNLIKELY(needs_swap))
20aa3a38
NC
296 buf += buf_len;
297
08ca2aa3
TH
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
f337b084 300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
303 bad |= 1;
304 } else from += retlen;
305 if (val >= 0x100) {
306 bad |= 2;
307 val &= 0xff;
308 }
228e69a7 309 if (UNLIKELY(needs_swap))
20aa3a38
NC
310 *(U8 *)--buf = (U8)val;
311 else
312 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
313 }
314 /* We have enough characters for the buffer. Did we have problems ? */
315 if (bad) {
316 if (bad & 1) {
317 /* Rewalk the string fragment while warning */
f7fe979e 318 const char *ptr;
9e27e96a 319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
c80e42f3 322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
323 }
324 if (from > end) from = end;
325 }
a2a5de95
NC
326 if ((bad & 2))
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 328 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
332 }
333 *s = from;
334 return TRUE;
335}
336
337STATIC bool
f7fe979e 338next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 339{
08ca2aa3 340 STRLEN retlen;
0bcc34c2 341 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
342 if (val >= 0x100 || !ISUUCHAR(val) ||
343 retlen == (STRLEN) -1 || retlen == 0) {
344 *out = 0;
345 return FALSE;
346 }
347 *out = PL_uudmap[val] & 077;
f337b084 348 *s += retlen;
08ca2aa3
TH
349 return TRUE;
350}
78d46eaa 351
64844641 352STATIC char *
3a88beaa 353S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
7918f24d
NC
354 PERL_ARGS_ASSERT_BYTES_TO_UNI;
355
228e69a7 356 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
357 const U8 *p = start + len;
358 while (p-- > start) {
55d09dc8 359 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
360 }
361 } else {
362 const U8 * const end = start + len;
363 while (start < end) {
55d09dc8 364 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
365 start++;
366 }
f337b084 367 }
64844641 368 return dest;
f337b084
TH
369}
370
3a88beaa 371#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 372STMT_START { \
228e69a7 373 if (UNLIKELY(utf8)) \
3a88beaa 374 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
230e1fce 375 else { \
228e69a7 376 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
377 S_reverse_copy((char *)(buf), cur, len); \
378 else \
379 Copy(buf, cur, len, char); \
230e1fce
NC
380 (cur) += (len); \
381 } \
f337b084
TH
382} STMT_END
383
384#define GROWING(utf8, cat, start, cur, in_len) \
385STMT_START { \
386 STRLEN glen = (in_len); \
3473cf63 387 if (utf8) glen *= UTF8_EXPAND; \
f337b084 388 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 389 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
390 (cur) = (start) + SvCUR(cat); \
391 } \
392} STMT_END
393
394#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
395STMT_START { \
f7fe979e 396 const STRLEN glen = (in_len); \
f337b084 397 STRLEN gl = glen; \
3473cf63 398 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
399 if ((cur) + gl >= (start) + SvLEN(cat)) { \
400 *cur = '\0'; \
b162af07 401 SvCUR_set((cat), (cur) - (start)); \
0bd48802 402 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
403 (cur) = (start) + SvCUR(cat); \
404 } \
3a88beaa 405 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
406} STMT_END
407
408#define PUSH_BYTE(utf8, s, byte) \
409STMT_START { \
410 if (utf8) { \
f7fe979e 411 const U8 au8 = (byte); \
3a88beaa 412 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
f337b084
TH
413 } else *(U8 *)(s)++ = (byte); \
414} STMT_END
415
416/* Only to be used inside a loop (see the break) */
417#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
418STMT_START { \
419 STRLEN retlen; \
420 if (str >= end) break; \
421 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
422 if (retlen == (STRLEN) -1 || retlen == 0) { \
423 *cur = '\0'; \
424 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
425 } \
426 str += retlen; \
427} STMT_END
428
f7fe979e
AL
429static const char *_action( const tempsym_t* symptr )
430{
10edeb5d 431 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
432}
433
206947d2 434/* Returns the sizeof() struct described by pat */
028d1f6d 435STATIC I32
f337b084 436S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 437{
f337b084 438 I32 total = 0;
206947d2 439
7918f24d
NC
440 PERL_ARGS_ASSERT_MEASURE_STRUCT;
441
49704364 442 while (next_symbol(symptr)) {
f337b084 443 I32 len;
f7fe979e 444 int size;
f337b084
TH
445
446 switch (symptr->howlen) {
fc241834 447 case e_star:
49704364 448 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 449 _action( symptr ) );
81d52ecd 450
f337b084
TH
451 default:
452 /* e_no_len and e_number */
453 len = symptr->length;
454 break;
49704364
LW
455 }
456
a7a3cfaa 457 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 458 if (!size) {
f7fe979e 459 int star;
80a13697
NC
460 /* endianness doesn't influence the size of a type */
461 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
462 default:
463 Perl_croak(aTHX_ "Invalid type '%c' in %s",
464 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 465 _action( symptr ) );
28be1210
TH
466 case '.' | TYPE_IS_SHRIEKING:
467 case '@' | TYPE_IS_SHRIEKING:
80a13697 468 case '@':
28be1210 469 case '.':
80a13697
NC
470 case '/':
471 case 'U': /* XXXX Is it correct? */
472 case 'w':
473 case 'u':
474 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 475 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 476 _action( symptr ) );
80a13697
NC
477 case '%':
478 size = 0;
479 break;
480 case '(':
fc241834
RGS
481 {
482 tempsym_t savsym = *symptr;
483 symptr->patptr = savsym.grpbeg;
484 symptr->patend = savsym.grpend;
485 /* XXXX Theoretically, we need to measure many times at
486 different positions, since the subexpression may contain
487 alignment commands, but be not of aligned length.
488 Need to detect this and croak(). */
489 size = measure_struct(symptr);
490 *symptr = savsym;
491 break;
492 }
80a13697
NC
493 case 'X' | TYPE_IS_SHRIEKING:
494 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
495 */
496 if (!len) /* Avoid division by 0 */
497 len = 1;
498 len = total % len; /* Assumed: the start is aligned. */
924ba076 499 /* FALLTHROUGH */
80a13697
NC
500 case 'X':
501 size = -1;
502 if (total < len)
f7fe979e 503 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
504 break;
505 case 'x' | TYPE_IS_SHRIEKING:
506 if (!len) /* Avoid division by 0 */
507 len = 1;
508 star = total % len; /* Assumed: the start is aligned. */
509 if (star) /* Other portable ways? */
510 len = len - star;
511 else
512 len = 0;
924ba076 513 /* FALLTHROUGH */
80a13697
NC
514 case 'x':
515 case 'A':
516 case 'Z':
517 case 'a':
80a13697
NC
518 size = 1;
519 break;
520 case 'B':
521 case 'b':
522 len = (len + 7)/8;
523 size = 1;
524 break;
525 case 'H':
526 case 'h':
527 len = (len + 1)/2;
528 size = 1;
529 break;
78d46eaa 530
80a13697
NC
531 case 'P':
532 len = 1;
533 size = sizeof(char*);
78d46eaa
NC
534 break;
535 }
206947d2
IZ
536 }
537 total += len * size;
538 }
539 return total;
540}
541
49704364
LW
542
543/* locate matching closing parenthesis or bracket
544 * returns char pointer to char after match, or NULL
545 */
f7fe979e 546STATIC const char *
5aaab254 547S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 548{
7918f24d
NC
549 PERL_ARGS_ASSERT_GROUP_END;
550
49704364 551 while (patptr < patend) {
f7fe979e 552 const char c = *patptr++;
49704364
LW
553
554 if (isSPACE(c))
555 continue;
556 else if (c == ender)
557 return patptr-1;
558 else if (c == '#') {
559 while (patptr < patend && *patptr != '\n')
560 patptr++;
561 continue;
562 } else if (c == '(')
563 patptr = group_end(patptr, patend, ')') + 1;
564 else if (c == '[')
565 patptr = group_end(patptr, patend, ']') + 1;
18529408 566 }
49704364
LW
567 Perl_croak(aTHX_ "No group ending character '%c' found in template",
568 ender);
a25b5927 569 NOT_REACHED; /* NOTREACHED */
18529408
IZ
570}
571
49704364
LW
572
573/* Convert unsigned decimal number to binary.
574 * Expects a pointer to the first digit and address of length variable
575 * Advances char pointer to 1st non-digit char and returns number
fc241834 576 */
f7fe979e 577STATIC const char *
5aaab254 578S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
579{
580 I32 len = *patptr++ - '0';
7918f24d
NC
581
582 PERL_ARGS_ASSERT_GET_NUM;
583
49704364
LW
584 while (isDIGIT(*patptr)) {
585 if (len >= 0x7FFFFFFF/10)
586 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
587 len = (len * 10) + (*patptr++ - '0');
588 }
589 *lenptr = len;
590 return patptr;
591}
592
593/* The marvellous template parsing routine: Using state stored in *symptr,
594 * locates next template code and count
595 */
596STATIC bool
f337b084 597S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 598{
f7fe979e 599 const char* patptr = symptr->patptr;
0bcc34c2 600 const char* const patend = symptr->patend;
49704364 601
7918f24d
NC
602 PERL_ARGS_ASSERT_NEXT_SYMBOL;
603
49704364
LW
604 symptr->flags &= ~FLAG_SLASH;
605
606 while (patptr < patend) {
607 if (isSPACE(*patptr))
608 patptr++;
609 else if (*patptr == '#') {
610 patptr++;
611 while (patptr < patend && *patptr != '\n')
612 patptr++;
613 if (patptr < patend)
614 patptr++;
615 } else {
fc241834 616 /* We should have found a template code */
49704364 617 I32 code = *patptr++ & 0xFF;
66c611c5 618 U32 inherited_modifiers = 0;
49704364
LW
619
620 if (code == ','){ /* grandfather in commas but with a warning */
621 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
622 symptr->flags |= FLAG_COMMA;
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 624 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
625 }
626 continue;
627 }
fc241834 628
49704364 629 /* for '(', skip to ')' */
fc241834 630 if (code == '(') {
49704364
LW
631 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
632 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 633 _action( symptr ) );
49704364
LW
634 symptr->grpbeg = patptr;
635 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
636 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
637 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 638 _action( symptr ) );
49704364
LW
639 }
640
66c611c5
MHM
641 /* look for group modifiers to inherit */
642 if (TYPE_ENDIANNESS(symptr->flags)) {
643 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
644 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
645 }
646
1109a392
MHM
647 /* look for modifiers */
648 while (patptr < patend) {
b7787f18
AL
649 const char *allowed;
650 I32 modifier;
1109a392
MHM
651 switch (*patptr) {
652 case '!':
653 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 654 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
655 break;
656 case '>':
657 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 658 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
659 break;
660 case '<':
661 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 662 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
663 break;
664 default:
b7787f18
AL
665 allowed = "";
666 modifier = 0;
1109a392
MHM
667 break;
668 }
66c611c5 669
1109a392
MHM
670 if (modifier == 0)
671 break;
66c611c5 672
1109a392
MHM
673 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
674 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 675 allowed, _action( symptr ) );
66c611c5
MHM
676
677 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 678 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 679 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
680 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
681 TYPE_ENDIANNESS_MASK)
682 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 683 *patptr, _action( symptr ) );
66c611c5 684
a2a5de95
NC
685 if ((code & modifier)) {
686 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
687 "Duplicate modifier '%c' after '%c' in %s",
688 *patptr, (int) TYPE_NO_MODIFIERS(code),
689 _action( symptr ) );
1109a392 690 }
66c611c5 691
1109a392
MHM
692 code |= modifier;
693 patptr++;
49704364
LW
694 }
695
66c611c5
MHM
696 /* inherit modifiers */
697 code |= inherited_modifiers;
698
fc241834 699 /* look for count and/or / */
49704364
LW
700 if (patptr < patend) {
701 if (isDIGIT(*patptr)) {
702 patptr = get_num( patptr, &symptr->length );
703 symptr->howlen = e_number;
704
705 } else if (*patptr == '*') {
706 patptr++;
707 symptr->howlen = e_star;
708
709 } else if (*patptr == '[') {
f7fe979e 710 const char* lenptr = ++patptr;
49704364
LW
711 symptr->howlen = e_number;
712 patptr = group_end( patptr, patend, ']' ) + 1;
713 /* what kind of [] is it? */
714 if (isDIGIT(*lenptr)) {
715 lenptr = get_num( lenptr, &symptr->length );
716 if( *lenptr != ']' )
717 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 718 _action( symptr ) );
49704364
LW
719 } else {
720 tempsym_t savsym = *symptr;
721 symptr->patend = patptr-1;
722 symptr->patptr = lenptr;
723 savsym.length = measure_struct(symptr);
724 *symptr = savsym;
725 }
726 } else {
727 symptr->howlen = e_no_len;
728 symptr->length = 1;
729 }
730
731 /* try to find / */
732 while (patptr < patend) {
733 if (isSPACE(*patptr))
734 patptr++;
735 else if (*patptr == '#') {
736 patptr++;
737 while (patptr < patend && *patptr != '\n')
738 patptr++;
739 if (patptr < patend)
740 patptr++;
741 } else {
66c611c5 742 if (*patptr == '/') {
49704364
LW
743 symptr->flags |= FLAG_SLASH;
744 patptr++;
66c611c5
MHM
745 if (patptr < patend &&
746 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 747 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 748 _action( symptr ) );
49704364
LW
749 }
750 break;
751 }
18529408 752 }
49704364
LW
753 } else {
754 /* at end - no count, no / */
755 symptr->howlen = e_no_len;
756 symptr->length = 1;
757 }
758
759 symptr->code = code;
fc241834 760 symptr->patptr = patptr;
49704364 761 return TRUE;
18529408 762 }
49704364 763 }
fc241834 764 symptr->patptr = patptr;
49704364 765 return FALSE;
18529408
IZ
766}
767
18529408 768/*
fc241834 769 There is no way to cleanly handle the case where we should process the
08ca2aa3 770 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
771 (e.g. estimates like strend-s as an upper bound for the number of
772 characters left wouldn't work). So if we foresee the need of this
773 (pattern starts with U or contains U0), we want to work on the encoded
774 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
775 themselves if they need to do a lot of unpacks like this on it
776*/
fc241834 777STATIC bool
08ca2aa3
TH
778need_utf8(const char *pat, const char *patend)
779{
780 bool first = TRUE;
7918f24d
NC
781
782 PERL_ARGS_ASSERT_NEED_UTF8;
783
08ca2aa3
TH
784 while (pat < patend) {
785 if (pat[0] == '#') {
786 pat++;
f7fe979e 787 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
788 if (!pat) return FALSE;
789 } else if (pat[0] == 'U') {
790 if (first || pat[1] == '0') return TRUE;
791 } else first = FALSE;
792 pat++;
793 }
794 return FALSE;
795}
796
797STATIC char
798first_symbol(const char *pat, const char *patend) {
7918f24d
NC
799 PERL_ARGS_ASSERT_FIRST_SYMBOL;
800
08ca2aa3
TH
801 while (pat < patend) {
802 if (pat[0] != '#') return pat[0];
803 pat++;
f7fe979e 804 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
805 if (!pat) return 0;
806 pat++;
807 }
808 return 0;
809}
810
811/*
dcccc8ff
KW
812
813=head1 Pack and Unpack
814
7accc089
JH
815=for apidoc unpackstring
816
21ebfc7a
DM
817The engine implementing the unpack() Perl function.
818
819Using the template pat..patend, this function unpacks the string
820s..strend into a number of mortal SVs, which it pushes onto the perl
821argument (@_) stack (so you will need to issue a C<PUTBACK> before and
72d33970 822C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
823pushed elements.
824
825The strend and patend pointers should point to the byte following the last
826character of each string.
827
828Although this function returns its values on the perl argument stack, it
829doesn't take any parameters from that stack (and thus in particular
830there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
831example).
7accc089
JH
832
833=cut */
834
835I32
f7fe979e 836Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 837{
f7fe979e 838 tempsym_t sym;
08ca2aa3 839
7918f24d
NC
840 PERL_ARGS_ASSERT_UNPACKSTRING;
841
f337b084 842 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
843 else if (need_utf8(pat, patend)) {
844 /* We probably should try to avoid this in case a scalar context call
845 wouldn't get to the "U0" */
846 STRLEN len = strend - s;
230e1fce 847 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
848 SAVEFREEPV(s);
849 strend = s + len;
f337b084 850 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
851 }
852
f337b084
TH
853 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
854 flags |= FLAG_PARSE_UTF8;
08ca2aa3 855
f7fe979e 856 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
857
858 return unpack_rec(&sym, s, s, strend, NULL );
859}
860
4136a0f7 861STATIC I32
f7fe979e 862S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 863{
20b7effb 864 dSP;
3297d27d 865 SV *sv = NULL;
f7fe979e 866 const I32 start_sp_offset = SP - PL_stack_base;
49704364 867 howlen_t howlen;
a6ec74c1 868 I32 checksum = 0;
92d41999 869 UV cuv = 0;
a6ec74c1 870 NV cdouble = 0.0;
f337b084 871 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 872 bool beyond = FALSE;
21c16052 873 bool explicit_length;
9e27e96a 874 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 875 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
876
877 PERL_ARGS_ASSERT_UNPACK_REC;
878
28be1210 879 symptr->strbeg = s - strbeg;
49704364 880
49704364 881 while (next_symbol(symptr)) {
a7a3cfaa 882 packprops_t props;
9e27e96a 883 I32 len;
f337b084 884 I32 datumtype = symptr->code;
a1219b5e 885 bool needs_swap;
206947d2 886 /* do first one only unless in list context
08ca2aa3 887 / is implemented by unpacking the count, then popping it from the
206947d2 888 stack, so must check that we're not in the middle of a / */
49704364 889 if ( unpack_only_one
206947d2 890 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 891 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 892 break;
49704364 893
f337b084 894 switch (howlen = symptr->howlen) {
fc241834
RGS
895 case e_star:
896 len = strend - strbeg; /* long enough */
49704364 897 break;
f337b084
TH
898 default:
899 /* e_no_len and e_number */
900 len = symptr->length;
901 break;
49704364 902 }
18529408 903
21c16052 904 explicit_length = TRUE;
a6ec74c1 905 redo_switch:
49704364 906 beyond = s >= strend;
a7a3cfaa
TH
907
908 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
909 if (props) {
910 /* props nonzero means we can process this letter. */
9e27e96a
AL
911 const long size = props & PACK_SIZE_MASK;
912 const long howmany = (strend - s) / size;
a7a3cfaa
TH
913 if (len > howmany)
914 len = howmany;
915
916 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
917 if (len && unpack_only_one) len = 1;
918 EXTEND(SP, len);
919 EXTEND_MORTAL(len);
78d46eaa
NC
920 }
921 }
a7a3cfaa 922
a1219b5e
NC
923 needs_swap = NEEDS_SWAP(datumtype);
924
1109a392 925 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 926 default:
1109a392 927 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 928
a6ec74c1 929 case '%':
49704364 930 if (howlen == e_no_len)
18529408 931 len = 16; /* len is not specified */
a6ec74c1 932 checksum = len;
92d41999 933 cuv = 0;
a6ec74c1 934 cdouble = 0;
18529408 935 continue;
81d52ecd 936
18529408
IZ
937 case '(':
938 {
49704364 939 tempsym_t savsym = *symptr;
9e27e96a 940 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 941 symptr->flags |= group_modifiers;
49704364 942 symptr->patend = savsym.grpend;
28be1210 943 symptr->previous = &savsym;
49704364 944 symptr->level++;
18529408 945 PUTBACK;
c6f750d1 946 if (len && unpack_only_one) len = 1;
18529408 947 while (len--) {
49704364 948 symptr->patptr = savsym.grpbeg;
f337b084
TH
949 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
950 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
951 unpack_rec(symptr, s, strbeg, strend, &s);
952 if (s == strend && savsym.howlen == e_star)
49704364 953 break; /* No way to continue */
18529408
IZ
954 }
955 SPAGAIN;
28be1210 956 savsym.flags = symptr->flags & ~group_modifiers;
49704364 957 *symptr = savsym;
18529408
IZ
958 break;
959 }
28be1210 960 case '.' | TYPE_IS_SHRIEKING:
28be1210 961 case '.': {
9e27e96a 962 const char *from;
28be1210 963 SV *sv;
9e27e96a 964 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
965 if (howlen == e_star) from = strbeg;
966 else if (len <= 0) from = s;
967 else {
968 tempsym_t *group = symptr;
969
970 while (--len && group) group = group->previous;
971 from = group ? strbeg + group->strbeg : strbeg;
972 }
973 sv = from <= s ?
00646304
CB
974 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 976 mXPUSHs(sv);
28be1210
TH
977 break;
978 }
28be1210 979 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 980 case '@':
28be1210 981 s = strbeg + symptr->strbeg;
28be1210 982 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 983 {
08ca2aa3
TH
984 while (len > 0) {
985 if (s >= strend)
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
987 s += UTF8SKIP(s);
988 len--;
989 }
990 if (s > strend)
991 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992 } else {
28be1210 993 if (strend-s < len)
fc241834 994 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 995 s += len;
08ca2aa3 996 }
a6ec74c1 997 break;
62f95557
IZ
998 case 'X' | TYPE_IS_SHRIEKING:
999 if (!len) /* Avoid division by 0 */
1000 len = 1;
08ca2aa3 1001 if (utf8) {
f7fe979e 1002 const char *hop, *last;
f337b084
TH
1003 I32 l = len;
1004 hop = last = strbeg;
1005 while (hop < s) {
1006 hop += UTF8SKIP(hop);
1007 if (--l == 0) {
08ca2aa3 1008 last = hop;
f337b084
TH
1009 l = len;
1010 }
fc241834 1011 }
f337b084
TH
1012 if (last > s)
1013 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1014 s = last;
1015 break;
f337b084
TH
1016 }
1017 len = (s - strbeg) % len;
924ba076 1018 /* FALLTHROUGH */
a6ec74c1 1019 case 'X':
08ca2aa3
TH
1020 if (utf8) {
1021 while (len > 0) {
1022 if (s <= strbeg)
1023 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1024 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1025 if (s <= strbeg)
1026 Perl_croak(aTHX_ "'X' outside of string in unpack");
1027 }
1028 len--;
1029 }
1030 } else {
fc241834
RGS
1031 if (len > s - strbeg)
1032 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1033 s -= len;
08ca2aa3 1034 }
a6ec74c1 1035 break;
9e27e96a
AL
1036 case 'x' | TYPE_IS_SHRIEKING: {
1037 I32 ai32;
62f95557
IZ
1038 if (!len) /* Avoid division by 0 */
1039 len = 1;
230e1fce
NC
1040 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1042 if (ai32 == 0) break;
1043 len -= ai32;
9e27e96a 1044 }
924ba076 1045 /* FALLTHROUGH */
a6ec74c1 1046 case 'x':
08ca2aa3
TH
1047 if (utf8) {
1048 while (len>0) {
1049 if (s >= strend)
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1051 s += UTF8SKIP(s);
1052 len--;
1053 }
1054 } else {
fc241834
RGS
1055 if (len > strend - s)
1056 Perl_croak(aTHX_ "'x' outside of string in unpack");
1057 s += len;
f337b084 1058 }
a6ec74c1
JH
1059 break;
1060 case '/':
49704364 1061 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1062
a6ec74c1
JH
1063 case 'A':
1064 case 'Z':
1065 case 'a':
08ca2aa3
TH
1066 if (checksum) {
1067 /* Preliminary length estimate is assumed done in 'W' */
1068 if (len > strend - s) len = strend - s;
1069 goto W_checksum;
1070 }
1071 if (utf8) {
1072 I32 l;
f7fe979e 1073 const char *hop;
08ca2aa3
TH
1074 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075 if (hop >= strend) {
1076 if (hop > strend)
1077 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1078 break;
fc241834 1079 }
a6ec74c1 1080 }
08ca2aa3
TH
1081 if (hop > strend)
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 len = hop - s;
1084 } else if (len > strend - s)
1085 len = strend - s;
1086
1087 if (datumtype == 'Z') {
1088 /* 'Z' strips stuff after first null */
f7fe979e 1089 const char *ptr, *end;
f337b084
TH
1090 end = s + len;
1091 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1092 sv = newSVpvn(s, ptr-s);
1093 if (howlen == e_star) /* exact for 'Z*' */
1094 len = ptr-s + (ptr != strend ? 1 : 0);
1095 } else if (datumtype == 'A') {
1096 /* 'A' strips both nulls and spaces */
f7fe979e 1097 const char *ptr;
18bdf90a
TH
1098 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1099 for (ptr = s+len-1; ptr >= s; ptr--)
1100 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1101 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1102 if (ptr >= s) ptr += UTF8SKIP(ptr);
1103 else ptr++;
28be1210 1104 if (ptr > s+len)
18bdf90a
TH
1105 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1106 } else {
1107 for (ptr = s+len-1; ptr >= s; ptr--)
1108 if (*ptr != 0 && !isSPACE(*ptr)) break;
1109 ptr++;
1110 }
08ca2aa3
TH
1111 sv = newSVpvn(s, ptr-s);
1112 } else sv = newSVpvn(s, len);
1113
1114 if (utf8) {
1115 SvUTF8_on(sv);
1116 /* Undo any upgrade done due to need_utf8() */
f337b084 1117 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1118 sv_utf8_downgrade(sv, 0);
a6ec74c1 1119 }
6e449a3a 1120 mXPUSHs(sv);
08ca2aa3 1121 s += len;
a6ec74c1
JH
1122 break;
1123 case 'B':
08ca2aa3
TH
1124 case 'b': {
1125 char *str;
49704364 1126 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1127 len = (strend - s) * 8;
1128 if (checksum) {
f337b084 1129 if (utf8)
08ca2aa3 1130 while (len >= 8 && s < strend) {
f337b084 1131 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1132 len -= 8;
1133 }
f337b084 1134 else
fc241834 1135 while (len >= 8) {
08ca2aa3 1136 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1137 len -= 8;
1138 }
08ca2aa3
TH
1139 if (len && s < strend) {
1140 U8 bits;
f337b084
TH
1141 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1142 if (datumtype == 'b')
a6ec74c1 1143 while (len-- > 0) {
92d41999 1144 if (bits & 1) cuv++;
a6ec74c1
JH
1145 bits >>= 1;
1146 }
f337b084 1147 else
a6ec74c1 1148 while (len-- > 0) {
08ca2aa3 1149 if (bits & 0x80) cuv++;
a6ec74c1
JH
1150 bits <<= 1;
1151 }
fc241834 1152 }
a6ec74c1
JH
1153 break;
1154 }
08ca2aa3 1155
561b68a9 1156 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1157 SvPOK_on(sv);
1158 str = SvPVX(sv);
1159 if (datumtype == 'b') {
f337b084 1160 U8 bits = 0;
f7fe979e 1161 const I32 ai32 = len;
08ca2aa3
TH
1162 for (len = 0; len < ai32; len++) {
1163 if (len & 7) bits >>= 1;
1164 else if (utf8) {
1165 if (s >= strend) break;
f337b084 1166 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1167 } else bits = *(U8 *) s++;
1168 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1169 }
08ca2aa3 1170 } else {
f337b084 1171 U8 bits = 0;
f7fe979e 1172 const I32 ai32 = len;
08ca2aa3
TH
1173 for (len = 0; len < ai32; len++) {
1174 if (len & 7) bits <<= 1;
1175 else if (utf8) {
1176 if (s >= strend) break;
f337b084 1177 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1178 } else bits = *(U8 *) s++;
1179 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1180 }
1181 }
1182 *str = '\0';
aa07b2f6 1183 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1184 XPUSHs(sv);
a6ec74c1 1185 break;
08ca2aa3 1186 }
a6ec74c1 1187 case 'H':
08ca2aa3 1188 case 'h': {
3297d27d 1189 char *str = NULL;
fc241834 1190 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1191 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1192 len = (strend - s) * 2;
858fe5e1
TC
1193 if (!checksum) {
1194 sv = sv_2mortal(newSV(len ? len : 1));
1195 SvPOK_on(sv);
1196 str = SvPVX(sv);
1197 }
a6ec74c1 1198 if (datumtype == 'h') {
f337b084 1199 U8 bits = 0;
9e27e96a 1200 I32 ai32 = len;
fc241834
RGS
1201 for (len = 0; len < ai32; len++) {
1202 if (len & 1) bits >>= 4;
1203 else if (utf8) {
1204 if (s >= strend) break;
f337b084 1205 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1206 } else bits = * (U8 *) s++;
858fe5e1
TC
1207 if (!checksum)
1208 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1209 }
08ca2aa3 1210 } else {
f337b084 1211 U8 bits = 0;
f7fe979e 1212 const I32 ai32 = len;
08ca2aa3
TH
1213 for (len = 0; len < ai32; len++) {
1214 if (len & 1) bits <<= 4;
1215 else if (utf8) {
1216 if (s >= strend) break;
f337b084 1217 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1218 } else bits = *(U8 *) s++;
858fe5e1
TC
1219 if (!checksum)
1220 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1221 }
1222 }
858fe5e1
TC
1223 if (!checksum) {
1224 *str = '\0';
1225 SvCUR_set(sv, str - SvPVX_const(sv));
1226 XPUSHs(sv);
1227 }
a6ec74c1 1228 break;
08ca2aa3 1229 }
1651fc44
ML
1230 case 'C':
1231 if (len == 0) {
1232 if (explicit_length)
1233 /* Switch to "character" mode */
1234 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1235 break;
1236 }
924ba076 1237 /* FALLTHROUGH */
a6ec74c1 1238 case 'c':
1651fc44
ML
1239 while (len-- > 0 && s < strend) {
1240 int aint;
1241 if (utf8)
1242 {
1243 STRLEN retlen;
1244 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 s += retlen;
1249 }
1250 else
1251 aint = *(U8 *)(s)++;
1252 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1253 aint -= 256;
08ca2aa3 1254 if (!checksum)
6e449a3a 1255 mPUSHi(aint);
73cb7263
NC
1256 else if (checksum > bits_in_uv)
1257 cdouble += (NV)aint;
1258 else
1259 cuv += aint;
a6ec74c1
JH
1260 }
1261 break;
08ca2aa3
TH
1262 case 'W':
1263 W_checksum:
1651fc44 1264 if (utf8) {
08ca2aa3 1265 while (len-- > 0 && s < strend) {
08ca2aa3 1266 STRLEN retlen;
f7fe979e 1267 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1269 if (retlen == (STRLEN) -1 || retlen == 0)
1270 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1271 s += retlen;
1272 if (!checksum)
6e449a3a 1273 mPUSHu(val);
08ca2aa3
TH
1274 else if (checksum > bits_in_uv)
1275 cdouble += (NV) val;
d6d3e8bd 1276 else
08ca2aa3 1277 cuv += val;
fc241834 1278 }
08ca2aa3 1279 } else if (!checksum)
a6ec74c1 1280 while (len-- > 0) {
f7fe979e 1281 const U8 ch = *(U8 *) s++;
6e449a3a 1282 mPUSHu(ch);
a6ec74c1 1283 }
08ca2aa3
TH
1284 else if (checksum > bits_in_uv)
1285 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1286 else
1287 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1288 break;
1289 case 'U':
35bcd338 1290 if (len == 0) {
c5333953 1291 if (explicit_length && howlen != e_star) {
08ca2aa3 1292 /* Switch to "bytes in UTF-8" mode */
f337b084 1293 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1294 else
1295 /* Should be impossible due to the need_utf8() test */
1296 Perl_croak(aTHX_ "U0 mode on a byte string");
1297 }
35bcd338
JH
1298 break;
1299 }
08ca2aa3 1300 if (len > strend - s) len = strend - s;
fc241834 1301 if (!checksum) {
08ca2aa3
TH
1302 if (len && unpack_only_one) len = 1;
1303 EXTEND(SP, len);
1304 EXTEND_MORTAL(len);
fc241834 1305 }
08ca2aa3
TH
1306 while (len-- > 0 && s < strend) {
1307 STRLEN retlen;
1308 UV auv;
1309 if (utf8) {
1310 U8 result[UTF8_MAXLEN];
f7fe979e 1311 const char *ptr = s;
08ca2aa3 1312 STRLEN len;
08ca2aa3
TH
1313 /* Bug: warns about bad utf8 even if we are short on bytes
1314 and will break out of the loop */
230e1fce
NC
1315 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1316 'U'))
08ca2aa3
TH
1317 break;
1318 len = UTF8SKIP(result);
fc241834 1319 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1320 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1321 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1322 len,
1323 &retlen,
1324 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1325 s = ptr;
1326 } else {
3ece276e
KW
1327 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1328 strend - s,
1329 &retlen,
1330 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1331 if (retlen == (STRLEN) -1 || retlen == 0)
1332 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1333 s += retlen;
1334 }
1335 if (!checksum)
6e449a3a 1336 mPUSHu(auv);
73cb7263 1337 else if (checksum > bits_in_uv)
08ca2aa3 1338 cdouble += (NV) auv;
73cb7263 1339 else
08ca2aa3 1340 cuv += auv;
a6ec74c1
JH
1341 }
1342 break;
49704364
LW
1343 case 's' | TYPE_IS_SHRIEKING:
1344#if SHORTSIZE != SIZE16
73cb7263 1345 while (len-- > 0) {
08ca2aa3 1346 short ashort;
aaec8192 1347 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1348 if (!checksum)
6e449a3a 1349 mPUSHi(ashort);
73cb7263
NC
1350 else if (checksum > bits_in_uv)
1351 cdouble += (NV)ashort;
1352 else
1353 cuv += ashort;
49704364
LW
1354 }
1355 break;
1356#else
924ba076 1357 /* FALLTHROUGH */
a6ec74c1 1358#endif
49704364 1359 case 's':
73cb7263 1360 while (len-- > 0) {
08ca2aa3
TH
1361 I16 ai16;
1362
1363#if U16SIZE > SIZE16
1364 ai16 = 0;
1365#endif
aaec8192 1366 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1367#if U16SIZE > SIZE16
73cb7263
NC
1368 if (ai16 > 32767)
1369 ai16 -= 65536;
a6ec74c1 1370#endif
08ca2aa3 1371 if (!checksum)
6e449a3a 1372 mPUSHi(ai16);
73cb7263
NC
1373 else if (checksum > bits_in_uv)
1374 cdouble += (NV)ai16;
1375 else
1376 cuv += ai16;
a6ec74c1
JH
1377 }
1378 break;
49704364
LW
1379 case 'S' | TYPE_IS_SHRIEKING:
1380#if SHORTSIZE != SIZE16
73cb7263 1381 while (len-- > 0) {
08ca2aa3 1382 unsigned short aushort;
aaec8192
NC
1383 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1384 needs_swap);
08ca2aa3 1385 if (!checksum)
6e449a3a 1386 mPUSHu(aushort);
73cb7263
NC
1387 else if (checksum > bits_in_uv)
1388 cdouble += (NV)aushort;
1389 else
1390 cuv += aushort;
49704364
LW
1391 }
1392 break;
1393#else
924ba076 1394 /* FALLTHROUGH */
49704364 1395#endif
a6ec74c1
JH
1396 case 'v':
1397 case 'n':
1398 case 'S':
73cb7263 1399 while (len-- > 0) {
08ca2aa3
TH
1400 U16 au16;
1401#if U16SIZE > SIZE16
1402 au16 = 0;
1403#endif
aaec8192 1404 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1405 if (datumtype == 'n')
1406 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1407 if (datumtype == 'v')
1408 au16 = vtohs(au16);
08ca2aa3 1409 if (!checksum)
6e449a3a 1410 mPUSHu(au16);
73cb7263 1411 else if (checksum > bits_in_uv)
f337b084 1412 cdouble += (NV) au16;
73cb7263
NC
1413 else
1414 cuv += au16;
a6ec74c1
JH
1415 }
1416 break;
068bd2e7
MHM
1417 case 'v' | TYPE_IS_SHRIEKING:
1418 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1419 while (len-- > 0) {
08ca2aa3
TH
1420 I16 ai16;
1421# if U16SIZE > SIZE16
1422 ai16 = 0;
1423# endif
aaec8192 1424 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1425 /* There should never be any byte-swapping here. */
1426 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1427 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1428 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1429 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1430 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1431 if (!checksum)
6e449a3a 1432 mPUSHi(ai16);
73cb7263 1433 else if (checksum > bits_in_uv)
08ca2aa3 1434 cdouble += (NV) ai16;
73cb7263
NC
1435 else
1436 cuv += ai16;
068bd2e7
MHM
1437 }
1438 break;
a6ec74c1 1439 case 'i':
49704364 1440 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1441 while (len-- > 0) {
08ca2aa3 1442 int aint;
aaec8192 1443 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1444 if (!checksum)
6e449a3a 1445 mPUSHi(aint);
73cb7263
NC
1446 else if (checksum > bits_in_uv)
1447 cdouble += (NV)aint;
1448 else
1449 cuv += aint;
a6ec74c1
JH
1450 }
1451 break;
1452 case 'I':
49704364 1453 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1454 while (len-- > 0) {
08ca2aa3 1455 unsigned int auint;
aaec8192 1456 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1457 if (!checksum)
6e449a3a 1458 mPUSHu(auint);
73cb7263
NC
1459 else if (checksum > bits_in_uv)
1460 cdouble += (NV)auint;
1461 else
1462 cuv += auint;
a6ec74c1
JH
1463 }
1464 break;
92d41999 1465 case 'j':
73cb7263 1466 while (len-- > 0) {
08ca2aa3 1467 IV aiv;
aaec8192 1468 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1469 if (!checksum)
6e449a3a 1470 mPUSHi(aiv);
73cb7263
NC
1471 else if (checksum > bits_in_uv)
1472 cdouble += (NV)aiv;
1473 else
1474 cuv += aiv;
92d41999
JH
1475 }
1476 break;
1477 case 'J':
73cb7263 1478 while (len-- > 0) {
08ca2aa3 1479 UV auv;
aaec8192 1480 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1481 if (!checksum)
6e449a3a 1482 mPUSHu(auv);
73cb7263
NC
1483 else if (checksum > bits_in_uv)
1484 cdouble += (NV)auv;
1485 else
1486 cuv += auv;
92d41999
JH
1487 }
1488 break;
49704364
LW
1489 case 'l' | TYPE_IS_SHRIEKING:
1490#if LONGSIZE != SIZE32
73cb7263 1491 while (len-- > 0) {
08ca2aa3 1492 long along;
aaec8192 1493 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1494 if (!checksum)
6e449a3a 1495 mPUSHi(along);
73cb7263
NC
1496 else if (checksum > bits_in_uv)
1497 cdouble += (NV)along;
1498 else
1499 cuv += along;
49704364
LW
1500 }
1501 break;
1502#else
924ba076 1503 /* FALLTHROUGH */
a6ec74c1 1504#endif
49704364 1505 case 'l':
73cb7263 1506 while (len-- > 0) {
08ca2aa3
TH
1507 I32 ai32;
1508#if U32SIZE > SIZE32
1509 ai32 = 0;
1510#endif
aaec8192 1511 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1512#if U32SIZE > SIZE32
08ca2aa3 1513 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1514#endif
08ca2aa3 1515 if (!checksum)
6e449a3a 1516 mPUSHi(ai32);
73cb7263
NC
1517 else if (checksum > bits_in_uv)
1518 cdouble += (NV)ai32;
1519 else
1520 cuv += ai32;
a6ec74c1
JH
1521 }
1522 break;
49704364
LW
1523 case 'L' | TYPE_IS_SHRIEKING:
1524#if LONGSIZE != SIZE32
73cb7263 1525 while (len-- > 0) {
08ca2aa3 1526 unsigned long aulong;
aaec8192 1527 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1528 if (!checksum)
6e449a3a 1529 mPUSHu(aulong);
73cb7263
NC
1530 else if (checksum > bits_in_uv)
1531 cdouble += (NV)aulong;
1532 else
1533 cuv += aulong;
49704364
LW
1534 }
1535 break;
1536#else
924ba076 1537 /* FALLTHROUGH */
49704364 1538#endif
a6ec74c1
JH
1539 case 'V':
1540 case 'N':
1541 case 'L':
73cb7263 1542 while (len-- > 0) {
08ca2aa3
TH
1543 U32 au32;
1544#if U32SIZE > SIZE32
1545 au32 = 0;
1546#endif
aaec8192 1547 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1548 if (datumtype == 'N')
1549 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1550 if (datumtype == 'V')
1551 au32 = vtohl(au32);
08ca2aa3 1552 if (!checksum)
6e449a3a 1553 mPUSHu(au32);
fc241834
RGS
1554 else if (checksum > bits_in_uv)
1555 cdouble += (NV)au32;
1556 else
1557 cuv += au32;
a6ec74c1
JH
1558 }
1559 break;
068bd2e7
MHM
1560 case 'V' | TYPE_IS_SHRIEKING:
1561 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1562 while (len-- > 0) {
08ca2aa3 1563 I32 ai32;
f8e5a5db 1564#if U32SIZE > SIZE32
08ca2aa3 1565 ai32 = 0;
f8e5a5db 1566#endif
aaec8192 1567 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1568 /* There should never be any byte swapping here. */
1569 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1570 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1571 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1572 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1573 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1574 if (!checksum)
6e449a3a 1575 mPUSHi(ai32);
73cb7263
NC
1576 else if (checksum > bits_in_uv)
1577 cdouble += (NV)ai32;
1578 else
1579 cuv += ai32;
068bd2e7
MHM
1580 }
1581 break;
a6ec74c1 1582 case 'p':
a6ec74c1 1583 while (len-- > 0) {
f7fe979e 1584 const char *aptr;
aaec8192 1585 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1586 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1587 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1588 }
1589 break;
1590 case 'w':
a6ec74c1
JH
1591 {
1592 UV auv = 0;
1593 U32 bytes = 0;
fc241834 1594
08ca2aa3
TH
1595 while (len > 0 && s < strend) {
1596 U8 ch;
f337b084 1597 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1598 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1599 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1600 if (ch < 0x80) {
a6ec74c1 1601 bytes = 0;
6e449a3a 1602 mPUSHu(auv);
a6ec74c1
JH
1603 len--;
1604 auv = 0;
08ca2aa3 1605 continue;
a6ec74c1 1606 }
08ca2aa3 1607 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1608 const char *t;
a6ec74c1 1609
f5992bc4 1610 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1611 while (s < strend) {
f337b084 1612 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1613 sv = mul128(sv, (U8)(ch & 0x7f));
1614 if (!(ch & 0x80)) {
a6ec74c1
JH
1615 bytes = 0;
1616 break;
1617 }
1618 }
10516c54 1619 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1620 while (*t == '0')
1621 t++;
1622 sv_chop(sv, t);
6e449a3a 1623 mPUSHs(sv);
a6ec74c1
JH
1624 len--;
1625 auv = 0;
1626 }
1627 }
1628 if ((s >= strend) && bytes)
49704364 1629 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1630 }
1631 break;
1632 case 'P':
49704364
LW
1633 if (symptr->howlen == e_star)
1634 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1635 EXTEND(SP, 1);
2d3e0934 1636 if (s + sizeof(char*) <= strend) {
08ca2aa3 1637 char *aptr;
aaec8192 1638 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1639 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1640 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1641 }
a6ec74c1 1642 break;
c174bf3b 1643#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1644 case 'q':
73cb7263 1645 while (len-- > 0) {
08ca2aa3 1646 Quad_t aquad;
aaec8192 1647 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1648 if (!checksum)
c174bf3b 1649 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)aquad;
1652 else
1653 cuv += aquad;
1654 }
a6ec74c1
JH
1655 break;
1656 case 'Q':
73cb7263 1657 while (len-- > 0) {
08ca2aa3 1658 Uquad_t auquad;
aaec8192 1659 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1660 if (!checksum)
c174bf3b 1661 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1662 else if (checksum > bits_in_uv)
1663 cdouble += (NV)auquad;
1664 else
1665 cuv += auquad;
a6ec74c1
JH
1666 }
1667 break;
1640b983 1668#endif
a6ec74c1
JH
1669 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1670 case 'f':
73cb7263 1671 while (len-- > 0) {
08ca2aa3 1672 float afloat;
aaec8192 1673 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1674 if (!checksum)
6e449a3a 1675 mPUSHn(afloat);
08ca2aa3 1676 else
73cb7263 1677 cdouble += afloat;
fc241834 1678 }
a6ec74c1
JH
1679 break;
1680 case 'd':
73cb7263 1681 while (len-- > 0) {
08ca2aa3 1682 double adouble;
aaec8192 1683 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1684 if (!checksum)
6e449a3a 1685 mPUSHn(adouble);
08ca2aa3 1686 else
73cb7263 1687 cdouble += adouble;
fc241834 1688 }
a6ec74c1 1689 break;
92d41999 1690 case 'F':
73cb7263 1691 while (len-- > 0) {
275663fa 1692 NV_bytes anv;
aaec8192
NC
1693 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1694 datumtype, needs_swap);
08ca2aa3 1695 if (!checksum)
275663fa 1696 mPUSHn(anv.nv);
08ca2aa3 1697 else
275663fa 1698 cdouble += anv.nv;
fc241834 1699 }
92d41999
JH
1700 break;
1701#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1702 case 'D':
73cb7263 1703 while (len-- > 0) {
275663fa 1704 ld_bytes aldouble;
aaec8192
NC
1705 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1706 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1707 /* The most common long double format, the x86 80-bit
1708 * extended precision, has either 2 or 6 unused bytes,
1709 * which may contain garbage, which may contain
1710 * unintentional data. While we do zero the bytes of
1711 * the long double data in pack(), here in unpack() we
1712 * don't, because it's really hard to envision that
1713 * reading the long double off aldouble would be
e075ae47 1714 * affected by the unused bytes.
42262fd3
JH
1715 *
1716 * Note that trying to unpack 'long doubles' of 'long
1717 * doubles' packed in another system is in the general
1718 * case doomed without having more detail. */
08ca2aa3 1719 if (!checksum)
275663fa 1720 mPUSHn(aldouble.ld);
08ca2aa3 1721 else
275663fa 1722 cdouble += aldouble.ld;
92d41999
JH
1723 }
1724 break;
1725#endif
a6ec74c1 1726 case 'u':
858fe5e1 1727 if (!checksum) {
f7fe979e 1728 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1729 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1730 if (l) SvPOK_on(sv);
1731 }
1732 if (utf8) {
1733 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1734 I32 a, b, c, d;
db187877 1735 char hunk[3];
08ca2aa3 1736
08ca2aa3
TH
1737 while (len > 0) {
1738 next_uni_uu(aTHX_ &s, strend, &a);
1739 next_uni_uu(aTHX_ &s, strend, &b);
1740 next_uni_uu(aTHX_ &s, strend, &c);
1741 next_uni_uu(aTHX_ &s, strend, &d);
1742 hunk[0] = (char)((a << 2) | (b >> 4));
1743 hunk[1] = (char)((b << 4) | (c >> 2));
1744 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1745 if (!checksum)
1746 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1747 len -= 3;
1748 }
1749 if (s < strend) {
f7fe979e
AL
1750 if (*s == '\n') {
1751 s++;
1752 }
08ca2aa3
TH
1753 else {
1754 /* possible checksum byte */
f7fe979e
AL
1755 const char *skip = s+UTF8SKIP(s);
1756 if (skip < strend && *skip == '\n')
1757 s = skip+1;
08ca2aa3
TH
1758 }
1759 }
1760 }
1761 } else {
fc241834
RGS
1762 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1763 I32 a, b, c, d;
db187877 1764 char hunk[3];
a6ec74c1 1765
fc241834
RGS
1766 len = PL_uudmap[*(U8*)s++] & 077;
1767 while (len > 0) {
1768 if (s < strend && ISUUCHAR(*s))
1769 a = PL_uudmap[*(U8*)s++] & 077;
1770 else
1771 a = 0;
1772 if (s < strend && ISUUCHAR(*s))
1773 b = PL_uudmap[*(U8*)s++] & 077;
1774 else
1775 b = 0;
1776 if (s < strend && ISUUCHAR(*s))
1777 c = PL_uudmap[*(U8*)s++] & 077;
1778 else
1779 c = 0;
1780 if (s < strend && ISUUCHAR(*s))
1781 d = PL_uudmap[*(U8*)s++] & 077;
1782 else
1783 d = 0;
1784 hunk[0] = (char)((a << 2) | (b >> 4));
1785 hunk[1] = (char)((b << 4) | (c >> 2));
1786 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1787 if (!checksum)
1788 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1789 len -= 3;
1790 }
1791 if (*s == '\n')
1792 s++;
1793 else /* possible checksum byte */
1794 if (s + 1 < strend && s[1] == '\n')
1795 s += 2;
a6ec74c1 1796 }
08ca2aa3 1797 }
858fe5e1
TC
1798 if (!checksum)
1799 XPUSHs(sv);
a6ec74c1 1800 break;
99f862a1 1801 } /* End of switch */
49704364 1802
a6ec74c1 1803 if (checksum) {
1109a392 1804 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1805 (checksum > bits_in_uv &&
08ca2aa3
TH
1806 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1807 NV trouble, anv;
a6ec74c1 1808
08ca2aa3 1809 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1810 while (checksum >= 16) {
1811 checksum -= 16;
08ca2aa3 1812 anv *= 65536.0;
a6ec74c1 1813 }
a6ec74c1 1814 while (cdouble < 0.0)
08ca2aa3
TH
1815 cdouble += anv;
1816 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1817 sv = newSVnv(cdouble);
a6ec74c1
JH
1818 }
1819 else {
fa8ec7c1
NC
1820 if (checksum < bits_in_uv) {
1821 UV mask = ((UV)1 << checksum) - 1;
92d41999 1822 cuv &= mask;
a6ec74c1 1823 }
c4c5f44a 1824 sv = newSVuv(cuv);
a6ec74c1 1825 }
6e449a3a 1826 mXPUSHs(sv);
a6ec74c1
JH
1827 checksum = 0;
1828 }
fc241834 1829
49704364
LW
1830 if (symptr->flags & FLAG_SLASH){
1831 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1832 break;
49704364
LW
1833 if( next_symbol(symptr) ){
1834 if( symptr->howlen == e_number )
1835 Perl_croak(aTHX_ "Count after length/code in unpack" );
1836 if( beyond ){
1837 /* ...end of char buffer then no decent length available */
1838 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1839 } else {
1840 /* take top of stack (hope it's numeric) */
1841 len = POPi;
1842 if( len < 0 )
1843 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1844 }
1845 } else {
1846 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1847 }
1848 datumtype = symptr->code;
21c16052 1849 explicit_length = FALSE;
49704364
LW
1850 goto redo_switch;
1851 }
a6ec74c1 1852 }
49704364 1853
18529408
IZ
1854 if (new_s)
1855 *new_s = s;
1856 PUTBACK;
1857 return SP - PL_stack_base - start_sp_offset;
1858}
1859
1860PP(pp_unpack)
1861{
1862 dSP;
bab9c0ac 1863 dPOPPOPssrl;
18529408
IZ
1864 I32 gimme = GIMME_V;
1865 STRLEN llen;
1866 STRLEN rlen;
5c144d81
NC
1867 const char *pat = SvPV_const(left, llen);
1868 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1869 const char *strend = s + rlen;
1870 const char *patend = pat + llen;
08ca2aa3 1871 I32 cnt;
18529408
IZ
1872
1873 PUTBACK;
7accc089 1874 cnt = unpackstring(pat, patend, s, strend,
49704364 1875 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1876 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1877
18529408
IZ
1878 SPAGAIN;
1879 if ( !cnt && gimme == G_SCALAR )
1880 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1881 RETURN;
1882}
1883
f337b084 1884STATIC U8 *
e68aed92 1885doencodes(U8 *h, const U8 *s, I32 len)
a6ec74c1 1886{
f337b084 1887 *h++ = PL_uuemap[len];
a6ec74c1 1888 while (len > 2) {
f337b084
TH
1889 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1890 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1891 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1892 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1893 s += 3;
1894 len -= 3;
1895 }
1896 if (len > 0) {
e68aed92 1897 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1898 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1899 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1900 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1901 *h++ = PL_uuemap[0];
a6ec74c1 1902 }
f337b084
TH
1903 *h++ = '\n';
1904 return h;
a6ec74c1
JH
1905}
1906
1907STATIC SV *
f7fe979e 1908S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1909{
8b6e33c7
AL
1910 SV *result = newSVpvn(s, l);
1911 char *const result_c = SvPV_nolen(result); /* convenience */
1912 char *out = result_c;
1913 bool skip = 1;
1914 bool ignore = 0;
a6ec74c1 1915
7918f24d
NC
1916 PERL_ARGS_ASSERT_IS_AN_INT;
1917
a6ec74c1
JH
1918 while (*s) {
1919 switch (*s) {
1920 case ' ':
1921 break;
1922 case '+':
1923 if (!skip) {
1924 SvREFCNT_dec(result);
1925 return (NULL);
1926 }
1927 break;
1928 case '0':
1929 case '1':
1930 case '2':
1931 case '3':
1932 case '4':
1933 case '5':
1934 case '6':
1935 case '7':
1936 case '8':
1937 case '9':
1938 skip = 0;
1939 if (!ignore) {
1940 *(out++) = *s;
1941 }
1942 break;
1943 case '.':
1944 ignore = 1;
1945 break;
1946 default:
1947 SvREFCNT_dec(result);
1948 return (NULL);
1949 }
1950 s++;
1951 }
1952 *(out++) = '\0';
1953 SvCUR_set(result, out - result_c);
1954 return (result);
1955}
1956
1957/* pnum must be '\0' terminated */
1958STATIC int
1959S_div128(pTHX_ SV *pnum, bool *done)
1960{
8b6e33c7
AL
1961 STRLEN len;
1962 char * const s = SvPV(pnum, len);
1963 char *t = s;
1964 int m = 0;
1965
7918f24d
NC
1966 PERL_ARGS_ASSERT_DIV128;
1967
8b6e33c7
AL
1968 *done = 1;
1969 while (*t) {
1970 const int i = m * 10 + (*t - '0');
1971 const int r = (i >> 7); /* r < 10 */
1972 m = i & 0x7F;
1973 if (r) {
1974 *done = 0;
1975 }
1976 *(t++) = '0' + r;
a6ec74c1 1977 }
8b6e33c7
AL
1978 *(t++) = '\0';
1979 SvCUR_set(pnum, (STRLEN) (t - s));
1980 return (m);
a6ec74c1
JH
1981}
1982
18529408 1983/*
7accc089
JH
1984=for apidoc packlist
1985
1986The engine implementing pack() Perl function.
1987
bfce84ec
AL
1988=cut
1989*/
7accc089
JH
1990
1991void
5aaab254 1992Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1993{
aadb217d
JH
1994 tempsym_t sym;
1995
7918f24d
NC
1996 PERL_ARGS_ASSERT_PACKLIST;
1997
f7fe979e 1998 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1999
f337b084
TH
2000 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2001 Also make sure any UTF8 flag is loaded */
56eb0262 2002 SvPV_force_nolen(cat);
bfce84ec
AL
2003 if (DO_UTF8(cat))
2004 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2005
49704364
LW
2006 (void)pack_rec( cat, &sym, beglist, endlist );
2007}
2008
f337b084
TH
2009/* like sv_utf8_upgrade, but also repoint the group start markers */
2010STATIC void
2011marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2012 STRLEN len;
2013 tempsym_t *group;
f7fe979e
AL
2014 const char *from_ptr, *from_start, *from_end, **marks, **m;
2015 char *to_start, *to_ptr;
f337b084
TH
2016
2017 if (SvUTF8(sv)) return;
2018
aa07b2f6 2019 from_start = SvPVX_const(sv);
f337b084
TH
2020 from_end = from_start + SvCUR(sv);
2021 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 2022 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2023 if (from_ptr == from_end) {
2024 /* Simple case: no character needs to be changed */
2025 SvUTF8_on(sv);
2026 return;
2027 }
2028
3473cf63 2029 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2030 Newx(to_start, len, char);
f337b084
TH
2031 Copy(from_start, to_start, from_ptr-from_start, char);
2032 to_ptr = to_start + (from_ptr-from_start);
2033
a02a5408 2034 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2035 for (group=sym_ptr; group; group = group->previous)
2036 marks[group->level] = from_start + group->strbeg;
2037 marks[sym_ptr->level+1] = from_end+1;
2038 for (m = marks; *m < from_ptr; m++)
2039 *m = to_start + (*m-from_start);
2040
2041 for (;from_ptr < from_end; from_ptr++) {
2042 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2043 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2044 }
2045 *to_ptr = 0;
2046
2047 while (*m == from_ptr) *m++ = to_ptr;
2048 if (m != marks + sym_ptr->level+1) {
2049 Safefree(marks);
2050 Safefree(to_start);
5637ef5b
NC
2051 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2052 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2053 }
2054 for (group=sym_ptr; group; group = group->previous)
2055 group->strbeg = marks[group->level] - to_start;
2056 Safefree(marks);
2057
2058 if (SvOOK(sv)) {
2059 if (SvIVX(sv)) {
b162af07 2060 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2061 from_start -= SvIVX(sv);
2062 SvIV_set(sv, 0);
2063 }
2064 SvFLAGS(sv) &= ~SVf_OOK;
2065 }
2066 if (SvLEN(sv) != 0)
2067 Safefree(from_start);
f880fe2f 2068 SvPV_set(sv, to_start);
b162af07
SP
2069 SvCUR_set(sv, to_ptr - to_start);
2070 SvLEN_set(sv, len);
f337b084
TH
2071 SvUTF8_on(sv);
2072}
2073
2074/* Exponential string grower. Makes string extension effectively O(n)
2075 needed says how many extra bytes we need (not counting the final '\0')
2076 Only grows the string if there is an actual lack of space
2077*/
2078STATIC char *
0bd48802 2079S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2080 const STRLEN cur = SvCUR(sv);
2081 const STRLEN len = SvLEN(sv);
f337b084 2082 STRLEN extend;
7918f24d
NC
2083
2084 PERL_ARGS_ASSERT_SV_EXP_GROW;
2085
f337b084
TH
2086 if (len - cur > needed) return SvPVX(sv);
2087 extend = needed > len ? needed : len;
2088 return SvGROW(sv, len+extend+1);
2089}
49704364 2090
93f6e112 2091static SV *
b197e565 2092S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2093{
2094 SvGETMAGIC(sv);
93f6e112
FC
2095 if (UNLIKELY(SvAMAGIC(sv)))
2096 sv = sv_2num(sv);
99f450cc 2097 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2098 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2099 const NV nv = SvNV_nomg(sv);
2100 if (c == 'w')
2101 Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
2102 else
2103 Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
2104 }
93f6e112 2105 return sv;
354b74ae
FC
2106}
2107
93f6e112
FC
2108#define SvIV_no_inf(sv,d) \
2109 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2110#define SvUV_no_inf(sv,d) \
2111 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2112
49704364
LW
2113STATIC
2114SV **
f337b084 2115S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2116{
49704364 2117 tempsym_t lookahead;
f337b084
TH
2118 I32 items = endlist - beglist;
2119 bool found = next_symbol(symptr);
2120 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2121 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2122 char* from;
f337b084 2123
7918f24d
NC
2124 PERL_ARGS_ASSERT_PACK_REC;
2125
f337b084
TH
2126 if (symptr->level == 0 && found && symptr->code == 'U') {
2127 marked_upgrade(aTHX_ cat, symptr);
2128 symptr->flags |= FLAG_DO_UTF8;
2129 utf8 = 0;
49704364 2130 }
f337b084 2131 symptr->strbeg = SvCUR(cat);
49704364
LW
2132
2133 while (found) {
f337b084
TH
2134 SV *fromstr;
2135 STRLEN fromlen;
2136 I32 len;
a0714e2c 2137 SV *lengthcode = NULL;
49704364 2138 I32 datumtype = symptr->code;
f337b084
TH
2139 howlen_t howlen = symptr->howlen;
2140 char *start = SvPVX(cat);
2141 char *cur = start + SvCUR(cat);
a1219b5e 2142 bool needs_swap;
49704364 2143
f337b084 2144#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
0c7df902 2145#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2146
2147 switch (howlen) {
fc241834 2148 case e_star:
f337b084
TH
2149 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2150 0 : items;
2151 break;
2152 default:
2153 /* e_no_len and e_number */
2154 len = symptr->length;
49704364
LW
2155 break;
2156 }
2157
f337b084 2158 if (len) {
a7a3cfaa 2159 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2160
a7a3cfaa
TH
2161 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2162 /* We can process this letter. */
2163 STRLEN size = props & PACK_SIZE_MASK;
2164 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2165 }
f337b084
TH
2166 }
2167
49704364
LW
2168 /* Look ahead for next symbol. Do we have code/code? */
2169 lookahead = *symptr;
2170 found = next_symbol(&lookahead);
246f24af
TH
2171 if (symptr->flags & FLAG_SLASH) {
2172 IV count;
f337b084 2173 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2174 if (strchr("aAZ", lookahead.code)) {
2175 if (lookahead.howlen == e_number) count = lookahead.length;
2176 else {
ce399ba6 2177 if (items > 0) {
48a5da33 2178 count = sv_len_utf8(*beglist);
ce399ba6 2179 }
246f24af
TH
2180 else count = 0;
2181 if (lookahead.code == 'Z') count++;
2182 }
2183 } else {
2184 if (lookahead.howlen == e_number && lookahead.length < items)
2185 count = lookahead.length;
2186 else count = items;
2187 }
2188 lookahead.howlen = e_number;
2189 lookahead.length = count;
2190 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2191 }
49704364 2192
a1219b5e
NC
2193 needs_swap = NEEDS_SWAP(datumtype);
2194
fc241834
RGS
2195 /* Code inside the switch must take care to properly update
2196 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2197 doesn't simply leave using break */
0c7df902 2198 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2199 default:
f337b084
TH
2200 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2201 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2202 case '%':
49704364 2203 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2204
28be1210 2205 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2206 case '.':
2207 if (howlen == e_star) from = start;
2208 else if (len == 0) from = cur;
2209 else {
2210 tempsym_t *group = symptr;
2211
2212 while (--len && group) group = group->previous;
2213 from = group ? start + group->strbeg : start;
2214 }
2215 fromstr = NEXTFROM;
354b74ae 2216 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2217 goto resize;
28be1210 2218 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2219 case '@':
28be1210
TH
2220 from = start + symptr->strbeg;
2221 resize:
28be1210 2222 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2223 if (len >= 0) {
2224 while (len && from < cur) {
2225 from += UTF8SKIP(from);
2226 len--;
2227 }
2228 if (from > cur)
2229 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2230 if (len) {
2231 /* Here we know from == cur */
2232 grow:
2233 GROWING(0, cat, start, cur, len);
2234 Zero(cur, len, char);
2235 cur += len;
2236 } else if (from < cur) {
2237 len = cur - from;
2238 goto shrink;
2239 } else goto no_change;
2240 } else {
2241 cur = from;
2242 len = -len;
2243 goto utf8_shrink;
f337b084 2244 }
28be1210
TH
2245 else {
2246 len -= cur - from;
f337b084 2247 if (len > 0) goto grow;
28be1210 2248 if (len == 0) goto no_change;
fc241834 2249 len = -len;
28be1210 2250 goto shrink;
f337b084 2251 }
a6ec74c1 2252 break;
81d52ecd 2253
fc241834 2254 case '(': {
49704364 2255 tempsym_t savsym = *symptr;
66c611c5
MHM
2256 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2257 symptr->flags |= group_modifiers;
49704364
LW
2258 symptr->patend = savsym.grpend;
2259 symptr->level++;
f337b084 2260 symptr->previous = &lookahead;
18529408 2261 while (len--) {
f337b084
TH
2262 U32 was_utf8;
2263 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2264 else symptr->flags &= ~FLAG_PARSE_UTF8;
2265 was_utf8 = SvUTF8(cat);
49704364 2266 symptr->patptr = savsym.grpbeg;
f337b084
TH
2267 beglist = pack_rec(cat, symptr, beglist, endlist);
2268 if (SvUTF8(cat) != was_utf8)
2269 /* This had better be an upgrade while in utf8==0 mode */
2270 utf8 = 1;
2271
49704364 2272 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2273 break; /* No way to continue */
2274 }
ee790063 2275 items = endlist - beglist;
f337b084
TH
2276 lookahead.flags = symptr->flags & ~group_modifiers;
2277 goto no_change;
18529408 2278 }
62f95557
IZ
2279 case 'X' | TYPE_IS_SHRIEKING:
2280 if (!len) /* Avoid division by 0 */
2281 len = 1;
f337b084
TH
2282 if (utf8) {
2283 char *hop, *last;
2284 I32 l = len;
2285 hop = last = start;
2286 while (hop < cur) {
2287 hop += UTF8SKIP(hop);
2288 if (--l == 0) {
2289 last = hop;
2290 l = len;
2291 }
2292 }
2293 if (last > cur)
2294 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2295 cur = last;
2296 break;
2297 }
2298 len = (cur-start) % len;
924ba076 2299 /* FALLTHROUGH */
a6ec74c1 2300 case 'X':
f337b084
TH
2301 if (utf8) {
2302 if (len < 1) goto no_change;
28be1210 2303 utf8_shrink:
f337b084
TH
2304 while (len > 0) {
2305 if (cur <= start)
28be1210
TH
2306 Perl_croak(aTHX_ "'%c' outside of string in pack",
2307 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2308 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2309 if (cur <= start)
28be1210
TH
2310 Perl_croak(aTHX_ "'%c' outside of string in pack",
2311 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2312 }
2313 len--;
2314 }
2315 } else {
fc241834 2316 shrink:
f337b084 2317 if (cur - start < len)
28be1210
TH
2318 Perl_croak(aTHX_ "'%c' outside of string in pack",
2319 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2320 cur -= len;
2321 }
2322 if (cur < start+symptr->strbeg) {
2323 /* Make sure group starts don't point into the void */
2324 tempsym_t *group;
9e27e96a 2325 const STRLEN length = cur-start;
f337b084
TH
2326 for (group = symptr;
2327 group && length < group->strbeg;
2328 group = group->previous) group->strbeg = length;
2329 lookahead.strbeg = length;
2330 }
a6ec74c1 2331 break;
fc241834
RGS
2332 case 'x' | TYPE_IS_SHRIEKING: {
2333 I32 ai32;
62f95557
IZ
2334 if (!len) /* Avoid division by 0 */
2335 len = 1;
230e1fce 2336 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2337 else ai32 = (cur - start) % len;
2338 if (ai32 == 0) goto no_change;
2339 len -= ai32;
2340 }
924ba076 2341 /* FALLTHROUGH */
a6ec74c1 2342 case 'x':
f337b084 2343 goto grow;
a6ec74c1
JH
2344 case 'A':
2345 case 'Z':
f337b084 2346 case 'a': {
f7fe979e 2347 const char *aptr;
f337b084 2348
a6ec74c1 2349 fromstr = NEXTFROM;
e62f0680 2350 aptr = SvPV_const(fromstr, fromlen);
f337b084 2351 if (DO_UTF8(fromstr)) {
f7fe979e 2352 const char *end, *s;
f337b084
TH
2353
2354 if (!utf8 && !SvUTF8(cat)) {
2355 marked_upgrade(aTHX_ cat, symptr);
2356 lookahead.flags |= FLAG_DO_UTF8;
2357 lookahead.strbeg = symptr->strbeg;
2358 utf8 = 1;
2359 start = SvPVX(cat);
2360 cur = start + SvCUR(cat);
2361 }
fc241834 2362 if (howlen == e_star) {
f337b084
TH
2363 if (utf8) goto string_copy;
2364 len = fromlen+1;
2365 }
2366 s = aptr;
2367 end = aptr + fromlen;
2368 fromlen = datumtype == 'Z' ? len-1 : len;
2369 while ((I32) fromlen > 0 && s < end) {
2370 s += UTF8SKIP(s);
2371 fromlen--;
2372 }
2373 if (s > end)
2374 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2375 if (utf8) {
fc241834 2376 len = fromlen;
f337b084
TH
2377 if (datumtype == 'Z') len++;
2378 fromlen = s-aptr;
2379 len += fromlen;
fc241834 2380
f337b084 2381 goto string_copy;
fc241834 2382 }
f337b084
TH
2383 fromlen = len - fromlen;
2384 if (datumtype == 'Z') fromlen--;
2385 if (howlen == e_star) {
2386 len = fromlen;
2387 if (datumtype == 'Z') len++;
fc241834 2388 }
f337b084 2389 GROWING(0, cat, start, cur, len);
fc241834 2390 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2391 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2392 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2393 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2394 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2395 cur += fromlen;
a6ec74c1 2396 len -= fromlen;
f337b084
TH
2397 } else if (utf8) {
2398 if (howlen == e_star) {
2399 len = fromlen;
2400 if (datumtype == 'Z') len++;
a6ec74c1 2401 }
f337b084
TH
2402 if (len <= (I32) fromlen) {
2403 fromlen = len;
2404 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2405 }
fc241834 2406 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2407 upgrade, so:
2408 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2409 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2410 len -= fromlen;
2411 while (fromlen > 0) {
230e1fce 2412 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2413 aptr++;
2414 fromlen--;
fc241834 2415 }
f337b084
TH
2416 } else {
2417 string_copy:
2418 if (howlen == e_star) {
2419 len = fromlen;
2420 if (datumtype == 'Z') len++;
2421 }
2422 if (len <= (I32) fromlen) {
2423 fromlen = len;
2424 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2425 }
f337b084
TH
2426 GROWING(0, cat, start, cur, len);
2427 Copy(aptr, cur, fromlen, char);
2428 cur += fromlen;
2429 len -= fromlen;
a6ec74c1 2430 }
f337b084
TH
2431 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2432 cur += len;
3c4fb04a 2433 SvTAINT(cat);
a6ec74c1 2434 break;
f337b084 2435 }
a6ec74c1 2436 case 'B':
f337b084 2437 case 'b': {
b83604b4 2438 const char *str, *end;
f337b084
TH
2439 I32 l, field_len;
2440 U8 bits;
2441 bool utf8_source;
2442 U32 utf8_flags;
a6ec74c1 2443
fc241834 2444 fromstr = NEXTFROM;
b83604b4 2445 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2446 end = str + fromlen;
2447 if (DO_UTF8(fromstr)) {
2448 utf8_source = TRUE;
041457d9 2449 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2450 } else {
2451 utf8_source = FALSE;
2452 utf8_flags = 0; /* Unused, but keep compilers happy */
2453 }
2454 if (howlen == e_star) len = fromlen;
2455 field_len = (len+7)/8;
2456 GROWING(utf8, cat, start, cur, field_len);
2457 if (len > (I32)fromlen) len = fromlen;
2458 bits = 0;
2459 l = 0;
2460 if (datumtype == 'B')
2461 while (l++ < len) {
2462 if (utf8_source) {
95b63a38 2463 UV val = 0;
f337b084
TH
2464 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2465 bits |= val & 1;
2466 } else bits |= *str++ & 1;
2467 if (l & 7) bits <<= 1;
fc241834 2468 else {
f337b084
TH
2469 PUSH_BYTE(utf8, cur, bits);
2470 bits = 0;
a6ec74c1
JH
2471 }
2472 }
f337b084
TH
2473 else
2474 /* datumtype == 'b' */
2475 while (l++ < len) {
2476 if (utf8_source) {
95b63a38 2477 UV val = 0;
f337b084
TH
2478 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2479 if (val & 1) bits |= 0x80;
2480 } else if (*str++ & 1)
2481 bits |= 0x80;
2482 if (l & 7) bits >>= 1;
fc241834 2483 else {
f337b084
TH
2484 PUSH_BYTE(utf8, cur, bits);
2485 bits = 0;
a6ec74c1
JH
2486 }
2487 }
f337b084
TH
2488 l--;
2489 if (l & 7) {
fc241834 2490 if (datumtype == 'B')
f337b084 2491 bits <<= 7 - (l & 7);
fc241834 2492 else
f337b084
TH
2493 bits >>= 7 - (l & 7);
2494 PUSH_BYTE(utf8, cur, bits);
2495 l += 7;
a6ec74c1 2496 }
f337b084
TH
2497 /* Determine how many chars are left in the requested field */
2498 l /= 8;
2499 if (howlen == e_star) field_len = 0;
2500 else field_len -= l;
2501 Zero(cur, field_len, char);
2502 cur += field_len;
a6ec74c1 2503 break;
f337b084 2504 }
a6ec74c1 2505 case 'H':
f337b084 2506 case 'h': {
10516c54 2507 const char *str, *end;
f337b084
TH
2508 I32 l, field_len;
2509 U8 bits;
2510 bool utf8_source;
2511 U32 utf8_flags;
a6ec74c1 2512
fc241834 2513 fromstr = NEXTFROM;
10516c54 2514 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2515 end = str + fromlen;
2516 if (DO_UTF8(fromstr)) {
2517 utf8_source = TRUE;
041457d9 2518 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2519 } else {
2520 utf8_source = FALSE;
2521 utf8_flags = 0; /* Unused, but keep compilers happy */
2522 }
2523 if (howlen == e_star) len = fromlen;
2524 field_len = (len+1)/2;
2525 GROWING(utf8, cat, start, cur, field_len);
2526 if (!utf8 && len > (I32)fromlen) len = fromlen;
2527 bits = 0;
2528 l = 0;
2529 if (datumtype == 'H')
2530 while (l++ < len) {
2531 if (utf8_source) {
95b63a38 2532 UV val = 0;
f337b084
TH
2533 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2534 if (val < 256 && isALPHA(val))
2535 bits |= (val + 9) & 0xf;
a6ec74c1 2536 else
f337b084
TH
2537 bits |= val & 0xf;
2538 } else if (isALPHA(*str))
2539 bits |= (*str++ + 9) & 0xf;
2540 else
2541 bits |= *str++ & 0xf;
2542 if (l & 1) bits <<= 4;
fc241834 2543 else {
f337b084
TH
2544 PUSH_BYTE(utf8, cur, bits);
2545 bits = 0;
a6ec74c1
JH
2546 }
2547 }
f337b084
TH
2548 else
2549 while (l++ < len) {
2550 if (utf8_source) {
95b63a38 2551 UV val = 0;
f337b084
TH
2552 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2553 if (val < 256 && isALPHA(val))
2554 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2555 else
f337b084
TH
2556 bits |= (val & 0xf) << 4;
2557 } else if (isALPHA(*str))
2558 bits |= ((*str++ + 9) & 0xf) << 4;
2559 else
2560 bits |= (*str++ & 0xf) << 4;
2561 if (l & 1) bits >>= 4;
fc241834 2562 else {
f337b084
TH
2563 PUSH_BYTE(utf8, cur, bits);
2564 bits = 0;
a6ec74c1 2565 }
fc241834 2566 }
f337b084
TH
2567 l--;
2568 if (l & 1) {
2569 PUSH_BYTE(utf8, cur, bits);
2570 l++;
2571 }
2572 /* Determine how many chars are left in the requested field */
2573 l /= 2;
2574 if (howlen == e_star) field_len = 0;
2575 else field_len -= l;
2576 Zero(cur, field_len, char);
2577 cur += field_len;
2578 break;
fc241834
RGS
2579 }
2580 case 'c':
f337b084
TH
2581 while (len-- > 0) {
2582 IV aiv;
2583 fromstr = NEXTFROM;
354b74ae 2584 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2585 if ((-128 > aiv || aiv > 127))
2586 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2587 "Character in 'c' format wrapped in pack");
585ec06d 2588 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2589 }
2590 break;
2591 case 'C':
f337b084
TH
2592 if (len == 0) {
2593 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2594 break;
2595 }
a6ec74c1 2596 while (len-- > 0) {
f337b084 2597 IV aiv;
a6ec74c1 2598 fromstr = NEXTFROM;
354b74ae 2599 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2600 if ((0 > aiv || aiv > 0xff))
2601 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2602 "Character in 'C' format wrapped in pack");
1651fc44 2603 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2604 }
fc241834
RGS
2605 break;
2606 case 'W': {
2607 char *end;
670f1322 2608 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2609
2610 end = start+SvLEN(cat)-1;
2611 if (utf8) end -= UTF8_MAXLEN-1;
2612 while (len-- > 0) {
2613 UV auv;
2614 fromstr = NEXTFROM;
354b74ae 2615 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2616 if (in_bytes) auv = auv % 0x100;
2617 if (utf8) {
2618 W_utf8:
2619 if (cur > end) {
2620 *cur = '\0';
b162af07 2621 SvCUR_set(cat, cur - start);
fc241834
RGS
2622
2623 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2624 end = start+SvLEN(cat)-UTF8_MAXLEN;
2625 }
c80e42f3
KW
2626 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2627 auv,
041457d9 2628 warn_utf8 ?
230e1fce 2629 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2630 } else {
2631 if (auv >= 0x100) {
2632 if (!SvUTF8(cat)) {
2633 *cur = '\0';
b162af07 2634 SvCUR_set(cat, cur - start);
fc241834
RGS
2635 marked_upgrade(aTHX_ cat, symptr);
2636 lookahead.flags |= FLAG_DO_UTF8;
2637 lookahead.strbeg = symptr->strbeg;
2638 utf8 = 1;
2639 start = SvPVX(cat);
2640 cur = start + SvCUR(cat);
2641 end = start+SvLEN(cat)-UTF8_MAXLEN;
2642 goto W_utf8;
2643 }
a2a5de95
NC
2644 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2645 "Character in 'W' format wrapped in pack");
fc241834
RGS
2646 auv &= 0xff;
2647 }
2648 if (cur >= end) {
2649 *cur = '\0';
b162af07 2650 SvCUR_set(cat, cur - start);
fc241834
RGS
2651 GROWING(0, cat, start, cur, len+1);
2652 end = start+SvLEN(cat)-1;
2653 }
fe2774ed 2654 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2655 }
2656 }
2657 break;
fc241834
RGS
2658 }
2659 case 'U': {
2660 char *end;
2661
2662 if (len == 0) {
2663 if (!(symptr->flags & FLAG_DO_UTF8)) {
2664 marked_upgrade(aTHX_ cat, symptr);
2665 lookahead.flags |= FLAG_DO_UTF8;
2666 lookahead.strbeg = symptr->strbeg;
2667 }
2668 utf8 = 0;
2669 goto no_change;
2670 }
2671
2672 end = start+SvLEN(cat);
2673 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2674 while (len-- > 0) {
fc241834 2675 UV auv;
a6ec74c1 2676 fromstr = NEXTFROM;
354b74ae 2677 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2678 if (utf8) {
230e1fce 2679 U8 buffer[UTF8_MAXLEN], *endb;
3ece276e 2680 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
041457d9 2681 warn_utf8 ?
fc241834
RGS
2682 0 : UNICODE_ALLOW_ANY);
2683 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2684 *cur = '\0';
b162af07 2685 SvCUR_set(cat, cur - start);
fc241834
RGS
2686 GROWING(0, cat, start, cur,
2687 len+(endb-buffer)*UTF8_EXPAND);
2688 end = start+SvLEN(cat);
2689 }
3a88beaa 2690 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
fc241834
RGS
2691 } else {
2692 if (cur >= end) {
2693 *cur = '\0';
b162af07 2694 SvCUR_set(cat, cur - start);
fc241834
RGS
2695 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2696 end = start+SvLEN(cat)-UTF8_MAXLEN;
2697 }
3ece276e 2698 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
041457d9 2699 warn_utf8 ?
230e1fce 2700 0 : UNICODE_ALLOW_ANY);
fc241834 2701 }
a6ec74c1 2702 }
a6ec74c1 2703 break;
fc241834 2704 }
a6ec74c1
JH
2705 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2706 case 'f':
a6ec74c1 2707 while (len-- > 0) {
f337b084
TH
2708 float afloat;
2709 NV anv;
a6ec74c1 2710 fromstr = NEXTFROM;
f337b084 2711 anv = SvNV(fromstr);
85bba25f 2712# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2713 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2714 * on Alpha; fake it if we don't have them.
2715 */
f337b084 2716 if (anv > FLT_MAX)
fc241834 2717 afloat = FLT_MAX;
f337b084 2718 else if (anv < -FLT_MAX)
fc241834 2719 afloat = -FLT_MAX;
f337b084 2720 else afloat = (float)anv;
baf3cf9c 2721# else
919894b7
DM
2722 /* a simple cast to float is undefined if outside
2723 * the range of values that can be represented */
2724 afloat = (float)(anv > FLT_MAX ? NV_INF :
2725 anv < -FLT_MAX ? -NV_INF : anv);
baf3cf9c 2726# endif
3a88beaa 2727 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2728 }
2729 break;
2730 case 'd':
a6ec74c1 2731 while (len-- > 0) {
f337b084
TH
2732 double adouble;
2733 NV anv;
a6ec74c1 2734 fromstr = NEXTFROM;
f337b084 2735 anv = SvNV(fromstr);
85bba25f 2736# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2737 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2738 * on Alpha; fake it if we don't have them.
2739 */
f337b084 2740 if (anv > DBL_MAX)
fc241834 2741 adouble = DBL_MAX;
f337b084 2742 else if (anv < -DBL_MAX)
fc241834 2743 adouble = -DBL_MAX;
f337b084 2744 else adouble = (double)anv;
baf3cf9c 2745# else
f337b084 2746 adouble = (double)anv;
baf3cf9c 2747# endif
3a88beaa 2748 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2749 }
2750 break;
fc241834 2751 case 'F': {
275663fa 2752 NV_bytes anv;
1109a392 2753 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2754 while (len-- > 0) {
2755 fromstr = NEXTFROM;
cd07c537
DM
2756#ifdef __GNUC__
2757 /* to work round a gcc/x86 bug; don't use SvNV */
2758 anv.nv = sv_2nv(fromstr);
2759#else
275663fa 2760 anv.nv = SvNV(fromstr);
cd07c537 2761#endif
3a88beaa 2762 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2763 }
2764 break;
fc241834 2765 }
92d41999 2766#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2767 case 'D': {
275663fa 2768 ld_bytes aldouble;
1109a392
MHM
2769 /* long doubles can have unused bits, which may be nonzero */
2770 Zero(&aldouble, 1, long double);
92d41999
JH
2771 while (len-- > 0) {
2772 fromstr = NEXTFROM;
cd07c537
DM
2773# ifdef __GNUC__
2774 /* to work round a gcc/x86 bug; don't use SvNV */
2775 aldouble.ld = (long double)sv_2nv(fromstr);
2776# else
275663fa 2777 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2778# endif
3a88beaa
NC
2779 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2780 needs_swap);
92d41999
JH
2781 }
2782 break;
fc241834 2783 }
92d41999 2784#endif
068bd2e7 2785 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2786 case 'n':
2787 while (len-- > 0) {
f337b084 2788 I16 ai16;
a6ec74c1 2789 fromstr = NEXTFROM;
354b74ae 2790 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2791 ai16 = PerlSock_htons(ai16);
3a88beaa 2792 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2793 }
2794 break;
068bd2e7 2795 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2796 case 'v':
2797 while (len-- > 0) {
f337b084 2798 I16 ai16;
a6ec74c1 2799 fromstr = NEXTFROM;
354b74ae 2800 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2801 ai16 = htovs(ai16);
3a88beaa 2802 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2803 }
2804 break;
49704364 2805 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2806#if SHORTSIZE != SIZE16
fc241834 2807 while (len-- > 0) {
f337b084 2808 unsigned short aushort;
fc241834 2809 fromstr = NEXTFROM;
354b74ae 2810 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2811 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2812 }
49704364
LW
2813 break;
2814#else
924ba076 2815 /* FALLTHROUGH */
a6ec74c1 2816#endif
49704364 2817 case 'S':
fc241834 2818 while (len-- > 0) {
f337b084 2819 U16 au16;
fc241834 2820 fromstr = NEXTFROM;
354b74ae 2821 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2822 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2823 }
2824 break;
49704364 2825 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2826#if SHORTSIZE != SIZE16
fc241834 2827 while (len-- > 0) {
f337b084 2828 short ashort;
fc241834 2829 fromstr = NEXTFROM;
354b74ae 2830 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2831 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2832 }
49704364
LW
2833 break;
2834#else
924ba076 2835 /* FALLTHROUGH */
a6ec74c1 2836#endif
49704364
LW
2837 case 's':
2838 while (len-- > 0) {
f337b084 2839 I16 ai16;
49704364 2840 fromstr = NEXTFROM;
354b74ae 2841 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2842 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2843 }
2844 break;
2845 case 'I':
49704364 2846 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2847 while (len-- > 0) {
f337b084 2848 unsigned int auint;
a6ec74c1 2849 fromstr = NEXTFROM;
354b74ae 2850 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2851 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2852 }
2853 break;
92d41999
JH
2854 case 'j':
2855 while (len-- > 0) {
f337b084 2856 IV aiv;
92d41999 2857 fromstr = NEXTFROM;
354b74ae 2858 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2859 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2860 }
2861 break;
2862 case 'J':
2863 while (len-- > 0) {
f337b084 2864 UV auv;
92d41999 2865 fromstr = NEXTFROM;
354b74ae 2866 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2867 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2868 }
2869 break;
a6ec74c1
JH
2870 case 'w':
2871 while (len-- > 0) {
f337b084 2872 NV anv;
a6ec74c1 2873 fromstr = NEXTFROM;
83388ae2 2874 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2875 anv = SvNV_nomg(fromstr);
a6ec74c1 2876
f337b084
TH
2877 if (anv < 0) {
2878 *cur = '\0';
b162af07 2879 SvCUR_set(cat, cur - start);
49704364 2880 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2881 }
a6ec74c1 2882
196b62db
NC
2883 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2884 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2885 any negative IVs will have already been got by the croak()
2886 above. IOK is untrue for fractions, so we test them
2887 against UV_MAX_P1. */
f337b084
TH
2888 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2889 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2890 char *in = buf + sizeof(buf);
1e9a122e 2891 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2892
2893 do {
eb160463 2894 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2895 auv >>= 7;
2896 } while (auv);
2897 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2898 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2899 in, (buf + sizeof(buf)) - in);
2900 } else if (SvPOKp(fromstr))
2901 goto w_string;
a6ec74c1 2902 else if (SvNOKp(fromstr)) {
0258719b 2903 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2904 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2905 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2906 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2907 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2908 Some C compilers are strict about integral constant
2909 expressions so we conservatively divide by a slightly
2910 smaller integer instead of multiplying by the exact
2911 floating-point value.
0258719b
NC
2912 */
2913#ifdef NV_MAX_10_EXP
f337b084 2914 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2915 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2916#else
f337b084 2917 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2918 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2919#endif
a6ec74c1
JH
2920 char *in = buf + sizeof(buf);
2921
8b6e33c7 2922 anv = Perl_floor(anv);
a6ec74c1 2923 do {
8b6e33c7 2924 const NV next = Perl_floor(anv / 128);
a6ec74c1 2925 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2926 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2927 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2928 anv = next;
2929 } while (anv > 0);
a6ec74c1 2930 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2931 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2932 in, (buf + sizeof(buf)) - in);
2933 } else {
8b6e33c7
AL
2934 const char *from;
2935 char *result, *in;
735b914b
JH
2936 SV *norm;
2937 STRLEN len;
2938 bool done;
2939
f337b084 2940 w_string:
735b914b 2941 /* Copy string and check for compliance */
1e9a122e 2942 from = SvPV_nomg_const(fromstr, len);
735b914b 2943 if ((norm = is_an_int(from, len)) == NULL)
49704364 2944 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2945
a02a5408 2946 Newx(result, len, char);
735b914b
JH
2947 in = result + len;
2948 done = FALSE;
f337b084 2949 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2950 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2951 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2952 in, (result + len) - in);
735b914b
JH
2953 Safefree(result);
2954 SvREFCNT_dec(norm); /* free norm */
fc241834 2955 }
a6ec74c1
JH
2956 }
2957 break;
2958 case 'i':
49704364 2959 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2960 while (len-- > 0) {
f337b084 2961 int aint;
a6ec74c1 2962 fromstr = NEXTFROM;
354b74ae 2963 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2964 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2965 }
2966 break;
068bd2e7 2967 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2968 case 'N':
2969 while (len-- > 0) {
f337b084 2970 U32 au32;
a6ec74c1 2971 fromstr = NEXTFROM;
354b74ae 2972 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2973 au32 = PerlSock_htonl(au32);
3a88beaa 2974 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2975 }
2976 break;
068bd2e7 2977 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2978 case 'V':
2979 while (len-- > 0) {
f337b084 2980 U32 au32;
a6ec74c1 2981 fromstr = NEXTFROM;
354b74ae 2982 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2983 au32 = htovl(au32);
3a88beaa 2984 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2985 }
2986 break;
49704364 2987 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2988#if LONGSIZE != SIZE32
fc241834 2989 while (len-- > 0) {
f337b084 2990 unsigned long aulong;
fc241834 2991 fromstr = NEXTFROM;
354b74ae 2992 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2993 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2994 }
49704364
LW
2995 break;
2996#else
2997 /* Fall though! */
a6ec74c1 2998#endif
49704364 2999 case 'L':
fc241834 3000 while (len-- > 0) {
f337b084 3001 U32 au32;
fc241834 3002 fromstr = NEXTFROM;
354b74ae 3003 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 3004 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
3005 }
3006 break;
49704364 3007 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3008#if LONGSIZE != SIZE32
fc241834 3009 while (len-- > 0) {
f337b084 3010 long along;
fc241834 3011 fromstr = NEXTFROM;
354b74ae 3012 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3013 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 3014 }
49704364
LW
3015 break;
3016#else
3017 /* Fall though! */
a6ec74c1 3018#endif
49704364
LW
3019 case 'l':
3020 while (len-- > 0) {
f337b084 3021 I32 ai32;
49704364 3022 fromstr = NEXTFROM;
354b74ae 3023 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3024 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3025 }
3026 break;
c174bf3b 3027#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3028 case 'Q':
3029 while (len-- > 0) {
f337b084 3030 Uquad_t auquad;
a6ec74c1 3031 fromstr = NEXTFROM;
354b74ae 3032 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3033 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3034 }
3035 break;
3036 case 'q':
3037 while (len-- > 0) {
f337b084 3038 Quad_t aquad;
a6ec74c1 3039 fromstr = NEXTFROM;
354b74ae 3040 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3041 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3042 }
3043 break;
1640b983 3044#endif
a6ec74c1
JH
3045 case 'P':
3046 len = 1; /* assume SV is correct length */
f337b084 3047 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3048 /* FALLTHROUGH */
a6ec74c1
JH
3049 case 'p':
3050 while (len-- > 0) {
83003860 3051 const char *aptr;
f337b084 3052
a6ec74c1 3053 fromstr = NEXTFROM;
28a4f200 3054 SvGETMAGIC(fromstr);
3055 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3056 else {
a6ec74c1
JH
3057 /* XXX better yet, could spirit away the string to
3058 * a safe spot and hang on to it until the result
3059 * of pack() (and all copies of the result) are
3060 * gone.
3061 */
041457d9 3062 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3063 !SvREADONLY(fromstr)))) {
3064 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3065 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3066 }
3067 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3068 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3069 else
2596d9fe 3070 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3071 }
3a88beaa 3072 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3073 }
3074 break;
fc241834 3075 case 'u': {
f7fe979e 3076 const char *aptr, *aend;
fc241834 3077 bool from_utf8;
f337b084 3078
a6ec74c1 3079 fromstr = NEXTFROM;
fc241834
RGS
3080 if (len <= 2) len = 45;
3081 else len = len / 3 * 3;
3082 if (len >= 64) {
a2a5de95
NC
3083 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3084 "Field too wide in 'u' format in pack");
fc241834
RGS
3085 len = 63;
3086 }
83003860 3087 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3088 from_utf8 = DO_UTF8(fromstr);
3089 if (from_utf8) {
3090 aend = aptr + fromlen;
3f63b0e5 3091 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3092 } else aend = NULL; /* Unused, but keep compilers happy */
3093 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3094 while (fromlen > 0) {
fc241834 3095 U8 *end;
a6ec74c1 3096 I32 todo;
fc241834 3097 U8 hunk[1+63/3*4+1];
a6ec74c1 3098
eb160463 3099 if ((I32)fromlen > len)
a6ec74c1
JH
3100 todo = len;
3101 else
3102 todo = fromlen;
fc241834
RGS
3103 if (from_utf8) {
3104 char buffer[64];
3105 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3106 'u' | TYPE_IS_PACK)) {
3107 *cur = '\0';
b162af07 3108 SvCUR_set(cat, cur - start);
5637ef5b
NC
3109 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3110 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3111 aptr, aend, buffer, (long) todo);
fc241834 3112 }
e68aed92 3113 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3114 } else {
e68aed92 3115 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3116 aptr += todo;
3117 }
3a88beaa 3118 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3119 fromlen -= todo;
3120 }
a6ec74c1
JH
3121 break;
3122 }
f337b084
TH
3123 }
3124 *cur = '\0';
b162af07 3125 SvCUR_set(cat, cur - start);
f337b084 3126 no_change:
49704364 3127 *symptr = lookahead;
a6ec74c1 3128 }
49704364 3129 return beglist;
18529408
IZ
3130}
3131#undef NEXTFROM
3132
3133
3134PP(pp_pack)
3135{
20b7effb 3136 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3137 SV *cat = TARG;
18529408 3138 STRLEN fromlen;
349d4f2f 3139 SV *pat_sv = *++MARK;
eb578fdb
KW
3140 const char *pat = SvPV_const(pat_sv, fromlen);
3141 const char *patend = pat + fromlen;
18529408
IZ
3142
3143 MARK++;
76f68e9b 3144 sv_setpvs(cat, "");
f337b084 3145 SvUTF8_off(cat);
18529408 3146
7accc089 3147 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3148
a6ec74c1
JH
3149 SvSETMAGIC(cat);
3150 SP = ORIGMARK;
3151 PUSHs(cat);
3152 RETURN;
3153}
a6ec74c1 3154
73cb7263
NC
3155/*
3156 * Local variables:
3157 * c-indentation-style: bsd
3158 * c-basic-offset: 4
14d04a33 3159 * indent-tabs-mode: nil
73cb7263
NC
3160 * End:
3161 *
14d04a33 3162 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3163 */