This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Merge the identical definitions of DO_BO_UNPACK and DO_BO_PACK.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
f337b084
TH
132/* Only to be used inside a loop (see the break) */
133#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
134 if (utf8) { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
136 } else { \
137 Copy(s, OFF16(p), SIZE16, char); \
138 (s) += SIZE16; \
139 } \
140} STMT_END
141
142/* Only to be used inside a loop (see the break) */
143#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
144 if (utf8) { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
146 } else { \
147 Copy(s, OFF32(p), SIZE32, char); \
148 (s) += SIZE32; \
149 } \
150} STMT_END
151
152#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3 154
a1219b5e
NC
155#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
156# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
157#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
158# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
159#else
160# error "Unsupported byteorder"
161 /* Need to add code here to re-instate mixed endian support. */
162#endif
163
08ca2aa3 164/* Only to be used inside a loop (see the break) */
275663fa 165#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
166STMT_START { \
167 if (utf8) { \
f337b084 168 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 169 (char *) (buf), len, datumtype)) break; \
08ca2aa3 170 } else { \
275663fa
TC
171 Copy(s, (char *) (buf), len, char); \
172 s += len; \
08ca2aa3 173 } \
08ca2aa3
TH
174} STMT_END
175
275663fa
TC
176#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
177 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
178
f337b084 179#define PUSH_VAR(utf8, aptr, var) \
230e1fce 180 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 181
49704364
WL
182/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
183#define MAX_SUB_TEMPLATE_LEVEL 100
184
66c611c5 185/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
186#define FLAG_WAS_UTF8 0x40
187#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 188#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 189#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
190#define FLAG_SLASH 0x04
191#define FLAG_COMMA 0x02
192#define FLAG_PACK 0x01
193
a6ec74c1
JH
194STATIC SV *
195S_mul128(pTHX_ SV *sv, U8 m)
196{
197 STRLEN len;
198 char *s = SvPV(sv, len);
199 char *t;
a6ec74c1 200
7918f24d
NC
201 PERL_ARGS_ASSERT_MUL128;
202
a6ec74c1 203 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 204 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
205
206 sv_catsv(tmpNew, sv);
207 SvREFCNT_dec(sv); /* free old sv */
208 sv = tmpNew;
209 s = SvPV(sv, len);
210 }
211 t = s + len - 1;
212 while (!*t) /* trailing '\0'? */
213 t--;
214 while (t > s) {
f7fe979e 215 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
216 *(t--) = '0' + (char)(i % 10);
217 m = (char)(i / 10);
a6ec74c1
JH
218 }
219 return (sv);
220}
221
222/* Explosives and implosives. */
223
224#if 'I' == 73 && 'J' == 74
225/* On an ASCII/ISO kind of system */
226#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
227#else
228/*
229 Some other sort of character set - use memchr() so we don't match
230 the null byte.
231 */
232#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
233#endif
234
66c611c5 235/* type modifiers */
62f95557 236#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
237#define TYPE_IS_BIG_ENDIAN 0x200
238#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 239#define TYPE_IS_PACK 0x800
1109a392 240#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 241#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
242#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
243
7212898e
NC
244# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
245# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
246
247# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
248
3cc90e2e
NC
249# define DO_BO_UNPACK(var, type) \
250 STMT_START { \
a1219b5e 251 if (needs_swap) { \
e8fda8c4 252 my_swabn(&var, sizeof(var)); \
3cc90e2e
NC
253 } \
254 } STMT_END
255
256# define DO_BO_PACK(var, type) \
257 STMT_START { \
a1219b5e 258 if (needs_swap) { \
e8fda8c4 259 my_swabn(&var, sizeof(var)); \
3cc90e2e
NC
260 } \
261 } STMT_END
262
78d46eaa 263#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 264#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
265#define PACK_SIZE_MASK 0x3F
266
298bc19c 267#include "packsizetables.c"
78d46eaa 268
08ca2aa3 269STATIC U8
f7fe979e 270uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 271{
08ca2aa3 272 STRLEN retlen;
0bcc34c2 273 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 274 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 275 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
276 warnings), but these two mean we make no progress in the string and
277 might enter an infinite loop */
278 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
279 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
280 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 281 if (val >= 0x100) {
a2a5de95
NC
282 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
283 "Character in '%c' format wrapped in unpack",
284 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
285 val &= 0xff;
286 }
287 *s += retlen;
fe2774ed 288 return (U8)val;
08ca2aa3
TH
289}
290
f337b084
TH
291#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
292 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
293 *(U8 *)(s)++)
294
295STATIC bool
f7fe979e 296uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
297{
298 UV val;
299 STRLEN retlen;
f7fe979e 300 const char *from = *s;
08ca2aa3 301 int bad = 0;
f7fe979e 302 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
303 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
304 for (;buf_len > 0; buf_len--) {
305 if (from >= end) return FALSE;
f337b084 306 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
307 if (retlen == (STRLEN) -1 || retlen == 0) {
308 from += UTF8SKIP(from);
309 bad |= 1;
310 } else from += retlen;
311 if (val >= 0x100) {
312 bad |= 2;
313 val &= 0xff;
314 }
fe2774ed 315 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
316 }
317 /* We have enough characters for the buffer. Did we have problems ? */
318 if (bad) {
319 if (bad & 1) {
320 /* Rewalk the string fragment while warning */
f7fe979e 321 const char *ptr;
9e27e96a 322 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
323 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
324 if (ptr >= end) break;
f337b084 325 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
326 }
327 if (from > end) from = end;
328 }
a2a5de95
NC
329 if ((bad & 2))
330 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 331 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
332 "Character(s) in '%c' format wrapped in %s",
333 (int) TYPE_NO_MODIFIERS(datumtype),
334 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
335 }
336 *s = from;
337 return TRUE;
338}
339
340STATIC bool
f7fe979e 341next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 342{
97aff369 343 dVAR;
08ca2aa3 344 STRLEN retlen;
0bcc34c2 345 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
346 if (val >= 0x100 || !ISUUCHAR(val) ||
347 retlen == (STRLEN) -1 || retlen == 0) {
348 *out = 0;
349 return FALSE;
350 }
351 *out = PL_uudmap[val] & 077;
f337b084 352 *s += retlen;
08ca2aa3
TH
353 return TRUE;
354}
78d46eaa 355
64844641 356STATIC char *
14333449 357S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 358 const U8 * const end = start + len;
64844641 359
7918f24d
NC
360 PERL_ARGS_ASSERT_BYTES_TO_UNI;
361
f337b084 362 while (start < end) {
48fa4626
TS
363 const UV uv = NATIVE_TO_ASCII(*start);
364 if (UNI_IS_INVARIANT(uv))
365 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
366 else {
367 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
368 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
369 }
370 start++;
371 }
64844641 372 return dest;
f337b084
TH
373}
374
230e1fce
NC
375#define PUSH_BYTES(utf8, cur, buf, len) \
376STMT_START { \
64844641
AL
377 if (utf8) \
378 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
379 else { \
380 Copy(buf, cur, len, char); \
381 (cur) += (len); \
382 } \
f337b084
TH
383} STMT_END
384
385#define GROWING(utf8, cat, start, cur, in_len) \
386STMT_START { \
387 STRLEN glen = (in_len); \
3473cf63 388 if (utf8) glen *= UTF8_EXPAND; \
f337b084 389 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 390 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
391 (cur) = (start) + SvCUR(cat); \
392 } \
393} STMT_END
394
395#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
396STMT_START { \
f7fe979e 397 const STRLEN glen = (in_len); \
f337b084 398 STRLEN gl = glen; \
3473cf63 399 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
400 if ((cur) + gl >= (start) + SvLEN(cat)) { \
401 *cur = '\0'; \
b162af07 402 SvCUR_set((cat), (cur) - (start)); \
0bd48802 403 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
404 (cur) = (start) + SvCUR(cat); \
405 } \
406 PUSH_BYTES(utf8, cur, buf, glen); \
407} STMT_END
408
409#define PUSH_BYTE(utf8, s, byte) \
410STMT_START { \
411 if (utf8) { \
f7fe979e 412 const U8 au8 = (byte); \
64844641 413 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
414 } else *(U8 *)(s)++ = (byte); \
415} STMT_END
416
417/* Only to be used inside a loop (see the break) */
418#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
419STMT_START { \
420 STRLEN retlen; \
421 if (str >= end) break; \
422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
423 if (retlen == (STRLEN) -1 || retlen == 0) { \
424 *cur = '\0'; \
425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
426 } \
427 str += retlen; \
428} STMT_END
429
f7fe979e
AL
430static const char *_action( const tempsym_t* symptr )
431{
10edeb5d 432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
433}
434
206947d2 435/* Returns the sizeof() struct described by pat */
028d1f6d 436STATIC I32
f337b084 437S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 438{
f337b084 439 I32 total = 0;
206947d2 440
7918f24d
NC
441 PERL_ARGS_ASSERT_MEASURE_STRUCT;
442
49704364 443 while (next_symbol(symptr)) {
f337b084 444 I32 len;
f7fe979e 445 int size;
f337b084
TH
446
447 switch (symptr->howlen) {
fc241834 448 case e_star:
49704364 449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 450 _action( symptr ) );
49704364 451 break;
f337b084
TH
452 default:
453 /* e_no_len and e_number */
454 len = symptr->length;
455 break;
49704364
WL
456 }
457
a7a3cfaa 458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 459 if (!size) {
f7fe979e 460 int star;
80a13697
NC
461 /* endianness doesn't influence the size of a type */
462 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
463 default:
464 Perl_croak(aTHX_ "Invalid type '%c' in %s",
465 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 466 _action( symptr ) );
28be1210
TH
467 case '.' | TYPE_IS_SHRIEKING:
468 case '@' | TYPE_IS_SHRIEKING:
80a13697 469 case '@':
28be1210 470 case '.':
80a13697
NC
471 case '/':
472 case 'U': /* XXXX Is it correct? */
473 case 'w':
474 case 'u':
475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 476 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 477 _action( symptr ) );
80a13697
NC
478 case '%':
479 size = 0;
480 break;
481 case '(':
fc241834
RGS
482 {
483 tempsym_t savsym = *symptr;
484 symptr->patptr = savsym.grpbeg;
485 symptr->patend = savsym.grpend;
486 /* XXXX Theoretically, we need to measure many times at
487 different positions, since the subexpression may contain
488 alignment commands, but be not of aligned length.
489 Need to detect this and croak(). */
490 size = measure_struct(symptr);
491 *symptr = savsym;
492 break;
493 }
80a13697
NC
494 case 'X' | TYPE_IS_SHRIEKING:
495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
496 */
497 if (!len) /* Avoid division by 0 */
498 len = 1;
499 len = total % len; /* Assumed: the start is aligned. */
500 /* FALL THROUGH */
501 case 'X':
502 size = -1;
503 if (total < len)
f7fe979e 504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
505 break;
506 case 'x' | TYPE_IS_SHRIEKING:
507 if (!len) /* Avoid division by 0 */
508 len = 1;
509 star = total % len; /* Assumed: the start is aligned. */
510 if (star) /* Other portable ways? */
511 len = len - star;
512 else
513 len = 0;
514 /* FALL THROUGH */
515 case 'x':
516 case 'A':
517 case 'Z':
518 case 'a':
80a13697
NC
519 size = 1;
520 break;
521 case 'B':
522 case 'b':
523 len = (len + 7)/8;
524 size = 1;
525 break;
526 case 'H':
527 case 'h':
528 len = (len + 1)/2;
529 size = 1;
530 break;
78d46eaa 531
80a13697
NC
532 case 'P':
533 len = 1;
534 size = sizeof(char*);
78d46eaa
NC
535 break;
536 }
206947d2
IZ
537 }
538 total += len * size;
539 }
540 return total;
541}
542
49704364
WL
543
544/* locate matching closing parenthesis or bracket
545 * returns char pointer to char after match, or NULL
546 */
f7fe979e 547STATIC const char *
5aaab254 548S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 549{
7918f24d
NC
550 PERL_ARGS_ASSERT_GROUP_END;
551
49704364 552 while (patptr < patend) {
f7fe979e 553 const char c = *patptr++;
49704364
WL
554
555 if (isSPACE(c))
556 continue;
557 else if (c == ender)
558 return patptr-1;
559 else if (c == '#') {
560 while (patptr < patend && *patptr != '\n')
561 patptr++;
562 continue;
563 } else if (c == '(')
564 patptr = group_end(patptr, patend, ')') + 1;
565 else if (c == '[')
566 patptr = group_end(patptr, patend, ']') + 1;
18529408 567 }
49704364
WL
568 Perl_croak(aTHX_ "No group ending character '%c' found in template",
569 ender);
570 return 0;
18529408
IZ
571}
572
49704364
WL
573
574/* Convert unsigned decimal number to binary.
575 * Expects a pointer to the first digit and address of length variable
576 * Advances char pointer to 1st non-digit char and returns number
fc241834 577 */
f7fe979e 578STATIC const char *
5aaab254 579S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
580{
581 I32 len = *patptr++ - '0';
7918f24d
NC
582
583 PERL_ARGS_ASSERT_GET_NUM;
584
49704364
WL
585 while (isDIGIT(*patptr)) {
586 if (len >= 0x7FFFFFFF/10)
587 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
588 len = (len * 10) + (*patptr++ - '0');
589 }
590 *lenptr = len;
591 return patptr;
592}
593
594/* The marvellous template parsing routine: Using state stored in *symptr,
595 * locates next template code and count
596 */
597STATIC bool
f337b084 598S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 599{
f7fe979e 600 const char* patptr = symptr->patptr;
0bcc34c2 601 const char* const patend = symptr->patend;
49704364 602
7918f24d
NC
603 PERL_ARGS_ASSERT_NEXT_SYMBOL;
604
49704364
WL
605 symptr->flags &= ~FLAG_SLASH;
606
607 while (patptr < patend) {
608 if (isSPACE(*patptr))
609 patptr++;
610 else if (*patptr == '#') {
611 patptr++;
612 while (patptr < patend && *patptr != '\n')
613 patptr++;
614 if (patptr < patend)
615 patptr++;
616 } else {
fc241834 617 /* We should have found a template code */
49704364 618 I32 code = *patptr++ & 0xFF;
66c611c5 619 U32 inherited_modifiers = 0;
49704364
WL
620
621 if (code == ','){ /* grandfather in commas but with a warning */
622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
623 symptr->flags |= FLAG_COMMA;
624 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 625 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
626 }
627 continue;
628 }
fc241834 629
49704364 630 /* for '(', skip to ')' */
fc241834 631 if (code == '(') {
49704364
WL
632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
633 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 634 _action( symptr ) );
49704364
WL
635 symptr->grpbeg = patptr;
636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 639 _action( symptr ) );
49704364
WL
640 }
641
66c611c5
MHM
642 /* look for group modifiers to inherit */
643 if (TYPE_ENDIANNESS(symptr->flags)) {
644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
646 }
647
1109a392
MHM
648 /* look for modifiers */
649 while (patptr < patend) {
b7787f18
AL
650 const char *allowed;
651 I32 modifier;
1109a392
MHM
652 switch (*patptr) {
653 case '!':
654 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 655 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
656 break;
657 case '>':
658 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 659 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
660 break;
661 case '<':
662 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 663 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
664 break;
665 default:
b7787f18
AL
666 allowed = "";
667 modifier = 0;
1109a392
MHM
668 break;
669 }
66c611c5 670
1109a392
MHM
671 if (modifier == 0)
672 break;
66c611c5 673
1109a392
MHM
674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 676 allowed, _action( symptr ) );
66c611c5
MHM
677
678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
682 TYPE_ENDIANNESS_MASK)
683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 684 *patptr, _action( symptr ) );
66c611c5 685
a2a5de95
NC
686 if ((code & modifier)) {
687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
688 "Duplicate modifier '%c' after '%c' in %s",
689 *patptr, (int) TYPE_NO_MODIFIERS(code),
690 _action( symptr ) );
1109a392 691 }
66c611c5 692
1109a392
MHM
693 code |= modifier;
694 patptr++;
49704364
WL
695 }
696
66c611c5
MHM
697 /* inherit modifiers */
698 code |= inherited_modifiers;
699
fc241834 700 /* look for count and/or / */
49704364
WL
701 if (patptr < patend) {
702 if (isDIGIT(*patptr)) {
703 patptr = get_num( patptr, &symptr->length );
704 symptr->howlen = e_number;
705
706 } else if (*patptr == '*') {
707 patptr++;
708 symptr->howlen = e_star;
709
710 } else if (*patptr == '[') {
f7fe979e 711 const char* lenptr = ++patptr;
49704364
WL
712 symptr->howlen = e_number;
713 patptr = group_end( patptr, patend, ']' ) + 1;
714 /* what kind of [] is it? */
715 if (isDIGIT(*lenptr)) {
716 lenptr = get_num( lenptr, &symptr->length );
717 if( *lenptr != ']' )
718 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 719 _action( symptr ) );
49704364
WL
720 } else {
721 tempsym_t savsym = *symptr;
722 symptr->patend = patptr-1;
723 symptr->patptr = lenptr;
724 savsym.length = measure_struct(symptr);
725 *symptr = savsym;
726 }
727 } else {
728 symptr->howlen = e_no_len;
729 symptr->length = 1;
730 }
731
732 /* try to find / */
733 while (patptr < patend) {
734 if (isSPACE(*patptr))
735 patptr++;
736 else if (*patptr == '#') {
737 patptr++;
738 while (patptr < patend && *patptr != '\n')
739 patptr++;
740 if (patptr < patend)
741 patptr++;
742 } else {
66c611c5 743 if (*patptr == '/') {
49704364
WL
744 symptr->flags |= FLAG_SLASH;
745 patptr++;
66c611c5
MHM
746 if (patptr < patend &&
747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 749 _action( symptr ) );
49704364
WL
750 }
751 break;
752 }
18529408 753 }
49704364
WL
754 } else {
755 /* at end - no count, no / */
756 symptr->howlen = e_no_len;
757 symptr->length = 1;
758 }
759
760 symptr->code = code;
fc241834 761 symptr->patptr = patptr;
49704364 762 return TRUE;
18529408 763 }
49704364 764 }
fc241834 765 symptr->patptr = patptr;
49704364 766 return FALSE;
18529408
IZ
767}
768
18529408 769/*
fc241834 770 There is no way to cleanly handle the case where we should process the
08ca2aa3 771 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
772 (e.g. estimates like strend-s as an upper bound for the number of
773 characters left wouldn't work). So if we foresee the need of this
774 (pattern starts with U or contains U0), we want to work on the encoded
775 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
776 themselves if they need to do a lot of unpacks like this on it
777*/
fc241834 778STATIC bool
08ca2aa3
TH
779need_utf8(const char *pat, const char *patend)
780{
781 bool first = TRUE;
7918f24d
NC
782
783 PERL_ARGS_ASSERT_NEED_UTF8;
784
08ca2aa3
TH
785 while (pat < patend) {
786 if (pat[0] == '#') {
787 pat++;
f7fe979e 788 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
789 if (!pat) return FALSE;
790 } else if (pat[0] == 'U') {
791 if (first || pat[1] == '0') return TRUE;
792 } else first = FALSE;
793 pat++;
794 }
795 return FALSE;
796}
797
798STATIC char
799first_symbol(const char *pat, const char *patend) {
7918f24d
NC
800 PERL_ARGS_ASSERT_FIRST_SYMBOL;
801
08ca2aa3
TH
802 while (pat < patend) {
803 if (pat[0] != '#') return pat[0];
804 pat++;
f7fe979e 805 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
806 if (!pat) return 0;
807 pat++;
808 }
809 return 0;
810}
811
812/*
7accc089
JH
813=for apidoc unpackstring
814
21ebfc7a
DM
815The engine implementing the unpack() Perl function.
816
817Using the template pat..patend, this function unpacks the string
818s..strend into a number of mortal SVs, which it pushes onto the perl
819argument (@_) stack (so you will need to issue a C<PUTBACK> before and
820C<SPAGAIN> after the call to this function). It returns the number of
821pushed elements.
822
823The strend and patend pointers should point to the byte following the last
824character of each string.
825
826Although this function returns its values on the perl argument stack, it
827doesn't take any parameters from that stack (and thus in particular
828there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
829example).
7accc089
JH
830
831=cut */
832
833I32
f7fe979e 834Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 835{
f7fe979e 836 tempsym_t sym;
08ca2aa3 837
7918f24d
NC
838 PERL_ARGS_ASSERT_UNPACKSTRING;
839
f337b084 840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
841 else if (need_utf8(pat, patend)) {
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
230e1fce 845 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
846 SAVEFREEPV(s);
847 strend = s + len;
f337b084 848 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
849 }
850
f337b084
TH
851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
852 flags |= FLAG_PARSE_UTF8;
08ca2aa3 853
f7fe979e 854 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
855
856 return unpack_rec(&sym, s, s, strend, NULL );
857}
858
4136a0f7 859STATIC I32
f7fe979e 860S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 861{
27da23d5 862 dVAR; dSP;
3297d27d 863 SV *sv = NULL;
f7fe979e 864 const I32 start_sp_offset = SP - PL_stack_base;
49704364 865 howlen_t howlen;
a6ec74c1 866 I32 checksum = 0;
92d41999 867 UV cuv = 0;
a6ec74c1 868 NV cdouble = 0.0;
f337b084 869 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 870 bool beyond = FALSE;
21c16052 871 bool explicit_length;
9e27e96a 872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
874
875 PERL_ARGS_ASSERT_UNPACK_REC;
876
28be1210 877 symptr->strbeg = s - strbeg;
49704364 878
49704364 879 while (next_symbol(symptr)) {
a7a3cfaa 880 packprops_t props;
9e27e96a 881 I32 len;
f337b084 882 I32 datumtype = symptr->code;
a1219b5e 883 bool needs_swap;
206947d2 884 /* do first one only unless in list context
08ca2aa3 885 / is implemented by unpacking the count, then popping it from the
206947d2 886 stack, so must check that we're not in the middle of a / */
49704364 887 if ( unpack_only_one
206947d2 888 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 889 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 890 break;
49704364 891
f337b084 892 switch (howlen = symptr->howlen) {
fc241834
RGS
893 case e_star:
894 len = strend - strbeg; /* long enough */
49704364 895 break;
f337b084
TH
896 default:
897 /* e_no_len and e_number */
898 len = symptr->length;
899 break;
49704364 900 }
18529408 901
21c16052 902 explicit_length = TRUE;
a6ec74c1 903 redo_switch:
49704364 904 beyond = s >= strend;
a7a3cfaa
TH
905
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
907 if (props) {
908 /* props nonzero means we can process this letter. */
9e27e96a
AL
909 const long size = props & PACK_SIZE_MASK;
910 const long howmany = (strend - s) / size;
a7a3cfaa
TH
911 if (len > howmany)
912 len = howmany;
913
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
916 EXTEND(SP, len);
917 EXTEND_MORTAL(len);
78d46eaa
NC
918 }
919 }
a7a3cfaa 920
a1219b5e
NC
921 needs_swap = NEEDS_SWAP(datumtype);
922
1109a392 923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 924 default:
1109a392 925 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 926
a6ec74c1 927 case '%':
49704364 928 if (howlen == e_no_len)
18529408 929 len = 16; /* len is not specified */
a6ec74c1 930 checksum = len;
92d41999 931 cuv = 0;
a6ec74c1 932 cdouble = 0;
18529408 933 continue;
a6ec74c1 934 break;
18529408
IZ
935 case '(':
936 {
49704364 937 tempsym_t savsym = *symptr;
9e27e96a 938 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 939 symptr->flags |= group_modifiers;
49704364 940 symptr->patend = savsym.grpend;
28be1210 941 symptr->previous = &savsym;
49704364 942 symptr->level++;
18529408 943 PUTBACK;
c6f750d1 944 if (len && unpack_only_one) len = 1;
18529408 945 while (len--) {
49704364 946 symptr->patptr = savsym.grpbeg;
f337b084
TH
947 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
948 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
949 unpack_rec(symptr, s, strbeg, strend, &s);
950 if (s == strend && savsym.howlen == e_star)
49704364 951 break; /* No way to continue */
18529408
IZ
952 }
953 SPAGAIN;
28be1210 954 savsym.flags = symptr->flags & ~group_modifiers;
49704364 955 *symptr = savsym;
18529408
IZ
956 break;
957 }
28be1210 958 case '.' | TYPE_IS_SHRIEKING:
28be1210 959 case '.': {
9e27e96a 960 const char *from;
28be1210 961 SV *sv;
9e27e96a 962 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
963 if (howlen == e_star) from = strbeg;
964 else if (len <= 0) from = s;
965 else {
966 tempsym_t *group = symptr;
967
968 while (--len && group) group = group->previous;
969 from = group ? strbeg + group->strbeg : strbeg;
970 }
971 sv = from <= s ?
00646304
CB
972 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
973 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 974 mXPUSHs(sv);
28be1210
TH
975 break;
976 }
28be1210 977 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 978 case '@':
28be1210 979 s = strbeg + symptr->strbeg;
28be1210 980 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 981 {
08ca2aa3
TH
982 while (len > 0) {
983 if (s >= strend)
984 Perl_croak(aTHX_ "'@' outside of string in unpack");
985 s += UTF8SKIP(s);
986 len--;
987 }
988 if (s > strend)
989 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
990 } else {
28be1210 991 if (strend-s < len)
fc241834 992 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 993 s += len;
08ca2aa3 994 }
a6ec74c1 995 break;
62f95557
IZ
996 case 'X' | TYPE_IS_SHRIEKING:
997 if (!len) /* Avoid division by 0 */
998 len = 1;
08ca2aa3 999 if (utf8) {
f7fe979e 1000 const char *hop, *last;
f337b084
TH
1001 I32 l = len;
1002 hop = last = strbeg;
1003 while (hop < s) {
1004 hop += UTF8SKIP(hop);
1005 if (--l == 0) {
08ca2aa3 1006 last = hop;
f337b084
TH
1007 l = len;
1008 }
fc241834 1009 }
f337b084
TH
1010 if (last > s)
1011 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1012 s = last;
1013 break;
f337b084
TH
1014 }
1015 len = (s - strbeg) % len;
62f95557 1016 /* FALL THROUGH */
a6ec74c1 1017 case 'X':
08ca2aa3
TH
1018 if (utf8) {
1019 while (len > 0) {
1020 if (s <= strbeg)
1021 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1022 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1023 if (s <= strbeg)
1024 Perl_croak(aTHX_ "'X' outside of string in unpack");
1025 }
1026 len--;
1027 }
1028 } else {
fc241834
RGS
1029 if (len > s - strbeg)
1030 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1031 s -= len;
08ca2aa3 1032 }
a6ec74c1 1033 break;
9e27e96a
AL
1034 case 'x' | TYPE_IS_SHRIEKING: {
1035 I32 ai32;
62f95557
IZ
1036 if (!len) /* Avoid division by 0 */
1037 len = 1;
230e1fce
NC
1038 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1039 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1040 if (ai32 == 0) break;
1041 len -= ai32;
9e27e96a 1042 }
62f95557 1043 /* FALL THROUGH */
a6ec74c1 1044 case 'x':
08ca2aa3
TH
1045 if (utf8) {
1046 while (len>0) {
1047 if (s >= strend)
1048 Perl_croak(aTHX_ "'x' outside of string in unpack");
1049 s += UTF8SKIP(s);
1050 len--;
1051 }
1052 } else {
fc241834
RGS
1053 if (len > strend - s)
1054 Perl_croak(aTHX_ "'x' outside of string in unpack");
1055 s += len;
f337b084 1056 }
a6ec74c1
JH
1057 break;
1058 case '/':
49704364
WL
1059 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1060 break;
a6ec74c1
JH
1061 case 'A':
1062 case 'Z':
1063 case 'a':
08ca2aa3
TH
1064 if (checksum) {
1065 /* Preliminary length estimate is assumed done in 'W' */
1066 if (len > strend - s) len = strend - s;
1067 goto W_checksum;
1068 }
1069 if (utf8) {
1070 I32 l;
f7fe979e 1071 const char *hop;
08ca2aa3
TH
1072 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1073 if (hop >= strend) {
1074 if (hop > strend)
1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1076 break;
fc241834 1077 }
a6ec74c1 1078 }
08ca2aa3
TH
1079 if (hop > strend)
1080 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1081 len = hop - s;
1082 } else if (len > strend - s)
1083 len = strend - s;
1084
1085 if (datumtype == 'Z') {
1086 /* 'Z' strips stuff after first null */
f7fe979e 1087 const char *ptr, *end;
f337b084
TH
1088 end = s + len;
1089 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1090 sv = newSVpvn(s, ptr-s);
1091 if (howlen == e_star) /* exact for 'Z*' */
1092 len = ptr-s + (ptr != strend ? 1 : 0);
1093 } else if (datumtype == 'A') {
1094 /* 'A' strips both nulls and spaces */
f7fe979e 1095 const char *ptr;
18bdf90a
TH
1096 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1097 for (ptr = s+len-1; ptr >= s; ptr--)
1098 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1099 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1100 if (ptr >= s) ptr += UTF8SKIP(ptr);
1101 else ptr++;
28be1210 1102 if (ptr > s+len)
18bdf90a
TH
1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1104 } else {
1105 for (ptr = s+len-1; ptr >= s; ptr--)
1106 if (*ptr != 0 && !isSPACE(*ptr)) break;
1107 ptr++;
1108 }
08ca2aa3
TH
1109 sv = newSVpvn(s, ptr-s);
1110 } else sv = newSVpvn(s, len);
1111
1112 if (utf8) {
1113 SvUTF8_on(sv);
1114 /* Undo any upgrade done due to need_utf8() */
f337b084 1115 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1116 sv_utf8_downgrade(sv, 0);
a6ec74c1 1117 }
6e449a3a 1118 mXPUSHs(sv);
08ca2aa3 1119 s += len;
a6ec74c1
JH
1120 break;
1121 case 'B':
08ca2aa3
TH
1122 case 'b': {
1123 char *str;
49704364 1124 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1125 len = (strend - s) * 8;
1126 if (checksum) {
f337b084 1127 if (utf8)
08ca2aa3 1128 while (len >= 8 && s < strend) {
f337b084 1129 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1130 len -= 8;
1131 }
f337b084 1132 else
fc241834 1133 while (len >= 8) {
08ca2aa3 1134 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1135 len -= 8;
1136 }
08ca2aa3
TH
1137 if (len && s < strend) {
1138 U8 bits;
f337b084
TH
1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 if (datumtype == 'b')
a6ec74c1 1141 while (len-- > 0) {
92d41999 1142 if (bits & 1) cuv++;
a6ec74c1
JH
1143 bits >>= 1;
1144 }
f337b084 1145 else
a6ec74c1 1146 while (len-- > 0) {
08ca2aa3 1147 if (bits & 0x80) cuv++;
a6ec74c1
JH
1148 bits <<= 1;
1149 }
fc241834 1150 }
a6ec74c1
JH
1151 break;
1152 }
08ca2aa3 1153
561b68a9 1154 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1155 SvPOK_on(sv);
1156 str = SvPVX(sv);
1157 if (datumtype == 'b') {
f337b084 1158 U8 bits = 0;
f7fe979e 1159 const I32 ai32 = len;
08ca2aa3
TH
1160 for (len = 0; len < ai32; len++) {
1161 if (len & 7) bits >>= 1;
1162 else if (utf8) {
1163 if (s >= strend) break;
f337b084 1164 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1165 } else bits = *(U8 *) s++;
1166 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1167 }
08ca2aa3 1168 } else {
f337b084 1169 U8 bits = 0;
f7fe979e 1170 const I32 ai32 = len;
08ca2aa3
TH
1171 for (len = 0; len < ai32; len++) {
1172 if (len & 7) bits <<= 1;
1173 else if (utf8) {
1174 if (s >= strend) break;
f337b084 1175 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1176 } else bits = *(U8 *) s++;
1177 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1178 }
1179 }
1180 *str = '\0';
aa07b2f6 1181 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1182 XPUSHs(sv);
a6ec74c1 1183 break;
08ca2aa3 1184 }
a6ec74c1 1185 case 'H':
08ca2aa3 1186 case 'h': {
3297d27d 1187 char *str = NULL;
fc241834 1188 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1189 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1190 len = (strend - s) * 2;
858fe5e1
TC
1191 if (!checksum) {
1192 sv = sv_2mortal(newSV(len ? len : 1));
1193 SvPOK_on(sv);
1194 str = SvPVX(sv);
1195 }
a6ec74c1 1196 if (datumtype == 'h') {
f337b084 1197 U8 bits = 0;
9e27e96a 1198 I32 ai32 = len;
fc241834
RGS
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 1) bits >>= 4;
1201 else if (utf8) {
1202 if (s >= strend) break;
f337b084 1203 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1204 } else bits = * (U8 *) s++;
858fe5e1
TC
1205 if (!checksum)
1206 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1207 }
08ca2aa3 1208 } else {
f337b084 1209 U8 bits = 0;
f7fe979e 1210 const I32 ai32 = len;
08ca2aa3
TH
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits <<= 4;
1213 else if (utf8) {
1214 if (s >= strend) break;
f337b084 1215 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1216 } else bits = *(U8 *) s++;
858fe5e1
TC
1217 if (!checksum)
1218 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1219 }
1220 }
858fe5e1
TC
1221 if (!checksum) {
1222 *str = '\0';
1223 SvCUR_set(sv, str - SvPVX_const(sv));
1224 XPUSHs(sv);
1225 }
a6ec74c1 1226 break;
08ca2aa3 1227 }
1651fc44
ML
1228 case 'C':
1229 if (len == 0) {
1230 if (explicit_length)
1231 /* Switch to "character" mode */
1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1233 break;
1234 }
1235 /* FALL THROUGH */
a6ec74c1 1236 case 'c':
1651fc44
ML
1237 while (len-- > 0 && s < strend) {
1238 int aint;
1239 if (utf8)
1240 {
1241 STRLEN retlen;
1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1244 if (retlen == (STRLEN) -1 || retlen == 0)
1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1246 s += retlen;
1247 }
1248 else
1249 aint = *(U8 *)(s)++;
1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1251 aint -= 256;
08ca2aa3 1252 if (!checksum)
6e449a3a 1253 mPUSHi(aint);
73cb7263
NC
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)aint;
1256 else
1257 cuv += aint;
a6ec74c1
JH
1258 }
1259 break;
08ca2aa3
TH
1260 case 'W':
1261 W_checksum:
1651fc44 1262 if (utf8) {
08ca2aa3 1263 while (len-- > 0 && s < strend) {
08ca2aa3 1264 STRLEN retlen;
f7fe979e 1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1267 if (retlen == (STRLEN) -1 || retlen == 0)
1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1269 s += retlen;
1270 if (!checksum)
6e449a3a 1271 mPUSHu(val);
08ca2aa3
TH
1272 else if (checksum > bits_in_uv)
1273 cdouble += (NV) val;
d6d3e8bd 1274 else
08ca2aa3 1275 cuv += val;
fc241834 1276 }
08ca2aa3 1277 } else if (!checksum)
a6ec74c1 1278 while (len-- > 0) {
f7fe979e 1279 const U8 ch = *(U8 *) s++;
6e449a3a 1280 mPUSHu(ch);
a6ec74c1 1281 }
08ca2aa3
TH
1282 else if (checksum > bits_in_uv)
1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1284 else
1285 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1286 break;
1287 case 'U':
35bcd338 1288 if (len == 0) {
c5333953 1289 if (explicit_length && howlen != e_star) {
08ca2aa3 1290 /* Switch to "bytes in UTF-8" mode */
f337b084 1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1292 else
1293 /* Should be impossible due to the need_utf8() test */
1294 Perl_croak(aTHX_ "U0 mode on a byte string");
1295 }
35bcd338
JH
1296 break;
1297 }
08ca2aa3 1298 if (len > strend - s) len = strend - s;
fc241834 1299 if (!checksum) {
08ca2aa3
TH
1300 if (len && unpack_only_one) len = 1;
1301 EXTEND(SP, len);
1302 EXTEND_MORTAL(len);
fc241834 1303 }
08ca2aa3
TH
1304 while (len-- > 0 && s < strend) {
1305 STRLEN retlen;
1306 UV auv;
1307 if (utf8) {
1308 U8 result[UTF8_MAXLEN];
f7fe979e 1309 const char *ptr = s;
08ca2aa3 1310 STRLEN len;
08ca2aa3
TH
1311 /* Bug: warns about bad utf8 even if we are short on bytes
1312 and will break out of the loop */
230e1fce
NC
1313 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1314 'U'))
08ca2aa3
TH
1315 break;
1316 len = UTF8SKIP(result);
fc241834 1317 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1318 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1319 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1320 s = ptr;
1321 } else {
cde84f2a 1322 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1325 s += retlen;
1326 }
1327 if (!checksum)
6e449a3a 1328 mPUSHu(auv);
73cb7263 1329 else if (checksum > bits_in_uv)
08ca2aa3 1330 cdouble += (NV) auv;
73cb7263 1331 else
08ca2aa3 1332 cuv += auv;
a6ec74c1
JH
1333 }
1334 break;
49704364
WL
1335 case 's' | TYPE_IS_SHRIEKING:
1336#if SHORTSIZE != SIZE16
73cb7263 1337 while (len-- > 0) {
08ca2aa3 1338 short ashort;
f337b084
TH
1339 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1340 DO_BO_UNPACK(ashort, s);
08ca2aa3 1341 if (!checksum)
6e449a3a 1342 mPUSHi(ashort);
73cb7263
NC
1343 else if (checksum > bits_in_uv)
1344 cdouble += (NV)ashort;
1345 else
1346 cuv += ashort;
49704364
WL
1347 }
1348 break;
1349#else
1350 /* Fallthrough! */
a6ec74c1 1351#endif
49704364 1352 case 's':
73cb7263 1353 while (len-- > 0) {
08ca2aa3
TH
1354 I16 ai16;
1355
1356#if U16SIZE > SIZE16
1357 ai16 = 0;
1358#endif
f337b084 1359 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1360 DO_BO_UNPACK(ai16, 16);
1109a392 1361#if U16SIZE > SIZE16
73cb7263
NC
1362 if (ai16 > 32767)
1363 ai16 -= 65536;
a6ec74c1 1364#endif
08ca2aa3 1365 if (!checksum)
6e449a3a 1366 mPUSHi(ai16);
73cb7263
NC
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)ai16;
1369 else
1370 cuv += ai16;
a6ec74c1
JH
1371 }
1372 break;
49704364
WL
1373 case 'S' | TYPE_IS_SHRIEKING:
1374#if SHORTSIZE != SIZE16
73cb7263 1375 while (len-- > 0) {
08ca2aa3 1376 unsigned short aushort;
f337b084
TH
1377 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1378 DO_BO_UNPACK(aushort, s);
08ca2aa3 1379 if (!checksum)
6e449a3a 1380 mPUSHu(aushort);
73cb7263
NC
1381 else if (checksum > bits_in_uv)
1382 cdouble += (NV)aushort;
1383 else
1384 cuv += aushort;
49704364
WL
1385 }
1386 break;
1387#else
486ec47a 1388 /* Fallthrough! */
49704364 1389#endif
a6ec74c1
JH
1390 case 'v':
1391 case 'n':
1392 case 'S':
73cb7263 1393 while (len-- > 0) {
08ca2aa3
TH
1394 U16 au16;
1395#if U16SIZE > SIZE16
1396 au16 = 0;
1397#endif
f337b084 1398 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1399 DO_BO_UNPACK(au16, 16);
73cb7263
NC
1400 if (datumtype == 'n')
1401 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1402 if (datumtype == 'v')
1403 au16 = vtohs(au16);
08ca2aa3 1404 if (!checksum)
6e449a3a 1405 mPUSHu(au16);
73cb7263 1406 else if (checksum > bits_in_uv)
f337b084 1407 cdouble += (NV) au16;
73cb7263
NC
1408 else
1409 cuv += au16;
a6ec74c1
JH
1410 }
1411 break;
068bd2e7
MHM
1412 case 'v' | TYPE_IS_SHRIEKING:
1413 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1414 while (len-- > 0) {
08ca2aa3
TH
1415 I16 ai16;
1416# if U16SIZE > SIZE16
1417 ai16 = 0;
1418# endif
f337b084 1419 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1420 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1421 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1422 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1423 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1424 if (!checksum)
6e449a3a 1425 mPUSHi(ai16);
73cb7263 1426 else if (checksum > bits_in_uv)
08ca2aa3 1427 cdouble += (NV) ai16;
73cb7263
NC
1428 else
1429 cuv += ai16;
068bd2e7
MHM
1430 }
1431 break;
a6ec74c1 1432 case 'i':
49704364 1433 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1434 while (len-- > 0) {
08ca2aa3 1435 int aint;
f337b084
TH
1436 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1437 DO_BO_UNPACK(aint, i);
08ca2aa3 1438 if (!checksum)
6e449a3a 1439 mPUSHi(aint);
73cb7263
NC
1440 else if (checksum > bits_in_uv)
1441 cdouble += (NV)aint;
1442 else
1443 cuv += aint;
a6ec74c1
JH
1444 }
1445 break;
1446 case 'I':
49704364 1447 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1448 while (len-- > 0) {
08ca2aa3 1449 unsigned int auint;
f337b084
TH
1450 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1451 DO_BO_UNPACK(auint, i);
08ca2aa3 1452 if (!checksum)
6e449a3a 1453 mPUSHu(auint);
73cb7263
NC
1454 else if (checksum > bits_in_uv)
1455 cdouble += (NV)auint;
1456 else
1457 cuv += auint;
a6ec74c1
JH
1458 }
1459 break;
92d41999 1460 case 'j':
73cb7263 1461 while (len-- > 0) {
08ca2aa3 1462 IV aiv;
f337b084 1463 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1464#if IVSIZE == INTSIZE
f337b084 1465 DO_BO_UNPACK(aiv, i);
1109a392 1466#elif IVSIZE == LONGSIZE
f337b084 1467 DO_BO_UNPACK(aiv, l);
1109a392 1468#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1469 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1470#else
1471 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1472#endif
08ca2aa3 1473 if (!checksum)
6e449a3a 1474 mPUSHi(aiv);
73cb7263
NC
1475 else if (checksum > bits_in_uv)
1476 cdouble += (NV)aiv;
1477 else
1478 cuv += aiv;
92d41999
JH
1479 }
1480 break;
1481 case 'J':
73cb7263 1482 while (len-- > 0) {
08ca2aa3 1483 UV auv;
f337b084 1484 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1485#if IVSIZE == INTSIZE
f337b084 1486 DO_BO_UNPACK(auv, i);
08ca2aa3 1487#elif IVSIZE == LONGSIZE
f337b084 1488 DO_BO_UNPACK(auv, l);
08ca2aa3 1489#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1490 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1491#else
1492 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1493#endif
08ca2aa3 1494 if (!checksum)
6e449a3a 1495 mPUSHu(auv);
73cb7263
NC
1496 else if (checksum > bits_in_uv)
1497 cdouble += (NV)auv;
1498 else
1499 cuv += auv;
92d41999
JH
1500 }
1501 break;
49704364
WL
1502 case 'l' | TYPE_IS_SHRIEKING:
1503#if LONGSIZE != SIZE32
73cb7263 1504 while (len-- > 0) {
08ca2aa3 1505 long along;
f337b084
TH
1506 SHIFT_VAR(utf8, s, strend, along, datumtype);
1507 DO_BO_UNPACK(along, l);
08ca2aa3 1508 if (!checksum)
6e449a3a 1509 mPUSHi(along);
73cb7263
NC
1510 else if (checksum > bits_in_uv)
1511 cdouble += (NV)along;
1512 else
1513 cuv += along;
49704364
WL
1514 }
1515 break;
1516#else
1517 /* Fallthrough! */
a6ec74c1 1518#endif
49704364 1519 case 'l':
73cb7263 1520 while (len-- > 0) {
08ca2aa3
TH
1521 I32 ai32;
1522#if U32SIZE > SIZE32
1523 ai32 = 0;
1524#endif
f337b084 1525 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1526 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1527#if U32SIZE > SIZE32
08ca2aa3 1528 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1529#endif
08ca2aa3 1530 if (!checksum)
6e449a3a 1531 mPUSHi(ai32);
73cb7263
NC
1532 else if (checksum > bits_in_uv)
1533 cdouble += (NV)ai32;
1534 else
1535 cuv += ai32;
a6ec74c1
JH
1536 }
1537 break;
49704364
WL
1538 case 'L' | TYPE_IS_SHRIEKING:
1539#if LONGSIZE != SIZE32
73cb7263 1540 while (len-- > 0) {
08ca2aa3 1541 unsigned long aulong;
f337b084
TH
1542 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1543 DO_BO_UNPACK(aulong, l);
08ca2aa3 1544 if (!checksum)
6e449a3a 1545 mPUSHu(aulong);
73cb7263
NC
1546 else if (checksum > bits_in_uv)
1547 cdouble += (NV)aulong;
1548 else
1549 cuv += aulong;
49704364
WL
1550 }
1551 break;
1552#else
1553 /* Fall through! */
1554#endif
a6ec74c1
JH
1555 case 'V':
1556 case 'N':
1557 case 'L':
73cb7263 1558 while (len-- > 0) {
08ca2aa3
TH
1559 U32 au32;
1560#if U32SIZE > SIZE32
1561 au32 = 0;
1562#endif
f337b084 1563 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1564 DO_BO_UNPACK(au32, 32);
73cb7263
NC
1565 if (datumtype == 'N')
1566 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1567 if (datumtype == 'V')
1568 au32 = vtohl(au32);
08ca2aa3 1569 if (!checksum)
6e449a3a 1570 mPUSHu(au32);
fc241834
RGS
1571 else if (checksum > bits_in_uv)
1572 cdouble += (NV)au32;
1573 else
1574 cuv += au32;
a6ec74c1
JH
1575 }
1576 break;
068bd2e7
MHM
1577 case 'V' | TYPE_IS_SHRIEKING:
1578 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1579 while (len-- > 0) {
08ca2aa3 1580 I32 ai32;
f8e5a5db 1581#if U32SIZE > SIZE32
08ca2aa3 1582 ai32 = 0;
f8e5a5db 1583#endif
f337b084 1584 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263
NC
1585 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1586 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1587 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1588 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1589 if (!checksum)
6e449a3a 1590 mPUSHi(ai32);
73cb7263
NC
1591 else if (checksum > bits_in_uv)
1592 cdouble += (NV)ai32;
1593 else
1594 cuv += ai32;
068bd2e7
MHM
1595 }
1596 break;
a6ec74c1 1597 case 'p':
a6ec74c1 1598 while (len-- > 0) {
f7fe979e 1599 const char *aptr;
f337b084 1600 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
7e38f754 1601 DO_BO_UNPACK(aptr, pointer);
c4c5f44a 1602 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1603 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1604 }
1605 break;
1606 case 'w':
a6ec74c1
JH
1607 {
1608 UV auv = 0;
1609 U32 bytes = 0;
fc241834 1610
08ca2aa3
TH
1611 while (len > 0 && s < strend) {
1612 U8 ch;
f337b084 1613 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1614 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1615 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1616 if (ch < 0x80) {
a6ec74c1 1617 bytes = 0;
6e449a3a 1618 mPUSHu(auv);
a6ec74c1
JH
1619 len--;
1620 auv = 0;
08ca2aa3 1621 continue;
a6ec74c1 1622 }
08ca2aa3 1623 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1624 const char *t;
a6ec74c1 1625
f5992bc4 1626 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1627 while (s < strend) {
f337b084 1628 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1629 sv = mul128(sv, (U8)(ch & 0x7f));
1630 if (!(ch & 0x80)) {
a6ec74c1
JH
1631 bytes = 0;
1632 break;
1633 }
1634 }
10516c54 1635 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1636 while (*t == '0')
1637 t++;
1638 sv_chop(sv, t);
6e449a3a 1639 mPUSHs(sv);
a6ec74c1
JH
1640 len--;
1641 auv = 0;
1642 }
1643 }
1644 if ((s >= strend) && bytes)
49704364 1645 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1646 }
1647 break;
1648 case 'P':
49704364
WL
1649 if (symptr->howlen == e_star)
1650 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1651 EXTEND(SP, 1);
2d3e0934 1652 if (s + sizeof(char*) <= strend) {
08ca2aa3 1653 char *aptr;
f337b084 1654 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
7e38f754 1655 DO_BO_UNPACK(aptr, pointer);
fc241834 1656 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1657 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1658 }
a6ec74c1
JH
1659 break;
1660#ifdef HAS_QUAD
1661 case 'q':
73cb7263 1662 while (len-- > 0) {
08ca2aa3 1663 Quad_t aquad;
f337b084
TH
1664 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1665 DO_BO_UNPACK(aquad, 64);
08ca2aa3 1666 if (!checksum)
6e449a3a
MHM
1667 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1668 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1669 else if (checksum > bits_in_uv)
1670 cdouble += (NV)aquad;
1671 else
1672 cuv += aquad;
1673 }
a6ec74c1
JH
1674 break;
1675 case 'Q':
73cb7263 1676 while (len-- > 0) {
08ca2aa3 1677 Uquad_t auquad;
f337b084
TH
1678 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1679 DO_BO_UNPACK(auquad, 64);
08ca2aa3 1680 if (!checksum)
6e449a3a
MHM
1681 mPUSHs(auquad <= UV_MAX ?
1682 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1683 else if (checksum > bits_in_uv)
1684 cdouble += (NV)auquad;
1685 else
1686 cuv += auquad;
a6ec74c1
JH
1687 }
1688 break;
08ca2aa3 1689#endif /* HAS_QUAD */
a6ec74c1
JH
1690 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1691 case 'f':
73cb7263 1692 while (len-- > 0) {
08ca2aa3 1693 float afloat;
f337b084 1694 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
7e38f754 1695 DO_BO_UNPACK(afloat, float);
08ca2aa3 1696 if (!checksum)
6e449a3a 1697 mPUSHn(afloat);
08ca2aa3 1698 else
73cb7263 1699 cdouble += afloat;
fc241834 1700 }
a6ec74c1
JH
1701 break;
1702 case 'd':
73cb7263 1703 while (len-- > 0) {
08ca2aa3 1704 double adouble;
f337b084 1705 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
7e38f754 1706 DO_BO_UNPACK(adouble, double);
08ca2aa3 1707 if (!checksum)
6e449a3a 1708 mPUSHn(adouble);
08ca2aa3 1709 else
73cb7263 1710 cdouble += adouble;
fc241834 1711 }
a6ec74c1 1712 break;
92d41999 1713 case 'F':
73cb7263 1714 while (len-- > 0) {
275663fa
TC
1715 NV_bytes anv;
1716 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
7e38f754 1717 DO_BO_UNPACK(anv.nv, NV);
08ca2aa3 1718 if (!checksum)
275663fa 1719 mPUSHn(anv.nv);
08ca2aa3 1720 else
275663fa 1721 cdouble += anv.nv;
fc241834 1722 }
92d41999
JH
1723 break;
1724#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1725 case 'D':
73cb7263 1726 while (len-- > 0) {
275663fa
TC
1727 ld_bytes aldouble;
1728 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
7e38f754 1729 DO_BO_UNPACK(aldouble.ld, long double);
08ca2aa3 1730 if (!checksum)
275663fa 1731 mPUSHn(aldouble.ld);
08ca2aa3 1732 else
275663fa 1733 cdouble += aldouble.ld;
92d41999
JH
1734 }
1735 break;
1736#endif
a6ec74c1 1737 case 'u':
858fe5e1 1738 if (!checksum) {
f7fe979e 1739 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1740 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1741 if (l) SvPOK_on(sv);
1742 }
1743 if (utf8) {
1744 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1745 I32 a, b, c, d;
db187877 1746 char hunk[3];
08ca2aa3 1747
08ca2aa3
TH
1748 while (len > 0) {
1749 next_uni_uu(aTHX_ &s, strend, &a);
1750 next_uni_uu(aTHX_ &s, strend, &b);
1751 next_uni_uu(aTHX_ &s, strend, &c);
1752 next_uni_uu(aTHX_ &s, strend, &d);
1753 hunk[0] = (char)((a << 2) | (b >> 4));
1754 hunk[1] = (char)((b << 4) | (c >> 2));
1755 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1756 if (!checksum)
1757 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1758 len -= 3;
1759 }
1760 if (s < strend) {
f7fe979e
AL
1761 if (*s == '\n') {
1762 s++;
1763 }
08ca2aa3
TH
1764 else {
1765 /* possible checksum byte */
f7fe979e
AL
1766 const char *skip = s+UTF8SKIP(s);
1767 if (skip < strend && *skip == '\n')
1768 s = skip+1;
08ca2aa3
TH
1769 }
1770 }
1771 }
1772 } else {
fc241834
RGS
1773 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1774 I32 a, b, c, d;
db187877 1775 char hunk[3];
a6ec74c1 1776
fc241834
RGS
1777 len = PL_uudmap[*(U8*)s++] & 077;
1778 while (len > 0) {
1779 if (s < strend && ISUUCHAR(*s))
1780 a = PL_uudmap[*(U8*)s++] & 077;
1781 else
1782 a = 0;
1783 if (s < strend && ISUUCHAR(*s))
1784 b = PL_uudmap[*(U8*)s++] & 077;
1785 else
1786 b = 0;
1787 if (s < strend && ISUUCHAR(*s))
1788 c = PL_uudmap[*(U8*)s++] & 077;
1789 else
1790 c = 0;
1791 if (s < strend && ISUUCHAR(*s))
1792 d = PL_uudmap[*(U8*)s++] & 077;
1793 else
1794 d = 0;
1795 hunk[0] = (char)((a << 2) | (b >> 4));
1796 hunk[1] = (char)((b << 4) | (c >> 2));
1797 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1798 if (!checksum)
1799 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1800 len -= 3;
1801 }
1802 if (*s == '\n')
1803 s++;
1804 else /* possible checksum byte */
1805 if (s + 1 < strend && s[1] == '\n')
1806 s += 2;
a6ec74c1 1807 }
08ca2aa3 1808 }
858fe5e1
TC
1809 if (!checksum)
1810 XPUSHs(sv);
a6ec74c1
JH
1811 break;
1812 }
49704364 1813
a6ec74c1 1814 if (checksum) {
1109a392 1815 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1816 (checksum > bits_in_uv &&
08ca2aa3
TH
1817 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1818 NV trouble, anv;
a6ec74c1 1819
08ca2aa3 1820 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1821 while (checksum >= 16) {
1822 checksum -= 16;
08ca2aa3 1823 anv *= 65536.0;
a6ec74c1 1824 }
a6ec74c1 1825 while (cdouble < 0.0)
08ca2aa3
TH
1826 cdouble += anv;
1827 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1828 sv = newSVnv(cdouble);
a6ec74c1
JH
1829 }
1830 else {
fa8ec7c1
NC
1831 if (checksum < bits_in_uv) {
1832 UV mask = ((UV)1 << checksum) - 1;
92d41999 1833 cuv &= mask;
a6ec74c1 1834 }
c4c5f44a 1835 sv = newSVuv(cuv);
a6ec74c1 1836 }
6e449a3a 1837 mXPUSHs(sv);
a6ec74c1
JH
1838 checksum = 0;
1839 }
fc241834 1840
49704364
WL
1841 if (symptr->flags & FLAG_SLASH){
1842 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1843 break;
49704364
WL
1844 if( next_symbol(symptr) ){
1845 if( symptr->howlen == e_number )
1846 Perl_croak(aTHX_ "Count after length/code in unpack" );
1847 if( beyond ){
1848 /* ...end of char buffer then no decent length available */
1849 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1850 } else {
1851 /* take top of stack (hope it's numeric) */
1852 len = POPi;
1853 if( len < 0 )
1854 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1855 }
1856 } else {
1857 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1858 }
1859 datumtype = symptr->code;
21c16052 1860 explicit_length = FALSE;
49704364
WL
1861 goto redo_switch;
1862 }
a6ec74c1 1863 }
49704364 1864
18529408
IZ
1865 if (new_s)
1866 *new_s = s;
1867 PUTBACK;
1868 return SP - PL_stack_base - start_sp_offset;
1869}
1870
1871PP(pp_unpack)
1872{
97aff369 1873 dVAR;
18529408 1874 dSP;
bab9c0ac 1875 dPOPPOPssrl;
18529408
IZ
1876 I32 gimme = GIMME_V;
1877 STRLEN llen;
1878 STRLEN rlen;
5c144d81
NC
1879 const char *pat = SvPV_const(left, llen);
1880 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1881 const char *strend = s + rlen;
1882 const char *patend = pat + llen;
08ca2aa3 1883 I32 cnt;
18529408
IZ
1884
1885 PUTBACK;
7accc089 1886 cnt = unpackstring(pat, patend, s, strend,
49704364 1887 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1888 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1889
18529408
IZ
1890 SPAGAIN;
1891 if ( !cnt && gimme == G_SCALAR )
1892 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1893 RETURN;
1894}
1895
f337b084 1896STATIC U8 *
f7fe979e 1897doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1898{
f337b084 1899 *h++ = PL_uuemap[len];
a6ec74c1 1900 while (len > 2) {
f337b084
TH
1901 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1902 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1903 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1904 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1905 s += 3;
1906 len -= 3;
1907 }
1908 if (len > 0) {
f7fe979e 1909 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1910 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1911 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1912 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1913 *h++ = PL_uuemap[0];
a6ec74c1 1914 }
f337b084
TH
1915 *h++ = '\n';
1916 return h;
a6ec74c1
JH
1917}
1918
1919STATIC SV *
f7fe979e 1920S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1921{
8b6e33c7
AL
1922 SV *result = newSVpvn(s, l);
1923 char *const result_c = SvPV_nolen(result); /* convenience */
1924 char *out = result_c;
1925 bool skip = 1;
1926 bool ignore = 0;
a6ec74c1 1927
7918f24d
NC
1928 PERL_ARGS_ASSERT_IS_AN_INT;
1929
a6ec74c1
JH
1930 while (*s) {
1931 switch (*s) {
1932 case ' ':
1933 break;
1934 case '+':
1935 if (!skip) {
1936 SvREFCNT_dec(result);
1937 return (NULL);
1938 }
1939 break;
1940 case '0':
1941 case '1':
1942 case '2':
1943 case '3':
1944 case '4':
1945 case '5':
1946 case '6':
1947 case '7':
1948 case '8':
1949 case '9':
1950 skip = 0;
1951 if (!ignore) {
1952 *(out++) = *s;
1953 }
1954 break;
1955 case '.':
1956 ignore = 1;
1957 break;
1958 default:
1959 SvREFCNT_dec(result);
1960 return (NULL);
1961 }
1962 s++;
1963 }
1964 *(out++) = '\0';
1965 SvCUR_set(result, out - result_c);
1966 return (result);
1967}
1968
1969/* pnum must be '\0' terminated */
1970STATIC int
1971S_div128(pTHX_ SV *pnum, bool *done)
1972{
8b6e33c7
AL
1973 STRLEN len;
1974 char * const s = SvPV(pnum, len);
1975 char *t = s;
1976 int m = 0;
1977
7918f24d
NC
1978 PERL_ARGS_ASSERT_DIV128;
1979
8b6e33c7
AL
1980 *done = 1;
1981 while (*t) {
1982 const int i = m * 10 + (*t - '0');
1983 const int r = (i >> 7); /* r < 10 */
1984 m = i & 0x7F;
1985 if (r) {
1986 *done = 0;
1987 }
1988 *(t++) = '0' + r;
a6ec74c1 1989 }
8b6e33c7
AL
1990 *(t++) = '\0';
1991 SvCUR_set(pnum, (STRLEN) (t - s));
1992 return (m);
a6ec74c1
JH
1993}
1994
18529408 1995/*
7accc089
JH
1996=for apidoc packlist
1997
1998The engine implementing pack() Perl function.
1999
bfce84ec
AL
2000=cut
2001*/
7accc089
JH
2002
2003void
5aaab254 2004Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2005{
97aff369 2006 dVAR;
aadb217d
JH
2007 tempsym_t sym;
2008
7918f24d
NC
2009 PERL_ARGS_ASSERT_PACKLIST;
2010
f7fe979e 2011 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2012
f337b084
TH
2013 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2014 Also make sure any UTF8 flag is loaded */
56eb0262 2015 SvPV_force_nolen(cat);
bfce84ec
AL
2016 if (DO_UTF8(cat))
2017 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2018
49704364
WL
2019 (void)pack_rec( cat, &sym, beglist, endlist );
2020}
2021
f337b084
TH
2022/* like sv_utf8_upgrade, but also repoint the group start markers */
2023STATIC void
2024marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2025 STRLEN len;
2026 tempsym_t *group;
f7fe979e
AL
2027 const char *from_ptr, *from_start, *from_end, **marks, **m;
2028 char *to_start, *to_ptr;
f337b084
TH
2029
2030 if (SvUTF8(sv)) return;
2031
aa07b2f6 2032 from_start = SvPVX_const(sv);
f337b084
TH
2033 from_end = from_start + SvCUR(sv);
2034 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2035 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2036 if (from_ptr == from_end) {
2037 /* Simple case: no character needs to be changed */
2038 SvUTF8_on(sv);
2039 return;
2040 }
2041
3473cf63 2042 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2043 Newx(to_start, len, char);
f337b084
TH
2044 Copy(from_start, to_start, from_ptr-from_start, char);
2045 to_ptr = to_start + (from_ptr-from_start);
2046
a02a5408 2047 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2048 for (group=sym_ptr; group; group = group->previous)
2049 marks[group->level] = from_start + group->strbeg;
2050 marks[sym_ptr->level+1] = from_end+1;
2051 for (m = marks; *m < from_ptr; m++)
2052 *m = to_start + (*m-from_start);
2053
2054 for (;from_ptr < from_end; from_ptr++) {
2055 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2056 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2057 }
2058 *to_ptr = 0;
2059
2060 while (*m == from_ptr) *m++ = to_ptr;
2061 if (m != marks + sym_ptr->level+1) {
2062 Safefree(marks);
2063 Safefree(to_start);
5637ef5b
NC
2064 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2065 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2066 }
2067 for (group=sym_ptr; group; group = group->previous)
2068 group->strbeg = marks[group->level] - to_start;
2069 Safefree(marks);
2070
2071 if (SvOOK(sv)) {
2072 if (SvIVX(sv)) {
b162af07 2073 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2074 from_start -= SvIVX(sv);
2075 SvIV_set(sv, 0);
2076 }
2077 SvFLAGS(sv) &= ~SVf_OOK;
2078 }
2079 if (SvLEN(sv) != 0)
2080 Safefree(from_start);
f880fe2f 2081 SvPV_set(sv, to_start);
b162af07
SP
2082 SvCUR_set(sv, to_ptr - to_start);
2083 SvLEN_set(sv, len);
f337b084
TH
2084 SvUTF8_on(sv);
2085}
2086
2087/* Exponential string grower. Makes string extension effectively O(n)
2088 needed says how many extra bytes we need (not counting the final '\0')
2089 Only grows the string if there is an actual lack of space
2090*/
2091STATIC char *
0bd48802 2092S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2093 const STRLEN cur = SvCUR(sv);
2094 const STRLEN len = SvLEN(sv);
f337b084 2095 STRLEN extend;
7918f24d
NC
2096
2097 PERL_ARGS_ASSERT_SV_EXP_GROW;
2098
f337b084
TH
2099 if (len - cur > needed) return SvPVX(sv);
2100 extend = needed > len ? needed : len;
2101 return SvGROW(sv, len+extend+1);
2102}
49704364
WL
2103
2104STATIC
2105SV **
f337b084 2106S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2107{
97aff369 2108 dVAR;
49704364 2109 tempsym_t lookahead;
f337b084
TH
2110 I32 items = endlist - beglist;
2111 bool found = next_symbol(symptr);
2112 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2113 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2114
7918f24d
NC
2115 PERL_ARGS_ASSERT_PACK_REC;
2116
f337b084
TH
2117 if (symptr->level == 0 && found && symptr->code == 'U') {
2118 marked_upgrade(aTHX_ cat, symptr);
2119 symptr->flags |= FLAG_DO_UTF8;
2120 utf8 = 0;
49704364 2121 }
f337b084 2122 symptr->strbeg = SvCUR(cat);
49704364
WL
2123
2124 while (found) {
f337b084
TH
2125 SV *fromstr;
2126 STRLEN fromlen;
2127 I32 len;
a0714e2c 2128 SV *lengthcode = NULL;
49704364 2129 I32 datumtype = symptr->code;
f337b084
TH
2130 howlen_t howlen = symptr->howlen;
2131 char *start = SvPVX(cat);
2132 char *cur = start + SvCUR(cat);
a1219b5e 2133 bool needs_swap;
49704364 2134
f337b084
TH
2135#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2136
2137 switch (howlen) {
fc241834 2138 case e_star:
f337b084
TH
2139 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2140 0 : items;
2141 break;
2142 default:
2143 /* e_no_len and e_number */
2144 len = symptr->length;
49704364
WL
2145 break;
2146 }
2147
f337b084 2148 if (len) {
a7a3cfaa 2149 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2150
a7a3cfaa
TH
2151 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2152 /* We can process this letter. */
2153 STRLEN size = props & PACK_SIZE_MASK;
2154 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2155 }
f337b084
TH
2156 }
2157
49704364
WL
2158 /* Look ahead for next symbol. Do we have code/code? */
2159 lookahead = *symptr;
2160 found = next_symbol(&lookahead);
246f24af
TH
2161 if (symptr->flags & FLAG_SLASH) {
2162 IV count;
f337b084 2163 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2164 if (strchr("aAZ", lookahead.code)) {
2165 if (lookahead.howlen == e_number) count = lookahead.length;
2166 else {
ce399ba6 2167 if (items > 0) {
48a5da33 2168 count = sv_len_utf8(*beglist);
ce399ba6 2169 }
246f24af
TH
2170 else count = 0;
2171 if (lookahead.code == 'Z') count++;
2172 }
2173 } else {
2174 if (lookahead.howlen == e_number && lookahead.length < items)
2175 count = lookahead.length;
2176 else count = items;
2177 }
2178 lookahead.howlen = e_number;
2179 lookahead.length = count;
2180 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2181 }
49704364 2182
a1219b5e
NC
2183 needs_swap = NEEDS_SWAP(datumtype);
2184
fc241834
RGS
2185 /* Code inside the switch must take care to properly update
2186 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2187 doesn't simply leave using break */
1109a392 2188 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2189 default:
f337b084
TH
2190 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2191 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2192 case '%':
49704364 2193 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2194 {
2195 char *from;
28be1210 2196 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2197 case '.':
2198 if (howlen == e_star) from = start;
2199 else if (len == 0) from = cur;
2200 else {
2201 tempsym_t *group = symptr;
2202
2203 while (--len && group) group = group->previous;
2204 from = group ? start + group->strbeg : start;
2205 }
2206 fromstr = NEXTFROM;
2207 len = SvIV(fromstr);
2208 goto resize;
28be1210 2209 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2210 case '@':
28be1210
TH
2211 from = start + symptr->strbeg;
2212 resize:
28be1210 2213 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2214 if (len >= 0) {
2215 while (len && from < cur) {
2216 from += UTF8SKIP(from);
2217 len--;
2218 }
2219 if (from > cur)
2220 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2221 if (len) {
2222 /* Here we know from == cur */
2223 grow:
2224 GROWING(0, cat, start, cur, len);
2225 Zero(cur, len, char);
2226 cur += len;
2227 } else if (from < cur) {
2228 len = cur - from;
2229 goto shrink;
2230 } else goto no_change;
2231 } else {
2232 cur = from;
2233 len = -len;
2234 goto utf8_shrink;
f337b084 2235 }
28be1210
TH
2236 else {
2237 len -= cur - from;
f337b084 2238 if (len > 0) goto grow;
28be1210 2239 if (len == 0) goto no_change;
fc241834 2240 len = -len;
28be1210 2241 goto shrink;
f337b084 2242 }
a6ec74c1 2243 break;
28be1210 2244 }
fc241834 2245 case '(': {
49704364 2246 tempsym_t savsym = *symptr;
66c611c5
MHM
2247 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2248 symptr->flags |= group_modifiers;
49704364
WL
2249 symptr->patend = savsym.grpend;
2250 symptr->level++;
f337b084 2251 symptr->previous = &lookahead;
18529408 2252 while (len--) {
f337b084
TH
2253 U32 was_utf8;
2254 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2255 else symptr->flags &= ~FLAG_PARSE_UTF8;
2256 was_utf8 = SvUTF8(cat);
49704364 2257 symptr->patptr = savsym.grpbeg;
f337b084
TH
2258 beglist = pack_rec(cat, symptr, beglist, endlist);
2259 if (SvUTF8(cat) != was_utf8)
2260 /* This had better be an upgrade while in utf8==0 mode */
2261 utf8 = 1;
2262
49704364 2263 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2264 break; /* No way to continue */
2265 }
ee790063 2266 items = endlist - beglist;
f337b084
TH
2267 lookahead.flags = symptr->flags & ~group_modifiers;
2268 goto no_change;
18529408 2269 }
62f95557
IZ
2270 case 'X' | TYPE_IS_SHRIEKING:
2271 if (!len) /* Avoid division by 0 */
2272 len = 1;
f337b084
TH
2273 if (utf8) {
2274 char *hop, *last;
2275 I32 l = len;
2276 hop = last = start;
2277 while (hop < cur) {
2278 hop += UTF8SKIP(hop);
2279 if (--l == 0) {
2280 last = hop;
2281 l = len;
2282 }
2283 }
2284 if (last > cur)
2285 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2286 cur = last;
2287 break;
2288 }
2289 len = (cur-start) % len;
62f95557 2290 /* FALL THROUGH */
a6ec74c1 2291 case 'X':
f337b084
TH
2292 if (utf8) {
2293 if (len < 1) goto no_change;
28be1210 2294 utf8_shrink:
f337b084
TH
2295 while (len > 0) {
2296 if (cur <= start)
28be1210
TH
2297 Perl_croak(aTHX_ "'%c' outside of string in pack",
2298 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2299 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2300 if (cur <= start)
28be1210
TH
2301 Perl_croak(aTHX_ "'%c' outside of string in pack",
2302 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2303 }
2304 len--;
2305 }
2306 } else {
fc241834 2307 shrink:
f337b084 2308 if (cur - start < len)
28be1210
TH
2309 Perl_croak(aTHX_ "'%c' outside of string in pack",
2310 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2311 cur -= len;
2312 }
2313 if (cur < start+symptr->strbeg) {
2314 /* Make sure group starts don't point into the void */
2315 tempsym_t *group;
9e27e96a 2316 const STRLEN length = cur-start;
f337b084
TH
2317 for (group = symptr;
2318 group && length < group->strbeg;
2319 group = group->previous) group->strbeg = length;
2320 lookahead.strbeg = length;
2321 }
a6ec74c1 2322 break;
fc241834
RGS
2323 case 'x' | TYPE_IS_SHRIEKING: {
2324 I32 ai32;
62f95557
IZ
2325 if (!len) /* Avoid division by 0 */
2326 len = 1;
230e1fce 2327 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2328 else ai32 = (cur - start) % len;
2329 if (ai32 == 0) goto no_change;
2330 len -= ai32;
2331 }
2332 /* FALL THROUGH */
a6ec74c1 2333 case 'x':
f337b084 2334 goto grow;
a6ec74c1
JH
2335 case 'A':
2336 case 'Z':
f337b084 2337 case 'a': {
f7fe979e 2338 const char *aptr;
f337b084 2339
a6ec74c1 2340 fromstr = NEXTFROM;
e62f0680 2341 aptr = SvPV_const(fromstr, fromlen);
f337b084 2342 if (DO_UTF8(fromstr)) {
f7fe979e 2343 const char *end, *s;
f337b084
TH
2344
2345 if (!utf8 && !SvUTF8(cat)) {
2346 marked_upgrade(aTHX_ cat, symptr);
2347 lookahead.flags |= FLAG_DO_UTF8;
2348 lookahead.strbeg = symptr->strbeg;
2349 utf8 = 1;
2350 start = SvPVX(cat);
2351 cur = start + SvCUR(cat);
2352 }
fc241834 2353 if (howlen == e_star) {
f337b084
TH
2354 if (utf8) goto string_copy;
2355 len = fromlen+1;
2356 }
2357 s = aptr;
2358 end = aptr + fromlen;
2359 fromlen = datumtype == 'Z' ? len-1 : len;
2360 while ((I32) fromlen > 0 && s < end) {
2361 s += UTF8SKIP(s);
2362 fromlen--;
2363 }
2364 if (s > end)
2365 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2366 if (utf8) {
fc241834 2367 len = fromlen;
f337b084
TH
2368 if (datumtype == 'Z') len++;
2369 fromlen = s-aptr;
2370 len += fromlen;
fc241834 2371
f337b084 2372 goto string_copy;
fc241834 2373 }
f337b084
TH
2374 fromlen = len - fromlen;
2375 if (datumtype == 'Z') fromlen--;
2376 if (howlen == e_star) {
2377 len = fromlen;
2378 if (datumtype == 'Z') len++;
fc241834 2379 }
f337b084 2380 GROWING(0, cat, start, cur, len);
fc241834 2381 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2382 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2383 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2384 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2385 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2386 cur += fromlen;
a6ec74c1 2387 len -= fromlen;
f337b084
TH
2388 } else if (utf8) {
2389 if (howlen == e_star) {
2390 len = fromlen;
2391 if (datumtype == 'Z') len++;
a6ec74c1 2392 }
f337b084
TH
2393 if (len <= (I32) fromlen) {
2394 fromlen = len;
2395 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2396 }
fc241834 2397 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2398 upgrade, so:
2399 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2400 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2401 len -= fromlen;
2402 while (fromlen > 0) {
230e1fce 2403 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2404 aptr++;
2405 fromlen--;
fc241834 2406 }
f337b084
TH
2407 } else {
2408 string_copy:
2409 if (howlen == e_star) {
2410 len = fromlen;
2411 if (datumtype == 'Z') len++;
2412 }
2413 if (len <= (I32) fromlen) {
2414 fromlen = len;
2415 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2416 }
f337b084
TH
2417 GROWING(0, cat, start, cur, len);
2418 Copy(aptr, cur, fromlen, char);
2419 cur += fromlen;
2420 len -= fromlen;
a6ec74c1 2421 }
f337b084
TH
2422 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2423 cur += len;
3c4fb04a 2424 SvTAINT(cat);
a6ec74c1 2425 break;
f337b084 2426 }
a6ec74c1 2427 case 'B':
f337b084 2428 case 'b': {
b83604b4 2429 const char *str, *end;
f337b084
TH
2430 I32 l, field_len;
2431 U8 bits;
2432 bool utf8_source;
2433 U32 utf8_flags;
a6ec74c1 2434
fc241834 2435 fromstr = NEXTFROM;
b83604b4 2436 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2437 end = str + fromlen;
2438 if (DO_UTF8(fromstr)) {
2439 utf8_source = TRUE;
041457d9 2440 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2441 } else {
2442 utf8_source = FALSE;
2443 utf8_flags = 0; /* Unused, but keep compilers happy */
2444 }
2445 if (howlen == e_star) len = fromlen;
2446 field_len = (len+7)/8;
2447 GROWING(utf8, cat, start, cur, field_len);
2448 if (len > (I32)fromlen) len = fromlen;
2449 bits = 0;
2450 l = 0;
2451 if (datumtype == 'B')
2452 while (l++ < len) {
2453 if (utf8_source) {
95b63a38 2454 UV val = 0;
f337b084
TH
2455 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2456 bits |= val & 1;
2457 } else bits |= *str++ & 1;
2458 if (l & 7) bits <<= 1;
fc241834 2459 else {
f337b084
TH
2460 PUSH_BYTE(utf8, cur, bits);
2461 bits = 0;
a6ec74c1
JH
2462 }
2463 }
f337b084
TH
2464 else
2465 /* datumtype == 'b' */
2466 while (l++ < len) {
2467 if (utf8_source) {
95b63a38 2468 UV val = 0;
f337b084
TH
2469 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2470 if (val & 1) bits |= 0x80;
2471 } else if (*str++ & 1)
2472 bits |= 0x80;
2473 if (l & 7) bits >>= 1;
fc241834 2474 else {
f337b084
TH
2475 PUSH_BYTE(utf8, cur, bits);
2476 bits = 0;
a6ec74c1
JH
2477 }
2478 }
f337b084
TH
2479 l--;
2480 if (l & 7) {
fc241834 2481 if (datumtype == 'B')
f337b084 2482 bits <<= 7 - (l & 7);
fc241834 2483 else
f337b084
TH
2484 bits >>= 7 - (l & 7);
2485 PUSH_BYTE(utf8, cur, bits);
2486 l += 7;
a6ec74c1 2487 }
f337b084
TH
2488 /* Determine how many chars are left in the requested field */
2489 l /= 8;
2490 if (howlen == e_star) field_len = 0;
2491 else field_len -= l;
2492 Zero(cur, field_len, char);
2493 cur += field_len;
a6ec74c1 2494 break;
f337b084 2495 }
a6ec74c1 2496 case 'H':
f337b084 2497 case 'h': {
10516c54 2498 const char *str, *end;
f337b084
TH
2499 I32 l, field_len;
2500 U8 bits;
2501 bool utf8_source;
2502 U32 utf8_flags;
a6ec74c1 2503
fc241834 2504 fromstr = NEXTFROM;
10516c54 2505 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2506 end = str + fromlen;
2507 if (DO_UTF8(fromstr)) {
2508 utf8_source = TRUE;
041457d9 2509 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2510 } else {
2511 utf8_source = FALSE;
2512 utf8_flags = 0; /* Unused, but keep compilers happy */
2513 }
2514 if (howlen == e_star) len = fromlen;
2515 field_len = (len+1)/2;
2516 GROWING(utf8, cat, start, cur, field_len);
2517 if (!utf8 && len > (I32)fromlen) len = fromlen;
2518 bits = 0;
2519 l = 0;
2520 if (datumtype == 'H')
2521 while (l++ < len) {
2522 if (utf8_source) {
95b63a38 2523 UV val = 0;
f337b084
TH
2524 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2525 if (val < 256 && isALPHA(val))
2526 bits |= (val + 9) & 0xf;
a6ec74c1 2527 else
f337b084
TH
2528 bits |= val & 0xf;
2529 } else if (isALPHA(*str))
2530 bits |= (*str++ + 9) & 0xf;
2531 else
2532 bits |= *str++ & 0xf;
2533 if (l & 1) bits <<= 4;
fc241834 2534 else {
f337b084
TH
2535 PUSH_BYTE(utf8, cur, bits);
2536 bits = 0;
a6ec74c1
JH
2537 }
2538 }
f337b084
TH
2539 else
2540 while (l++ < len) {
2541 if (utf8_source) {
95b63a38 2542 UV val = 0;
f337b084
TH
2543 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2544 if (val < 256 && isALPHA(val))
2545 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2546 else
f337b084
TH
2547 bits |= (val & 0xf) << 4;
2548 } else if (isALPHA(*str))
2549 bits |= ((*str++ + 9) & 0xf) << 4;
2550 else
2551 bits |= (*str++ & 0xf) << 4;
2552 if (l & 1) bits >>= 4;
fc241834 2553 else {
f337b084
TH
2554 PUSH_BYTE(utf8, cur, bits);
2555 bits = 0;
a6ec74c1 2556 }
fc241834 2557 }
f337b084
TH
2558 l--;
2559 if (l & 1) {
2560 PUSH_BYTE(utf8, cur, bits);
2561 l++;
2562 }
2563 /* Determine how many chars are left in the requested field */
2564 l /= 2;
2565 if (howlen == e_star) field_len = 0;
2566 else field_len -= l;
2567 Zero(cur, field_len, char);
2568 cur += field_len;
2569 break;
fc241834
RGS
2570 }
2571 case 'c':
f337b084
TH
2572 while (len-- > 0) {
2573 IV aiv;
2574 fromstr = NEXTFROM;
2575 aiv = SvIV(fromstr);
a2a5de95
NC
2576 if ((-128 > aiv || aiv > 127))
2577 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2578 "Character in 'c' format wrapped in pack");
585ec06d 2579 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2580 }
2581 break;
2582 case 'C':
f337b084
TH
2583 if (len == 0) {
2584 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2585 break;
2586 }
a6ec74c1 2587 while (len-- > 0) {
f337b084 2588 IV aiv;
a6ec74c1 2589 fromstr = NEXTFROM;
f337b084 2590 aiv = SvIV(fromstr);
a2a5de95
NC
2591 if ((0 > aiv || aiv > 0xff))
2592 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2593 "Character in 'C' format wrapped in pack");
1651fc44 2594 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2595 }
fc241834
RGS
2596 break;
2597 case 'W': {
2598 char *end;
670f1322 2599 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2600
2601 end = start+SvLEN(cat)-1;
2602 if (utf8) end -= UTF8_MAXLEN-1;
2603 while (len-- > 0) {
2604 UV auv;
2605 fromstr = NEXTFROM;
2606 auv = SvUV(fromstr);
2607 if (in_bytes) auv = auv % 0x100;
2608 if (utf8) {
2609 W_utf8:
2610 if (cur > end) {
2611 *cur = '\0';
b162af07 2612 SvCUR_set(cat, cur - start);
fc241834
RGS
2613
2614 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2615 end = start+SvLEN(cat)-UTF8_MAXLEN;
2616 }
230e1fce
NC
2617 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2618 NATIVE_TO_UNI(auv),
041457d9 2619 warn_utf8 ?
230e1fce 2620 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2621 } else {
2622 if (auv >= 0x100) {
2623 if (!SvUTF8(cat)) {
2624 *cur = '\0';
b162af07 2625 SvCUR_set(cat, cur - start);
fc241834
RGS
2626 marked_upgrade(aTHX_ cat, symptr);
2627 lookahead.flags |= FLAG_DO_UTF8;
2628 lookahead.strbeg = symptr->strbeg;
2629 utf8 = 1;
2630 start = SvPVX(cat);
2631 cur = start + SvCUR(cat);
2632 end = start+SvLEN(cat)-UTF8_MAXLEN;
2633 goto W_utf8;
2634 }
a2a5de95
NC
2635 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2636 "Character in 'W' format wrapped in pack");
fc241834
RGS
2637 auv &= 0xff;
2638 }
2639 if (cur >= end) {
2640 *cur = '\0';
b162af07 2641 SvCUR_set(cat, cur - start);
fc241834
RGS
2642 GROWING(0, cat, start, cur, len+1);
2643 end = start+SvLEN(cat)-1;
2644 }
fe2774ed 2645 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2646 }
2647 }
2648 break;
fc241834
RGS
2649 }
2650 case 'U': {
2651 char *end;
2652
2653 if (len == 0) {
2654 if (!(symptr->flags & FLAG_DO_UTF8)) {
2655 marked_upgrade(aTHX_ cat, symptr);
2656 lookahead.flags |= FLAG_DO_UTF8;
2657 lookahead.strbeg = symptr->strbeg;
2658 }
2659 utf8 = 0;
2660 goto no_change;
2661 }
2662
2663 end = start+SvLEN(cat);
2664 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2665 while (len-- > 0) {
fc241834 2666 UV auv;
a6ec74c1 2667 fromstr = NEXTFROM;
fc241834
RGS
2668 auv = SvUV(fromstr);
2669 if (utf8) {
230e1fce 2670 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2671 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2672 warn_utf8 ?
fc241834
RGS
2673 0 : UNICODE_ALLOW_ANY);
2674 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2675 *cur = '\0';
b162af07 2676 SvCUR_set(cat, cur - start);
fc241834
RGS
2677 GROWING(0, cat, start, cur,
2678 len+(endb-buffer)*UTF8_EXPAND);
2679 end = start+SvLEN(cat);
2680 }
64844641 2681 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2682 } else {
2683 if (cur >= end) {
2684 *cur = '\0';
b162af07 2685 SvCUR_set(cat, cur - start);
fc241834
RGS
2686 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2687 end = start+SvLEN(cat)-UTF8_MAXLEN;
2688 }
230e1fce 2689 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2690 warn_utf8 ?
230e1fce 2691 0 : UNICODE_ALLOW_ANY);
fc241834 2692 }
a6ec74c1 2693 }
a6ec74c1 2694 break;
fc241834 2695 }
a6ec74c1
JH
2696 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2697 case 'f':
a6ec74c1 2698 while (len-- > 0) {
f337b084
TH
2699 float afloat;
2700 NV anv;
a6ec74c1 2701 fromstr = NEXTFROM;
f337b084 2702 anv = SvNV(fromstr);
85bba25f 2703# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2704 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2705 * on Alpha; fake it if we don't have them.
2706 */
f337b084 2707 if (anv > FLT_MAX)
fc241834 2708 afloat = FLT_MAX;
f337b084 2709 else if (anv < -FLT_MAX)
fc241834 2710 afloat = -FLT_MAX;
f337b084 2711 else afloat = (float)anv;
baf3cf9c 2712# else
f337b084 2713 afloat = (float)anv;
baf3cf9c 2714# endif
7e38f754 2715 DO_BO_PACK(afloat, float);
f337b084 2716 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2717 }
2718 break;
2719 case 'd':
a6ec74c1 2720 while (len-- > 0) {
f337b084
TH
2721 double adouble;
2722 NV anv;
a6ec74c1 2723 fromstr = NEXTFROM;
f337b084 2724 anv = SvNV(fromstr);
85bba25f 2725# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2726 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2727 * on Alpha; fake it if we don't have them.
2728 */
f337b084 2729 if (anv > DBL_MAX)
fc241834 2730 adouble = DBL_MAX;
f337b084 2731 else if (anv < -DBL_MAX)
fc241834 2732 adouble = -DBL_MAX;
f337b084 2733 else adouble = (double)anv;
baf3cf9c 2734# else
f337b084 2735 adouble = (double)anv;
baf3cf9c 2736# endif
7e38f754 2737 DO_BO_PACK(adouble, double);
f337b084 2738 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2739 }
2740 break;
fc241834 2741 case 'F': {
275663fa 2742 NV_bytes anv;
1109a392 2743 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2744 while (len-- > 0) {
2745 fromstr = NEXTFROM;
cd07c537
DM
2746#ifdef __GNUC__
2747 /* to work round a gcc/x86 bug; don't use SvNV */
2748 anv.nv = sv_2nv(fromstr);
2749#else
275663fa 2750 anv.nv = SvNV(fromstr);
cd07c537 2751#endif
7e38f754 2752 DO_BO_PACK(anv, NV);
275663fa 2753 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2754 }
2755 break;
fc241834 2756 }
92d41999 2757#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2758 case 'D': {
275663fa 2759 ld_bytes aldouble;
1109a392
MHM
2760 /* long doubles can have unused bits, which may be nonzero */
2761 Zero(&aldouble, 1, long double);
92d41999
JH
2762 while (len-- > 0) {
2763 fromstr = NEXTFROM;
cd07c537
DM
2764# ifdef __GNUC__
2765 /* to work round a gcc/x86 bug; don't use SvNV */
2766 aldouble.ld = (long double)sv_2nv(fromstr);
2767# else
275663fa 2768 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2769# endif
7e38f754 2770 DO_BO_PACK(aldouble, long double);
275663fa 2771 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2772 }
2773 break;
fc241834 2774 }
92d41999 2775#endif
068bd2e7 2776 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2777 case 'n':
2778 while (len-- > 0) {
f337b084 2779 I16 ai16;
a6ec74c1 2780 fromstr = NEXTFROM;
ef108786 2781 ai16 = (I16)SvIV(fromstr);
ef108786 2782 ai16 = PerlSock_htons(ai16);
f337b084 2783 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2784 }
2785 break;
068bd2e7 2786 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2787 case 'v':
2788 while (len-- > 0) {
f337b084 2789 I16 ai16;
a6ec74c1 2790 fromstr = NEXTFROM;
ef108786 2791 ai16 = (I16)SvIV(fromstr);
ef108786 2792 ai16 = htovs(ai16);
f337b084 2793 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2794 }
2795 break;
49704364 2796 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2797#if SHORTSIZE != SIZE16
fc241834 2798 while (len-- > 0) {
f337b084 2799 unsigned short aushort;
fc241834
RGS
2800 fromstr = NEXTFROM;
2801 aushort = SvUV(fromstr);
2802 DO_BO_PACK(aushort, s);
f337b084 2803 PUSH_VAR(utf8, cur, aushort);
fc241834 2804 }
49704364
WL
2805 break;
2806#else
2807 /* Fall through! */
a6ec74c1 2808#endif
49704364 2809 case 'S':
fc241834 2810 while (len-- > 0) {
f337b084 2811 U16 au16;
fc241834
RGS
2812 fromstr = NEXTFROM;
2813 au16 = (U16)SvUV(fromstr);
2814 DO_BO_PACK(au16, 16);
f337b084 2815 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2816 }
2817 break;
49704364 2818 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2819#if SHORTSIZE != SIZE16
fc241834 2820 while (len-- > 0) {
f337b084 2821 short ashort;
fc241834
RGS
2822 fromstr = NEXTFROM;
2823 ashort = SvIV(fromstr);
2824 DO_BO_PACK(ashort, s);
f337b084 2825 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2826 }
49704364
WL
2827 break;
2828#else
2829 /* Fall through! */
a6ec74c1 2830#endif
49704364
WL
2831 case 's':
2832 while (len-- > 0) {
f337b084 2833 I16 ai16;
49704364 2834 fromstr = NEXTFROM;
ef108786
MHM
2835 ai16 = (I16)SvIV(fromstr);
2836 DO_BO_PACK(ai16, 16);
f337b084 2837 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2838 }
2839 break;
2840 case 'I':
49704364 2841 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2842 while (len-- > 0) {
f337b084 2843 unsigned int auint;
a6ec74c1
JH
2844 fromstr = NEXTFROM;
2845 auint = SvUV(fromstr);
1109a392 2846 DO_BO_PACK(auint, i);
f337b084 2847 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2848 }
2849 break;
92d41999
JH
2850 case 'j':
2851 while (len-- > 0) {
f337b084 2852 IV aiv;
92d41999
JH
2853 fromstr = NEXTFROM;
2854 aiv = SvIV(fromstr);
1109a392
MHM
2855#if IVSIZE == INTSIZE
2856 DO_BO_PACK(aiv, i);
2857#elif IVSIZE == LONGSIZE
2858 DO_BO_PACK(aiv, l);
2859#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2860 DO_BO_PACK(aiv, 64);
f337b084
TH
2861#else
2862 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 2863#endif
f337b084 2864 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
2865 }
2866 break;
2867 case 'J':
2868 while (len-- > 0) {
f337b084 2869 UV auv;
92d41999
JH
2870 fromstr = NEXTFROM;
2871 auv = SvUV(fromstr);
1109a392
MHM
2872#if UVSIZE == INTSIZE
2873 DO_BO_PACK(auv, i);
2874#elif UVSIZE == LONGSIZE
2875 DO_BO_PACK(auv, l);
2876#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2877 DO_BO_PACK(auv, 64);
f337b084
TH
2878#else
2879 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 2880#endif
f337b084 2881 PUSH_VAR(utf8, cur, auv);
92d41999
JH
2882 }
2883 break;
a6ec74c1
JH
2884 case 'w':
2885 while (len-- > 0) {
f337b084 2886 NV anv;
a6ec74c1 2887 fromstr = NEXTFROM;
15e9f109 2888 anv = SvNV(fromstr);
a6ec74c1 2889
f337b084
TH
2890 if (anv < 0) {
2891 *cur = '\0';
b162af07 2892 SvCUR_set(cat, cur - start);
49704364 2893 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2894 }
a6ec74c1 2895
196b62db
NC
2896 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2897 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2898 any negative IVs will have already been got by the croak()
2899 above. IOK is untrue for fractions, so we test them
2900 against UV_MAX_P1. */
f337b084
TH
2901 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2902 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2903 char *in = buf + sizeof(buf);
196b62db 2904 UV auv = SvUV(fromstr);
a6ec74c1
JH
2905
2906 do {
eb160463 2907 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2908 auv >>= 7;
2909 } while (auv);
2910 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2911 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2912 in, (buf + sizeof(buf)) - in);
2913 } else if (SvPOKp(fromstr))
2914 goto w_string;
a6ec74c1 2915 else if (SvNOKp(fromstr)) {
0258719b 2916 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2917 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2918 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2919 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2920 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2921 Some C compilers are strict about integral constant
2922 expressions so we conservatively divide by a slightly
2923 smaller integer instead of multiplying by the exact
2924 floating-point value.
0258719b
NC
2925 */
2926#ifdef NV_MAX_10_EXP
f337b084 2927 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2928 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2929#else
f337b084 2930 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2931 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2932#endif
a6ec74c1
JH
2933 char *in = buf + sizeof(buf);
2934
8b6e33c7 2935 anv = Perl_floor(anv);
a6ec74c1 2936 do {
8b6e33c7 2937 const NV next = Perl_floor(anv / 128);
a6ec74c1 2938 if (in <= buf) /* this cannot happen ;-) */
49704364 2939 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2940 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2941 anv = next;
2942 } while (anv > 0);
a6ec74c1 2943 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2944 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2945 in, (buf + sizeof(buf)) - in);
2946 } else {
8b6e33c7
AL
2947 const char *from;
2948 char *result, *in;
735b914b
JH
2949 SV *norm;
2950 STRLEN len;
2951 bool done;
2952
f337b084 2953 w_string:
735b914b 2954 /* Copy string and check for compliance */
349d4f2f 2955 from = SvPV_const(fromstr, len);
735b914b 2956 if ((norm = is_an_int(from, len)) == NULL)
49704364 2957 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2958
a02a5408 2959 Newx(result, len, char);
735b914b
JH
2960 in = result + len;
2961 done = FALSE;
f337b084 2962 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2963 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2964 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2965 in, (result + len) - in);
735b914b
JH
2966 Safefree(result);
2967 SvREFCNT_dec(norm); /* free norm */
fc241834 2968 }
a6ec74c1
JH
2969 }
2970 break;
2971 case 'i':
49704364 2972 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2973 while (len-- > 0) {
f337b084 2974 int aint;
a6ec74c1
JH
2975 fromstr = NEXTFROM;
2976 aint = SvIV(fromstr);
1109a392 2977 DO_BO_PACK(aint, i);
f337b084 2978 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
2979 }
2980 break;
068bd2e7 2981 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2982 case 'N':
2983 while (len-- > 0) {
f337b084 2984 U32 au32;
a6ec74c1 2985 fromstr = NEXTFROM;
ef108786 2986 au32 = SvUV(fromstr);
ef108786 2987 au32 = PerlSock_htonl(au32);
f337b084 2988 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2989 }
2990 break;
068bd2e7 2991 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2992 case 'V':
2993 while (len-- > 0) {
f337b084 2994 U32 au32;
a6ec74c1 2995 fromstr = NEXTFROM;
ef108786 2996 au32 = SvUV(fromstr);
ef108786 2997 au32 = htovl(au32);
f337b084 2998 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2999 }
3000 break;
49704364 3001 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3002#if LONGSIZE != SIZE32
fc241834 3003 while (len-- > 0) {
f337b084 3004 unsigned long aulong;
fc241834
RGS
3005 fromstr = NEXTFROM;
3006 aulong = SvUV(fromstr);
3007 DO_BO_PACK(aulong, l);
f337b084 3008 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3009 }
49704364
WL
3010 break;
3011#else
3012 /* Fall though! */
a6ec74c1 3013#endif
49704364 3014 case 'L':
fc241834 3015 while (len-- > 0) {
f337b084 3016 U32 au32;
fc241834
RGS
3017 fromstr = NEXTFROM;
3018 au32 = SvUV(fromstr);
3019 DO_BO_PACK(au32, 32);
f337b084 3020 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3021 }
3022 break;
49704364 3023 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3024#if LONGSIZE != SIZE32
fc241834 3025 while (len-- > 0) {
f337b084 3026 long along;
fc241834
RGS
3027 fromstr = NEXTFROM;
3028 along = SvIV(fromstr);
3029 DO_BO_PACK(along, l);
f337b084 3030 PUSH_VAR(utf8, cur, along);
a6ec74c1 3031 }
49704364
WL
3032 break;
3033#else
3034 /* Fall though! */
a6ec74c1 3035#endif
49704364
WL
3036 case 'l':
3037 while (len-- > 0) {
f337b084 3038 I32 ai32;
49704364 3039 fromstr = NEXTFROM;
ef108786
MHM
3040 ai32 = SvIV(fromstr);
3041 DO_BO_PACK(ai32, 32);
f337b084 3042 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3043 }
3044 break;
3045#ifdef HAS_QUAD
3046 case 'Q':
3047 while (len-- > 0) {
f337b084 3048 Uquad_t auquad;
a6ec74c1 3049 fromstr = NEXTFROM;
f337b084 3050 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3051 DO_BO_PACK(auquad, 64);
f337b084 3052 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3053 }
3054 break;
3055 case 'q':
3056 while (len-- > 0) {
f337b084 3057 Quad_t aquad;
a6ec74c1
JH
3058 fromstr = NEXTFROM;
3059 aquad = (Quad_t)SvIV(fromstr);
1109a392 3060 DO_BO_PACK(aquad, 64);
f337b084 3061 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3062 }
3063 break;
f337b084 3064#endif /* HAS_QUAD */
a6ec74c1
JH
3065 case 'P':
3066 len = 1; /* assume SV is correct length */
f337b084 3067 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3068 /* Fall through! */
a6ec74c1
JH
3069 case 'p':
3070 while (len-- > 0) {
83003860 3071 const char *aptr;
f337b084 3072
a6ec74c1 3073 fromstr = NEXTFROM;
28a4f200
TH
3074 SvGETMAGIC(fromstr);
3075 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3076 else {
a6ec74c1
JH
3077 /* XXX better yet, could spirit away the string to
3078 * a safe spot and hang on to it until the result
3079 * of pack() (and all copies of the result) are
3080 * gone.
3081 */
041457d9 3082 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3083 !SvREADONLY(fromstr)))) {
3084 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3085 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3086 }
3087 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3088 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3089 else
2596d9fe 3090 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3091 }
7e38f754 3092 DO_BO_PACK(aptr, pointer);
f337b084 3093 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3094 }
3095 break;
fc241834 3096 case 'u': {
f7fe979e 3097 const char *aptr, *aend;
fc241834 3098 bool from_utf8;
f337b084 3099
a6ec74c1 3100 fromstr = NEXTFROM;
fc241834
RGS
3101 if (len <= 2) len = 45;
3102 else len = len / 3 * 3;
3103 if (len >= 64) {
a2a5de95
NC
3104 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3105 "Field too wide in 'u' format in pack");
fc241834
RGS
3106 len = 63;
3107 }
83003860 3108 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3109 from_utf8 = DO_UTF8(fromstr);
3110 if (from_utf8) {
3111 aend = aptr + fromlen;
3f63b0e5 3112 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3113 } else aend = NULL; /* Unused, but keep compilers happy */
3114 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3115 while (fromlen > 0) {
fc241834 3116 U8 *end;
a6ec74c1 3117 I32 todo;
fc241834 3118 U8 hunk[1+63/3*4+1];
a6ec74c1 3119
eb160463 3120 if ((I32)fromlen > len)
a6ec74c1
JH
3121 todo = len;
3122 else
3123 todo = fromlen;
fc241834
RGS
3124 if (from_utf8) {
3125 char buffer[64];
3126 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3127 'u' | TYPE_IS_PACK)) {
3128 *cur = '\0';
b162af07 3129 SvCUR_set(cat, cur - start);
5637ef5b
NC
3130 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3131 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3132 aptr, aend, buffer, (long) todo);
fc241834
RGS
3133 }
3134 end = doencodes(hunk, buffer, todo);
3135 } else {
3136 end = doencodes(hunk, aptr, todo);
3137 aptr += todo;
3138 }
3139 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3140 fromlen -= todo;
3141 }
a6ec74c1
JH
3142 break;
3143 }
f337b084
TH
3144 }
3145 *cur = '\0';
b162af07 3146 SvCUR_set(cat, cur - start);
f337b084 3147 no_change:
49704364 3148 *symptr = lookahead;
a6ec74c1 3149 }
49704364 3150 return beglist;
18529408
IZ
3151}
3152#undef NEXTFROM
3153
3154
3155PP(pp_pack)
3156{
97aff369 3157 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3158 SV *cat = TARG;
18529408 3159 STRLEN fromlen;
349d4f2f 3160 SV *pat_sv = *++MARK;
eb578fdb
KW
3161 const char *pat = SvPV_const(pat_sv, fromlen);
3162 const char *patend = pat + fromlen;
18529408
IZ
3163
3164 MARK++;
76f68e9b 3165 sv_setpvs(cat, "");
f337b084 3166 SvUTF8_off(cat);
18529408 3167
7accc089 3168 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3169
a6ec74c1
JH
3170 SvSETMAGIC(cat);
3171 SP = ORIGMARK;
3172 PUSHs(cat);
3173 RETURN;
3174}
a6ec74c1 3175
73cb7263
NC
3176/*
3177 * Local variables:
3178 * c-indentation-style: bsd
3179 * c-basic-offset: 4
14d04a33 3180 * indent-tabs-mode: nil
73cb7263
NC
3181 * End:
3182 *
14d04a33 3183 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3184 */