This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Do not printf U32 and I32 (both) as %i.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
f337b084 154 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
WL
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
219#if 'I' == 73 && 'J' == 74
220/* On an ASCII/ISO kind of system */
221#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222#else
223/*
224 Some other sort of character set - use memchr() so we don't match
225 the null byte.
226 */
227#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
228#endif
229
66c611c5 230/* type modifiers */
62f95557 231#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
232#define TYPE_IS_BIG_ENDIAN 0x200
233#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 234#define TYPE_IS_PACK 0x800
1109a392 235#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 236#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
237#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
238
7212898e
NC
239# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
241
242# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
243
78d46eaa 244#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 245#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
246#define PACK_SIZE_MASK 0x3F
247
298bc19c 248#include "packsizetables.c"
78d46eaa 249
20aa3a38
NC
250static void
251S_reverse_copy(const char *src, char *dest, STRLEN len)
252{
253 dest += len;
254 while (len--)
255 *--dest = *src++;
256}
257
08ca2aa3 258STATIC U8
f7fe979e 259uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 260{
08ca2aa3 261 STRLEN retlen;
0bcc34c2 262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 264 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 270 if (val >= 0x100) {
a2a5de95
NC
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
274 val &= 0xff;
275 }
276 *s += retlen;
fe2774ed 277 return (U8)val;
08ca2aa3
TH
278}
279
f337b084
TH
280#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
282 *(U8 *)(s)++)
283
284STATIC bool
f7fe979e 285uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
286{
287 UV val;
288 STRLEN retlen;
f7fe979e 289 const char *from = *s;
08ca2aa3 290 int bad = 0;
f7fe979e 291 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
293 const bool needs_swap = NEEDS_SWAP(datumtype);
294
228e69a7 295 if (UNLIKELY(needs_swap))
20aa3a38
NC
296 buf += buf_len;
297
08ca2aa3
TH
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
f337b084 300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
303 bad |= 1;
304 } else from += retlen;
305 if (val >= 0x100) {
306 bad |= 2;
307 val &= 0xff;
308 }
228e69a7 309 if (UNLIKELY(needs_swap))
20aa3a38
NC
310 *(U8 *)--buf = (U8)val;
311 else
312 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
313 }
314 /* We have enough characters for the buffer. Did we have problems ? */
315 if (bad) {
316 if (bad & 1) {
317 /* Rewalk the string fragment while warning */
f7fe979e 318 const char *ptr;
9e27e96a 319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
c80e42f3 322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
323 }
324 if (from > end) from = end;
325 }
a2a5de95
NC
326 if ((bad & 2))
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 328 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
332 }
333 *s = from;
334 return TRUE;
335}
336
337STATIC bool
f7fe979e 338next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 339{
97aff369 340 dVAR;
08ca2aa3 341 STRLEN retlen;
0bcc34c2 342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
343 if (val >= 0x100 || !ISUUCHAR(val) ||
344 retlen == (STRLEN) -1 || retlen == 0) {
345 *out = 0;
346 return FALSE;
347 }
348 *out = PL_uudmap[val] & 077;
f337b084 349 *s += retlen;
08ca2aa3
TH
350 return TRUE;
351}
78d46eaa 352
64844641 353STATIC char *
3a88beaa 354S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
7918f24d
NC
355 PERL_ARGS_ASSERT_BYTES_TO_UNI;
356
228e69a7 357 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
358 const U8 *p = start + len;
359 while (p-- > start) {
55d09dc8 360 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
361 }
362 } else {
363 const U8 * const end = start + len;
364 while (start < end) {
55d09dc8 365 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
366 start++;
367 }
f337b084 368 }
64844641 369 return dest;
f337b084
TH
370}
371
3a88beaa 372#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 373STMT_START { \
228e69a7 374 if (UNLIKELY(utf8)) \
3a88beaa 375 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
230e1fce 376 else { \
228e69a7 377 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
378 S_reverse_copy((char *)(buf), cur, len); \
379 else \
380 Copy(buf, cur, len, char); \
230e1fce
NC
381 (cur) += (len); \
382 } \
f337b084
TH
383} STMT_END
384
385#define GROWING(utf8, cat, start, cur, in_len) \
386STMT_START { \
387 STRLEN glen = (in_len); \
3473cf63 388 if (utf8) glen *= UTF8_EXPAND; \
f337b084 389 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 390 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
391 (cur) = (start) + SvCUR(cat); \
392 } \
393} STMT_END
394
395#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
396STMT_START { \
f7fe979e 397 const STRLEN glen = (in_len); \
f337b084 398 STRLEN gl = glen; \
3473cf63 399 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
400 if ((cur) + gl >= (start) + SvLEN(cat)) { \
401 *cur = '\0'; \
b162af07 402 SvCUR_set((cat), (cur) - (start)); \
0bd48802 403 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
404 (cur) = (start) + SvCUR(cat); \
405 } \
3a88beaa 406 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
407} STMT_END
408
409#define PUSH_BYTE(utf8, s, byte) \
410STMT_START { \
411 if (utf8) { \
f7fe979e 412 const U8 au8 = (byte); \
3a88beaa 413 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
f337b084
TH
414 } else *(U8 *)(s)++ = (byte); \
415} STMT_END
416
417/* Only to be used inside a loop (see the break) */
418#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
419STMT_START { \
420 STRLEN retlen; \
421 if (str >= end) break; \
422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
423 if (retlen == (STRLEN) -1 || retlen == 0) { \
424 *cur = '\0'; \
425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
426 } \
427 str += retlen; \
428} STMT_END
429
f7fe979e
AL
430static const char *_action( const tempsym_t* symptr )
431{
10edeb5d 432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
433}
434
206947d2 435/* Returns the sizeof() struct described by pat */
028d1f6d 436STATIC I32
f337b084 437S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 438{
f337b084 439 I32 total = 0;
206947d2 440
7918f24d
NC
441 PERL_ARGS_ASSERT_MEASURE_STRUCT;
442
49704364 443 while (next_symbol(symptr)) {
f337b084 444 I32 len;
f7fe979e 445 int size;
f337b084
TH
446
447 switch (symptr->howlen) {
fc241834 448 case e_star:
49704364 449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 450 _action( symptr ) );
81d52ecd 451
f337b084
TH
452 default:
453 /* e_no_len and e_number */
454 len = symptr->length;
455 break;
49704364
WL
456 }
457
a7a3cfaa 458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 459 if (!size) {
f7fe979e 460 int star;
80a13697
NC
461 /* endianness doesn't influence the size of a type */
462 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
463 default:
464 Perl_croak(aTHX_ "Invalid type '%c' in %s",
465 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 466 _action( symptr ) );
28be1210
TH
467 case '.' | TYPE_IS_SHRIEKING:
468 case '@' | TYPE_IS_SHRIEKING:
80a13697 469 case '@':
28be1210 470 case '.':
80a13697
NC
471 case '/':
472 case 'U': /* XXXX Is it correct? */
473 case 'w':
474 case 'u':
475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 476 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 477 _action( symptr ) );
80a13697
NC
478 case '%':
479 size = 0;
480 break;
481 case '(':
fc241834
RGS
482 {
483 tempsym_t savsym = *symptr;
484 symptr->patptr = savsym.grpbeg;
485 symptr->patend = savsym.grpend;
486 /* XXXX Theoretically, we need to measure many times at
487 different positions, since the subexpression may contain
488 alignment commands, but be not of aligned length.
489 Need to detect this and croak(). */
490 size = measure_struct(symptr);
491 *symptr = savsym;
492 break;
493 }
80a13697
NC
494 case 'X' | TYPE_IS_SHRIEKING:
495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
496 */
497 if (!len) /* Avoid division by 0 */
498 len = 1;
499 len = total % len; /* Assumed: the start is aligned. */
924ba076 500 /* FALLTHROUGH */
80a13697
NC
501 case 'X':
502 size = -1;
503 if (total < len)
f7fe979e 504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
505 break;
506 case 'x' | TYPE_IS_SHRIEKING:
507 if (!len) /* Avoid division by 0 */
508 len = 1;
509 star = total % len; /* Assumed: the start is aligned. */
510 if (star) /* Other portable ways? */
511 len = len - star;
512 else
513 len = 0;
924ba076 514 /* FALLTHROUGH */
80a13697
NC
515 case 'x':
516 case 'A':
517 case 'Z':
518 case 'a':
80a13697
NC
519 size = 1;
520 break;
521 case 'B':
522 case 'b':
523 len = (len + 7)/8;
524 size = 1;
525 break;
526 case 'H':
527 case 'h':
528 len = (len + 1)/2;
529 size = 1;
530 break;
78d46eaa 531
80a13697
NC
532 case 'P':
533 len = 1;
534 size = sizeof(char*);
78d46eaa
NC
535 break;
536 }
206947d2
IZ
537 }
538 total += len * size;
539 }
540 return total;
541}
542
49704364
WL
543
544/* locate matching closing parenthesis or bracket
545 * returns char pointer to char after match, or NULL
546 */
f7fe979e 547STATIC const char *
5aaab254 548S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 549{
7918f24d
NC
550 PERL_ARGS_ASSERT_GROUP_END;
551
49704364 552 while (patptr < patend) {
f7fe979e 553 const char c = *patptr++;
49704364
WL
554
555 if (isSPACE(c))
556 continue;
557 else if (c == ender)
558 return patptr-1;
559 else if (c == '#') {
560 while (patptr < patend && *patptr != '\n')
561 patptr++;
562 continue;
563 } else if (c == '(')
564 patptr = group_end(patptr, patend, ')') + 1;
565 else if (c == '[')
566 patptr = group_end(patptr, patend, ']') + 1;
18529408 567 }
49704364
WL
568 Perl_croak(aTHX_ "No group ending character '%c' found in template",
569 ender);
81d52ecd 570 NOT_REACHED; /* NOTREACHED */
18529408
IZ
571}
572
49704364
WL
573
574/* Convert unsigned decimal number to binary.
575 * Expects a pointer to the first digit and address of length variable
576 * Advances char pointer to 1st non-digit char and returns number
fc241834 577 */
f7fe979e 578STATIC const char *
5aaab254 579S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
580{
581 I32 len = *patptr++ - '0';
7918f24d
NC
582
583 PERL_ARGS_ASSERT_GET_NUM;
584
49704364
WL
585 while (isDIGIT(*patptr)) {
586 if (len >= 0x7FFFFFFF/10)
587 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
588 len = (len * 10) + (*patptr++ - '0');
589 }
590 *lenptr = len;
591 return patptr;
592}
593
594/* The marvellous template parsing routine: Using state stored in *symptr,
595 * locates next template code and count
596 */
597STATIC bool
f337b084 598S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 599{
f7fe979e 600 const char* patptr = symptr->patptr;
0bcc34c2 601 const char* const patend = symptr->patend;
49704364 602
7918f24d
NC
603 PERL_ARGS_ASSERT_NEXT_SYMBOL;
604
49704364
WL
605 symptr->flags &= ~FLAG_SLASH;
606
607 while (patptr < patend) {
608 if (isSPACE(*patptr))
609 patptr++;
610 else if (*patptr == '#') {
611 patptr++;
612 while (patptr < patend && *patptr != '\n')
613 patptr++;
614 if (patptr < patend)
615 patptr++;
616 } else {
fc241834 617 /* We should have found a template code */
49704364 618 I32 code = *patptr++ & 0xFF;
66c611c5 619 U32 inherited_modifiers = 0;
49704364
WL
620
621 if (code == ','){ /* grandfather in commas but with a warning */
622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
623 symptr->flags |= FLAG_COMMA;
624 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 625 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
626 }
627 continue;
628 }
fc241834 629
49704364 630 /* for '(', skip to ')' */
fc241834 631 if (code == '(') {
49704364
WL
632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
633 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 634 _action( symptr ) );
49704364
WL
635 symptr->grpbeg = patptr;
636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 639 _action( symptr ) );
49704364
WL
640 }
641
66c611c5
MHM
642 /* look for group modifiers to inherit */
643 if (TYPE_ENDIANNESS(symptr->flags)) {
644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
646 }
647
1109a392
MHM
648 /* look for modifiers */
649 while (patptr < patend) {
b7787f18
AL
650 const char *allowed;
651 I32 modifier;
1109a392
MHM
652 switch (*patptr) {
653 case '!':
654 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 655 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
656 break;
657 case '>':
658 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 659 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
660 break;
661 case '<':
662 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 663 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
664 break;
665 default:
b7787f18
AL
666 allowed = "";
667 modifier = 0;
1109a392
MHM
668 break;
669 }
66c611c5 670
1109a392
MHM
671 if (modifier == 0)
672 break;
66c611c5 673
1109a392
MHM
674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 676 allowed, _action( symptr ) );
66c611c5
MHM
677
678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
682 TYPE_ENDIANNESS_MASK)
683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 684 *patptr, _action( symptr ) );
66c611c5 685
a2a5de95
NC
686 if ((code & modifier)) {
687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
688 "Duplicate modifier '%c' after '%c' in %s",
689 *patptr, (int) TYPE_NO_MODIFIERS(code),
690 _action( symptr ) );
1109a392 691 }
66c611c5 692
1109a392
MHM
693 code |= modifier;
694 patptr++;
49704364
WL
695 }
696
66c611c5
MHM
697 /* inherit modifiers */
698 code |= inherited_modifiers;
699
fc241834 700 /* look for count and/or / */
49704364
WL
701 if (patptr < patend) {
702 if (isDIGIT(*patptr)) {
703 patptr = get_num( patptr, &symptr->length );
704 symptr->howlen = e_number;
705
706 } else if (*patptr == '*') {
707 patptr++;
708 symptr->howlen = e_star;
709
710 } else if (*patptr == '[') {
f7fe979e 711 const char* lenptr = ++patptr;
49704364
WL
712 symptr->howlen = e_number;
713 patptr = group_end( patptr, patend, ']' ) + 1;
714 /* what kind of [] is it? */
715 if (isDIGIT(*lenptr)) {
716 lenptr = get_num( lenptr, &symptr->length );
717 if( *lenptr != ']' )
718 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 719 _action( symptr ) );
49704364
WL
720 } else {
721 tempsym_t savsym = *symptr;
722 symptr->patend = patptr-1;
723 symptr->patptr = lenptr;
724 savsym.length = measure_struct(symptr);
725 *symptr = savsym;
726 }
727 } else {
728 symptr->howlen = e_no_len;
729 symptr->length = 1;
730 }
731
732 /* try to find / */
733 while (patptr < patend) {
734 if (isSPACE(*patptr))
735 patptr++;
736 else if (*patptr == '#') {
737 patptr++;
738 while (patptr < patend && *patptr != '\n')
739 patptr++;
740 if (patptr < patend)
741 patptr++;
742 } else {
66c611c5 743 if (*patptr == '/') {
49704364
WL
744 symptr->flags |= FLAG_SLASH;
745 patptr++;
66c611c5
MHM
746 if (patptr < patend &&
747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 749 _action( symptr ) );
49704364
WL
750 }
751 break;
752 }
18529408 753 }
49704364
WL
754 } else {
755 /* at end - no count, no / */
756 symptr->howlen = e_no_len;
757 symptr->length = 1;
758 }
759
760 symptr->code = code;
fc241834 761 symptr->patptr = patptr;
49704364 762 return TRUE;
18529408 763 }
49704364 764 }
fc241834 765 symptr->patptr = patptr;
49704364 766 return FALSE;
18529408
IZ
767}
768
18529408 769/*
fc241834 770 There is no way to cleanly handle the case where we should process the
08ca2aa3 771 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
772 (e.g. estimates like strend-s as an upper bound for the number of
773 characters left wouldn't work). So if we foresee the need of this
774 (pattern starts with U or contains U0), we want to work on the encoded
775 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
776 themselves if they need to do a lot of unpacks like this on it
777*/
fc241834 778STATIC bool
08ca2aa3
TH
779need_utf8(const char *pat, const char *patend)
780{
781 bool first = TRUE;
7918f24d
NC
782
783 PERL_ARGS_ASSERT_NEED_UTF8;
784
08ca2aa3
TH
785 while (pat < patend) {
786 if (pat[0] == '#') {
787 pat++;
f7fe979e 788 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
789 if (!pat) return FALSE;
790 } else if (pat[0] == 'U') {
791 if (first || pat[1] == '0') return TRUE;
792 } else first = FALSE;
793 pat++;
794 }
795 return FALSE;
796}
797
798STATIC char
799first_symbol(const char *pat, const char *patend) {
7918f24d
NC
800 PERL_ARGS_ASSERT_FIRST_SYMBOL;
801
08ca2aa3
TH
802 while (pat < patend) {
803 if (pat[0] != '#') return pat[0];
804 pat++;
f7fe979e 805 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
806 if (!pat) return 0;
807 pat++;
808 }
809 return 0;
810}
811
812/*
dcccc8ff
KW
813
814=head1 Pack and Unpack
815
7accc089
JH
816=for apidoc unpackstring
817
21ebfc7a
DM
818The engine implementing the unpack() Perl function.
819
820Using the template pat..patend, this function unpacks the string
821s..strend into a number of mortal SVs, which it pushes onto the perl
822argument (@_) stack (so you will need to issue a C<PUTBACK> before and
72d33970 823C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
824pushed elements.
825
826The strend and patend pointers should point to the byte following the last
827character of each string.
828
829Although this function returns its values on the perl argument stack, it
830doesn't take any parameters from that stack (and thus in particular
831there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
832example).
7accc089
JH
833
834=cut */
835
836I32
f7fe979e 837Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 838{
f7fe979e 839 tempsym_t sym;
08ca2aa3 840
7918f24d
NC
841 PERL_ARGS_ASSERT_UNPACKSTRING;
842
f337b084 843 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
844 else if (need_utf8(pat, patend)) {
845 /* We probably should try to avoid this in case a scalar context call
846 wouldn't get to the "U0" */
847 STRLEN len = strend - s;
230e1fce 848 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
849 SAVEFREEPV(s);
850 strend = s + len;
f337b084 851 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
852 }
853
f337b084
TH
854 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
855 flags |= FLAG_PARSE_UTF8;
08ca2aa3 856
f7fe979e 857 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
858
859 return unpack_rec(&sym, s, s, strend, NULL );
860}
861
4136a0f7 862STATIC I32
f7fe979e 863S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 864{
27da23d5 865 dVAR; dSP;
3297d27d 866 SV *sv = NULL;
f7fe979e 867 const I32 start_sp_offset = SP - PL_stack_base;
49704364 868 howlen_t howlen;
a6ec74c1 869 I32 checksum = 0;
92d41999 870 UV cuv = 0;
a6ec74c1 871 NV cdouble = 0.0;
f337b084 872 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 873 bool beyond = FALSE;
21c16052 874 bool explicit_length;
9e27e96a 875 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 876 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
877
878 PERL_ARGS_ASSERT_UNPACK_REC;
879
28be1210 880 symptr->strbeg = s - strbeg;
49704364 881
49704364 882 while (next_symbol(symptr)) {
a7a3cfaa 883 packprops_t props;
9e27e96a 884 I32 len;
f337b084 885 I32 datumtype = symptr->code;
a1219b5e 886 bool needs_swap;
206947d2 887 /* do first one only unless in list context
08ca2aa3 888 / is implemented by unpacking the count, then popping it from the
206947d2 889 stack, so must check that we're not in the middle of a / */
49704364 890 if ( unpack_only_one
206947d2 891 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 892 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 893 break;
49704364 894
f337b084 895 switch (howlen = symptr->howlen) {
fc241834
RGS
896 case e_star:
897 len = strend - strbeg; /* long enough */
49704364 898 break;
f337b084
TH
899 default:
900 /* e_no_len and e_number */
901 len = symptr->length;
902 break;
49704364 903 }
18529408 904
21c16052 905 explicit_length = TRUE;
a6ec74c1 906 redo_switch:
49704364 907 beyond = s >= strend;
a7a3cfaa
TH
908
909 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
910 if (props) {
911 /* props nonzero means we can process this letter. */
9e27e96a
AL
912 const long size = props & PACK_SIZE_MASK;
913 const long howmany = (strend - s) / size;
a7a3cfaa
TH
914 if (len > howmany)
915 len = howmany;
916
917 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
918 if (len && unpack_only_one) len = 1;
919 EXTEND(SP, len);
920 EXTEND_MORTAL(len);
78d46eaa
NC
921 }
922 }
a7a3cfaa 923
a1219b5e
NC
924 needs_swap = NEEDS_SWAP(datumtype);
925
1109a392 926 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 927 default:
1109a392 928 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 929
a6ec74c1 930 case '%':
49704364 931 if (howlen == e_no_len)
18529408 932 len = 16; /* len is not specified */
a6ec74c1 933 checksum = len;
92d41999 934 cuv = 0;
a6ec74c1 935 cdouble = 0;
18529408 936 continue;
81d52ecd 937
18529408
IZ
938 case '(':
939 {
49704364 940 tempsym_t savsym = *symptr;
9e27e96a 941 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 942 symptr->flags |= group_modifiers;
49704364 943 symptr->patend = savsym.grpend;
28be1210 944 symptr->previous = &savsym;
49704364 945 symptr->level++;
18529408 946 PUTBACK;
c6f750d1 947 if (len && unpack_only_one) len = 1;
18529408 948 while (len--) {
49704364 949 symptr->patptr = savsym.grpbeg;
f337b084
TH
950 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
951 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
952 unpack_rec(symptr, s, strbeg, strend, &s);
953 if (s == strend && savsym.howlen == e_star)
49704364 954 break; /* No way to continue */
18529408
IZ
955 }
956 SPAGAIN;
28be1210 957 savsym.flags = symptr->flags & ~group_modifiers;
49704364 958 *symptr = savsym;
18529408
IZ
959 break;
960 }
28be1210 961 case '.' | TYPE_IS_SHRIEKING:
28be1210 962 case '.': {
9e27e96a 963 const char *from;
28be1210 964 SV *sv;
9e27e96a 965 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
966 if (howlen == e_star) from = strbeg;
967 else if (len <= 0) from = s;
968 else {
969 tempsym_t *group = symptr;
970
971 while (--len && group) group = group->previous;
972 from = group ? strbeg + group->strbeg : strbeg;
973 }
974 sv = from <= s ?
00646304
CB
975 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
976 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 977 mXPUSHs(sv);
28be1210
TH
978 break;
979 }
28be1210 980 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 981 case '@':
28be1210 982 s = strbeg + symptr->strbeg;
28be1210 983 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 984 {
08ca2aa3
TH
985 while (len > 0) {
986 if (s >= strend)
987 Perl_croak(aTHX_ "'@' outside of string in unpack");
988 s += UTF8SKIP(s);
989 len--;
990 }
991 if (s > strend)
992 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
993 } else {
28be1210 994 if (strend-s < len)
fc241834 995 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 996 s += len;
08ca2aa3 997 }
a6ec74c1 998 break;
62f95557
IZ
999 case 'X' | TYPE_IS_SHRIEKING:
1000 if (!len) /* Avoid division by 0 */
1001 len = 1;
08ca2aa3 1002 if (utf8) {
f7fe979e 1003 const char *hop, *last;
f337b084
TH
1004 I32 l = len;
1005 hop = last = strbeg;
1006 while (hop < s) {
1007 hop += UTF8SKIP(hop);
1008 if (--l == 0) {
08ca2aa3 1009 last = hop;
f337b084
TH
1010 l = len;
1011 }
fc241834 1012 }
f337b084
TH
1013 if (last > s)
1014 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1015 s = last;
1016 break;
f337b084
TH
1017 }
1018 len = (s - strbeg) % len;
924ba076 1019 /* FALLTHROUGH */
a6ec74c1 1020 case 'X':
08ca2aa3
TH
1021 if (utf8) {
1022 while (len > 0) {
1023 if (s <= strbeg)
1024 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1025 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1026 if (s <= strbeg)
1027 Perl_croak(aTHX_ "'X' outside of string in unpack");
1028 }
1029 len--;
1030 }
1031 } else {
fc241834
RGS
1032 if (len > s - strbeg)
1033 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1034 s -= len;
08ca2aa3 1035 }
a6ec74c1 1036 break;
9e27e96a
AL
1037 case 'x' | TYPE_IS_SHRIEKING: {
1038 I32 ai32;
62f95557
IZ
1039 if (!len) /* Avoid division by 0 */
1040 len = 1;
230e1fce
NC
1041 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1042 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1043 if (ai32 == 0) break;
1044 len -= ai32;
9e27e96a 1045 }
924ba076 1046 /* FALLTHROUGH */
a6ec74c1 1047 case 'x':
08ca2aa3
TH
1048 if (utf8) {
1049 while (len>0) {
1050 if (s >= strend)
1051 Perl_croak(aTHX_ "'x' outside of string in unpack");
1052 s += UTF8SKIP(s);
1053 len--;
1054 }
1055 } else {
fc241834
RGS
1056 if (len > strend - s)
1057 Perl_croak(aTHX_ "'x' outside of string in unpack");
1058 s += len;
f337b084 1059 }
a6ec74c1
JH
1060 break;
1061 case '/':
49704364 1062 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1063
a6ec74c1
JH
1064 case 'A':
1065 case 'Z':
1066 case 'a':
08ca2aa3
TH
1067 if (checksum) {
1068 /* Preliminary length estimate is assumed done in 'W' */
1069 if (len > strend - s) len = strend - s;
1070 goto W_checksum;
1071 }
1072 if (utf8) {
1073 I32 l;
f7fe979e 1074 const char *hop;
08ca2aa3
TH
1075 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1076 if (hop >= strend) {
1077 if (hop > strend)
1078 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1079 break;
fc241834 1080 }
a6ec74c1 1081 }
08ca2aa3
TH
1082 if (hop > strend)
1083 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1084 len = hop - s;
1085 } else if (len > strend - s)
1086 len = strend - s;
1087
1088 if (datumtype == 'Z') {
1089 /* 'Z' strips stuff after first null */
f7fe979e 1090 const char *ptr, *end;
f337b084
TH
1091 end = s + len;
1092 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1093 sv = newSVpvn(s, ptr-s);
1094 if (howlen == e_star) /* exact for 'Z*' */
1095 len = ptr-s + (ptr != strend ? 1 : 0);
1096 } else if (datumtype == 'A') {
1097 /* 'A' strips both nulls and spaces */
f7fe979e 1098 const char *ptr;
18bdf90a
TH
1099 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1100 for (ptr = s+len-1; ptr >= s; ptr--)
1101 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1102 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1103 if (ptr >= s) ptr += UTF8SKIP(ptr);
1104 else ptr++;
28be1210 1105 if (ptr > s+len)
18bdf90a
TH
1106 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1107 } else {
1108 for (ptr = s+len-1; ptr >= s; ptr--)
1109 if (*ptr != 0 && !isSPACE(*ptr)) break;
1110 ptr++;
1111 }
08ca2aa3
TH
1112 sv = newSVpvn(s, ptr-s);
1113 } else sv = newSVpvn(s, len);
1114
1115 if (utf8) {
1116 SvUTF8_on(sv);
1117 /* Undo any upgrade done due to need_utf8() */
f337b084 1118 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1119 sv_utf8_downgrade(sv, 0);
a6ec74c1 1120 }
6e449a3a 1121 mXPUSHs(sv);
08ca2aa3 1122 s += len;
a6ec74c1
JH
1123 break;
1124 case 'B':
08ca2aa3
TH
1125 case 'b': {
1126 char *str;
49704364 1127 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1128 len = (strend - s) * 8;
1129 if (checksum) {
f337b084 1130 if (utf8)
08ca2aa3 1131 while (len >= 8 && s < strend) {
f337b084 1132 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1133 len -= 8;
1134 }
f337b084 1135 else
fc241834 1136 while (len >= 8) {
08ca2aa3 1137 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1138 len -= 8;
1139 }
08ca2aa3
TH
1140 if (len && s < strend) {
1141 U8 bits;
f337b084
TH
1142 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1143 if (datumtype == 'b')
a6ec74c1 1144 while (len-- > 0) {
92d41999 1145 if (bits & 1) cuv++;
a6ec74c1
JH
1146 bits >>= 1;
1147 }
f337b084 1148 else
a6ec74c1 1149 while (len-- > 0) {
08ca2aa3 1150 if (bits & 0x80) cuv++;
a6ec74c1
JH
1151 bits <<= 1;
1152 }
fc241834 1153 }
a6ec74c1
JH
1154 break;
1155 }
08ca2aa3 1156
561b68a9 1157 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1158 SvPOK_on(sv);
1159 str = SvPVX(sv);
1160 if (datumtype == 'b') {
f337b084 1161 U8 bits = 0;
f7fe979e 1162 const I32 ai32 = len;
08ca2aa3
TH
1163 for (len = 0; len < ai32; len++) {
1164 if (len & 7) bits >>= 1;
1165 else if (utf8) {
1166 if (s >= strend) break;
f337b084 1167 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1168 } else bits = *(U8 *) s++;
1169 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1170 }
08ca2aa3 1171 } else {
f337b084 1172 U8 bits = 0;
f7fe979e 1173 const I32 ai32 = len;
08ca2aa3
TH
1174 for (len = 0; len < ai32; len++) {
1175 if (len & 7) bits <<= 1;
1176 else if (utf8) {
1177 if (s >= strend) break;
f337b084 1178 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1179 } else bits = *(U8 *) s++;
1180 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1181 }
1182 }
1183 *str = '\0';
aa07b2f6 1184 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1185 XPUSHs(sv);
a6ec74c1 1186 break;
08ca2aa3 1187 }
a6ec74c1 1188 case 'H':
08ca2aa3 1189 case 'h': {
3297d27d 1190 char *str = NULL;
fc241834 1191 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1192 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1193 len = (strend - s) * 2;
858fe5e1
TC
1194 if (!checksum) {
1195 sv = sv_2mortal(newSV(len ? len : 1));
1196 SvPOK_on(sv);
1197 str = SvPVX(sv);
1198 }
a6ec74c1 1199 if (datumtype == 'h') {
f337b084 1200 U8 bits = 0;
9e27e96a 1201 I32 ai32 = len;
fc241834
RGS
1202 for (len = 0; len < ai32; len++) {
1203 if (len & 1) bits >>= 4;
1204 else if (utf8) {
1205 if (s >= strend) break;
f337b084 1206 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1207 } else bits = * (U8 *) s++;
858fe5e1
TC
1208 if (!checksum)
1209 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1210 }
08ca2aa3 1211 } else {
f337b084 1212 U8 bits = 0;
f7fe979e 1213 const I32 ai32 = len;
08ca2aa3
TH
1214 for (len = 0; len < ai32; len++) {
1215 if (len & 1) bits <<= 4;
1216 else if (utf8) {
1217 if (s >= strend) break;
f337b084 1218 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1219 } else bits = *(U8 *) s++;
858fe5e1
TC
1220 if (!checksum)
1221 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1222 }
1223 }
858fe5e1
TC
1224 if (!checksum) {
1225 *str = '\0';
1226 SvCUR_set(sv, str - SvPVX_const(sv));
1227 XPUSHs(sv);
1228 }
a6ec74c1 1229 break;
08ca2aa3 1230 }
1651fc44
ML
1231 case 'C':
1232 if (len == 0) {
1233 if (explicit_length)
1234 /* Switch to "character" mode */
1235 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1236 break;
1237 }
924ba076 1238 /* FALLTHROUGH */
a6ec74c1 1239 case 'c':
1651fc44
ML
1240 while (len-- > 0 && s < strend) {
1241 int aint;
1242 if (utf8)
1243 {
1244 STRLEN retlen;
1245 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1246 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1247 if (retlen == (STRLEN) -1 || retlen == 0)
1248 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1249 s += retlen;
1250 }
1251 else
1252 aint = *(U8 *)(s)++;
1253 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1254 aint -= 256;
08ca2aa3 1255 if (!checksum)
6e449a3a 1256 mPUSHi(aint);
73cb7263
NC
1257 else if (checksum > bits_in_uv)
1258 cdouble += (NV)aint;
1259 else
1260 cuv += aint;
a6ec74c1
JH
1261 }
1262 break;
08ca2aa3
TH
1263 case 'W':
1264 W_checksum:
1651fc44 1265 if (utf8) {
08ca2aa3 1266 while (len-- > 0 && s < strend) {
08ca2aa3 1267 STRLEN retlen;
f7fe979e 1268 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1269 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1270 if (retlen == (STRLEN) -1 || retlen == 0)
1271 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1272 s += retlen;
1273 if (!checksum)
6e449a3a 1274 mPUSHu(val);
08ca2aa3
TH
1275 else if (checksum > bits_in_uv)
1276 cdouble += (NV) val;
d6d3e8bd 1277 else
08ca2aa3 1278 cuv += val;
fc241834 1279 }
08ca2aa3 1280 } else if (!checksum)
a6ec74c1 1281 while (len-- > 0) {
f7fe979e 1282 const U8 ch = *(U8 *) s++;
6e449a3a 1283 mPUSHu(ch);
a6ec74c1 1284 }
08ca2aa3
TH
1285 else if (checksum > bits_in_uv)
1286 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1287 else
1288 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1289 break;
1290 case 'U':
35bcd338 1291 if (len == 0) {
c5333953 1292 if (explicit_length && howlen != e_star) {
08ca2aa3 1293 /* Switch to "bytes in UTF-8" mode */
f337b084 1294 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1295 else
1296 /* Should be impossible due to the need_utf8() test */
1297 Perl_croak(aTHX_ "U0 mode on a byte string");
1298 }
35bcd338
JH
1299 break;
1300 }
08ca2aa3 1301 if (len > strend - s) len = strend - s;
fc241834 1302 if (!checksum) {
08ca2aa3
TH
1303 if (len && unpack_only_one) len = 1;
1304 EXTEND(SP, len);
1305 EXTEND_MORTAL(len);
fc241834 1306 }
08ca2aa3
TH
1307 while (len-- > 0 && s < strend) {
1308 STRLEN retlen;
1309 UV auv;
1310 if (utf8) {
1311 U8 result[UTF8_MAXLEN];
f7fe979e 1312 const char *ptr = s;
08ca2aa3 1313 STRLEN len;
08ca2aa3
TH
1314 /* Bug: warns about bad utf8 even if we are short on bytes
1315 and will break out of the loop */
230e1fce
NC
1316 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1317 'U'))
08ca2aa3
TH
1318 break;
1319 len = UTF8SKIP(result);
fc241834 1320 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1321 (char *) &result[1], len-1, 'U')) break;
c80e42f3 1322 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1323 s = ptr;
1324 } else {
c80e42f3 1325 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1326 if (retlen == (STRLEN) -1 || retlen == 0)
1327 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1328 s += retlen;
1329 }
1330 if (!checksum)
6e449a3a 1331 mPUSHu(auv);
73cb7263 1332 else if (checksum > bits_in_uv)
08ca2aa3 1333 cdouble += (NV) auv;
73cb7263 1334 else
08ca2aa3 1335 cuv += auv;
a6ec74c1
JH
1336 }
1337 break;
49704364
WL
1338 case 's' | TYPE_IS_SHRIEKING:
1339#if SHORTSIZE != SIZE16
73cb7263 1340 while (len-- > 0) {
08ca2aa3 1341 short ashort;
aaec8192 1342 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1343 if (!checksum)
6e449a3a 1344 mPUSHi(ashort);
73cb7263
NC
1345 else if (checksum > bits_in_uv)
1346 cdouble += (NV)ashort;
1347 else
1348 cuv += ashort;
49704364
WL
1349 }
1350 break;
1351#else
924ba076 1352 /* FALLTHROUGH */
a6ec74c1 1353#endif
49704364 1354 case 's':
73cb7263 1355 while (len-- > 0) {
08ca2aa3
TH
1356 I16 ai16;
1357
1358#if U16SIZE > SIZE16
1359 ai16 = 0;
1360#endif
aaec8192 1361 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1362#if U16SIZE > SIZE16
73cb7263
NC
1363 if (ai16 > 32767)
1364 ai16 -= 65536;
a6ec74c1 1365#endif
08ca2aa3 1366 if (!checksum)
6e449a3a 1367 mPUSHi(ai16);
73cb7263
NC
1368 else if (checksum > bits_in_uv)
1369 cdouble += (NV)ai16;
1370 else
1371 cuv += ai16;
a6ec74c1
JH
1372 }
1373 break;
49704364
WL
1374 case 'S' | TYPE_IS_SHRIEKING:
1375#if SHORTSIZE != SIZE16
73cb7263 1376 while (len-- > 0) {
08ca2aa3 1377 unsigned short aushort;
aaec8192
NC
1378 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1379 needs_swap);
08ca2aa3 1380 if (!checksum)
6e449a3a 1381 mPUSHu(aushort);
73cb7263
NC
1382 else if (checksum > bits_in_uv)
1383 cdouble += (NV)aushort;
1384 else
1385 cuv += aushort;
49704364
WL
1386 }
1387 break;
1388#else
924ba076 1389 /* FALLTHROUGH */
49704364 1390#endif
a6ec74c1
JH
1391 case 'v':
1392 case 'n':
1393 case 'S':
73cb7263 1394 while (len-- > 0) {
08ca2aa3
TH
1395 U16 au16;
1396#if U16SIZE > SIZE16
1397 au16 = 0;
1398#endif
aaec8192 1399 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1400 if (datumtype == 'n')
1401 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1402 if (datumtype == 'v')
1403 au16 = vtohs(au16);
08ca2aa3 1404 if (!checksum)
6e449a3a 1405 mPUSHu(au16);
73cb7263 1406 else if (checksum > bits_in_uv)
f337b084 1407 cdouble += (NV) au16;
73cb7263
NC
1408 else
1409 cuv += au16;
a6ec74c1
JH
1410 }
1411 break;
068bd2e7
MHM
1412 case 'v' | TYPE_IS_SHRIEKING:
1413 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1414 while (len-- > 0) {
08ca2aa3
TH
1415 I16 ai16;
1416# if U16SIZE > SIZE16
1417 ai16 = 0;
1418# endif
aaec8192 1419 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1420 /* There should never be any byte-swapping here. */
1421 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1422 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1423 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1424 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1425 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1426 if (!checksum)
6e449a3a 1427 mPUSHi(ai16);
73cb7263 1428 else if (checksum > bits_in_uv)
08ca2aa3 1429 cdouble += (NV) ai16;
73cb7263
NC
1430 else
1431 cuv += ai16;
068bd2e7
MHM
1432 }
1433 break;
a6ec74c1 1434 case 'i':
49704364 1435 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1436 while (len-- > 0) {
08ca2aa3 1437 int aint;
aaec8192 1438 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1439 if (!checksum)
6e449a3a 1440 mPUSHi(aint);
73cb7263
NC
1441 else if (checksum > bits_in_uv)
1442 cdouble += (NV)aint;
1443 else
1444 cuv += aint;
a6ec74c1
JH
1445 }
1446 break;
1447 case 'I':
49704364 1448 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1449 while (len-- > 0) {
08ca2aa3 1450 unsigned int auint;
aaec8192 1451 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1452 if (!checksum)
6e449a3a 1453 mPUSHu(auint);
73cb7263
NC
1454 else if (checksum > bits_in_uv)
1455 cdouble += (NV)auint;
1456 else
1457 cuv += auint;
a6ec74c1
JH
1458 }
1459 break;
92d41999 1460 case 'j':
73cb7263 1461 while (len-- > 0) {
08ca2aa3 1462 IV aiv;
aaec8192 1463 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1464 if (!checksum)
6e449a3a 1465 mPUSHi(aiv);
73cb7263
NC
1466 else if (checksum > bits_in_uv)
1467 cdouble += (NV)aiv;
1468 else
1469 cuv += aiv;
92d41999
JH
1470 }
1471 break;
1472 case 'J':
73cb7263 1473 while (len-- > 0) {
08ca2aa3 1474 UV auv;
aaec8192 1475 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1476 if (!checksum)
6e449a3a 1477 mPUSHu(auv);
73cb7263
NC
1478 else if (checksum > bits_in_uv)
1479 cdouble += (NV)auv;
1480 else
1481 cuv += auv;
92d41999
JH
1482 }
1483 break;
49704364
WL
1484 case 'l' | TYPE_IS_SHRIEKING:
1485#if LONGSIZE != SIZE32
73cb7263 1486 while (len-- > 0) {
08ca2aa3 1487 long along;
aaec8192 1488 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1489 if (!checksum)
6e449a3a 1490 mPUSHi(along);
73cb7263
NC
1491 else if (checksum > bits_in_uv)
1492 cdouble += (NV)along;
1493 else
1494 cuv += along;
49704364
WL
1495 }
1496 break;
1497#else
924ba076 1498 /* FALLTHROUGH */
a6ec74c1 1499#endif
49704364 1500 case 'l':
73cb7263 1501 while (len-- > 0) {
08ca2aa3
TH
1502 I32 ai32;
1503#if U32SIZE > SIZE32
1504 ai32 = 0;
1505#endif
aaec8192 1506 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1507#if U32SIZE > SIZE32
08ca2aa3 1508 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1509#endif
08ca2aa3 1510 if (!checksum)
6e449a3a 1511 mPUSHi(ai32);
73cb7263
NC
1512 else if (checksum > bits_in_uv)
1513 cdouble += (NV)ai32;
1514 else
1515 cuv += ai32;
a6ec74c1
JH
1516 }
1517 break;
49704364
WL
1518 case 'L' | TYPE_IS_SHRIEKING:
1519#if LONGSIZE != SIZE32
73cb7263 1520 while (len-- > 0) {
08ca2aa3 1521 unsigned long aulong;
aaec8192 1522 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1523 if (!checksum)
6e449a3a 1524 mPUSHu(aulong);
73cb7263
NC
1525 else if (checksum > bits_in_uv)
1526 cdouble += (NV)aulong;
1527 else
1528 cuv += aulong;
49704364
WL
1529 }
1530 break;
1531#else
924ba076 1532 /* FALLTHROUGH */
49704364 1533#endif
a6ec74c1
JH
1534 case 'V':
1535 case 'N':
1536 case 'L':
73cb7263 1537 while (len-- > 0) {
08ca2aa3
TH
1538 U32 au32;
1539#if U32SIZE > SIZE32
1540 au32 = 0;
1541#endif
aaec8192 1542 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1543 if (datumtype == 'N')
1544 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1545 if (datumtype == 'V')
1546 au32 = vtohl(au32);
08ca2aa3 1547 if (!checksum)
6e449a3a 1548 mPUSHu(au32);
fc241834
RGS
1549 else if (checksum > bits_in_uv)
1550 cdouble += (NV)au32;
1551 else
1552 cuv += au32;
a6ec74c1
JH
1553 }
1554 break;
068bd2e7
MHM
1555 case 'V' | TYPE_IS_SHRIEKING:
1556 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1557 while (len-- > 0) {
08ca2aa3 1558 I32 ai32;
f8e5a5db 1559#if U32SIZE > SIZE32
08ca2aa3 1560 ai32 = 0;
f8e5a5db 1561#endif
aaec8192 1562 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1563 /* There should never be any byte swapping here. */
1564 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1565 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1566 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1567 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1568 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1569 if (!checksum)
6e449a3a 1570 mPUSHi(ai32);
73cb7263
NC
1571 else if (checksum > bits_in_uv)
1572 cdouble += (NV)ai32;
1573 else
1574 cuv += ai32;
068bd2e7
MHM
1575 }
1576 break;
a6ec74c1 1577 case 'p':
a6ec74c1 1578 while (len-- > 0) {
f7fe979e 1579 const char *aptr;
aaec8192 1580 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1581 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1582 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1583 }
1584 break;
1585 case 'w':
a6ec74c1
JH
1586 {
1587 UV auv = 0;
1588 U32 bytes = 0;
fc241834 1589
08ca2aa3
TH
1590 while (len > 0 && s < strend) {
1591 U8 ch;
f337b084 1592 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1593 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1594 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1595 if (ch < 0x80) {
a6ec74c1 1596 bytes = 0;
6e449a3a 1597 mPUSHu(auv);
a6ec74c1
JH
1598 len--;
1599 auv = 0;
08ca2aa3 1600 continue;
a6ec74c1 1601 }
08ca2aa3 1602 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1603 const char *t;
a6ec74c1 1604
f5992bc4 1605 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1606 while (s < strend) {
f337b084 1607 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1608 sv = mul128(sv, (U8)(ch & 0x7f));
1609 if (!(ch & 0x80)) {
a6ec74c1
JH
1610 bytes = 0;
1611 break;
1612 }
1613 }
10516c54 1614 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1615 while (*t == '0')
1616 t++;
1617 sv_chop(sv, t);
6e449a3a 1618 mPUSHs(sv);
a6ec74c1
JH
1619 len--;
1620 auv = 0;
1621 }
1622 }
1623 if ((s >= strend) && bytes)
49704364 1624 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1625 }
1626 break;
1627 case 'P':
49704364
WL
1628 if (symptr->howlen == e_star)
1629 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1630 EXTEND(SP, 1);
2d3e0934 1631 if (s + sizeof(char*) <= strend) {
08ca2aa3 1632 char *aptr;
aaec8192 1633 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1634 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1635 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1636 }
a6ec74c1 1637 break;
c174bf3b 1638#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1639 case 'q':
73cb7263 1640 while (len-- > 0) {
08ca2aa3 1641 Quad_t aquad;
aaec8192 1642 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1643 if (!checksum)
c174bf3b 1644 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1645 else if (checksum > bits_in_uv)
1646 cdouble += (NV)aquad;
1647 else
1648 cuv += aquad;
1649 }
a6ec74c1
JH
1650 break;
1651 case 'Q':
73cb7263 1652 while (len-- > 0) {
08ca2aa3 1653 Uquad_t auquad;
aaec8192 1654 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1655 if (!checksum)
c174bf3b 1656 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1657 else if (checksum > bits_in_uv)
1658 cdouble += (NV)auquad;
1659 else
1660 cuv += auquad;
a6ec74c1
JH
1661 }
1662 break;
1640b983 1663#endif
a6ec74c1
JH
1664 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1665 case 'f':
73cb7263 1666 while (len-- > 0) {
08ca2aa3 1667 float afloat;
aaec8192 1668 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1669 if (!checksum)
6e449a3a 1670 mPUSHn(afloat);
08ca2aa3 1671 else
73cb7263 1672 cdouble += afloat;
fc241834 1673 }
a6ec74c1
JH
1674 break;
1675 case 'd':
73cb7263 1676 while (len-- > 0) {
08ca2aa3 1677 double adouble;
aaec8192 1678 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1679 if (!checksum)
6e449a3a 1680 mPUSHn(adouble);
08ca2aa3 1681 else
73cb7263 1682 cdouble += adouble;
fc241834 1683 }
a6ec74c1 1684 break;
92d41999 1685 case 'F':
73cb7263 1686 while (len-- > 0) {
275663fa 1687 NV_bytes anv;
aaec8192
NC
1688 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1689 datumtype, needs_swap);
08ca2aa3 1690 if (!checksum)
275663fa 1691 mPUSHn(anv.nv);
08ca2aa3 1692 else
275663fa 1693 cdouble += anv.nv;
fc241834 1694 }
92d41999
JH
1695 break;
1696#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1697 case 'D':
73cb7263 1698 while (len-- > 0) {
275663fa 1699 ld_bytes aldouble;
aaec8192
NC
1700 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1701 sizeof(aldouble.bytes), datumtype, needs_swap);
08ca2aa3 1702 if (!checksum)
275663fa 1703 mPUSHn(aldouble.ld);
08ca2aa3 1704 else
275663fa 1705 cdouble += aldouble.ld;
92d41999
JH
1706 }
1707 break;
1708#endif
a6ec74c1 1709 case 'u':
858fe5e1 1710 if (!checksum) {
f7fe979e 1711 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1712 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1713 if (l) SvPOK_on(sv);
1714 }
1715 if (utf8) {
1716 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1717 I32 a, b, c, d;
db187877 1718 char hunk[3];
08ca2aa3 1719
08ca2aa3
TH
1720 while (len > 0) {
1721 next_uni_uu(aTHX_ &s, strend, &a);
1722 next_uni_uu(aTHX_ &s, strend, &b);
1723 next_uni_uu(aTHX_ &s, strend, &c);
1724 next_uni_uu(aTHX_ &s, strend, &d);
1725 hunk[0] = (char)((a << 2) | (b >> 4));
1726 hunk[1] = (char)((b << 4) | (c >> 2));
1727 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1728 if (!checksum)
1729 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1730 len -= 3;
1731 }
1732 if (s < strend) {
f7fe979e
AL
1733 if (*s == '\n') {
1734 s++;
1735 }
08ca2aa3
TH
1736 else {
1737 /* possible checksum byte */
f7fe979e
AL
1738 const char *skip = s+UTF8SKIP(s);
1739 if (skip < strend && *skip == '\n')
1740 s = skip+1;
08ca2aa3
TH
1741 }
1742 }
1743 }
1744 } else {
fc241834
RGS
1745 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1746 I32 a, b, c, d;
db187877 1747 char hunk[3];
a6ec74c1 1748
fc241834
RGS
1749 len = PL_uudmap[*(U8*)s++] & 077;
1750 while (len > 0) {
1751 if (s < strend && ISUUCHAR(*s))
1752 a = PL_uudmap[*(U8*)s++] & 077;
1753 else
1754 a = 0;
1755 if (s < strend && ISUUCHAR(*s))
1756 b = PL_uudmap[*(U8*)s++] & 077;
1757 else
1758 b = 0;
1759 if (s < strend && ISUUCHAR(*s))
1760 c = PL_uudmap[*(U8*)s++] & 077;
1761 else
1762 c = 0;
1763 if (s < strend && ISUUCHAR(*s))
1764 d = PL_uudmap[*(U8*)s++] & 077;
1765 else
1766 d = 0;
1767 hunk[0] = (char)((a << 2) | (b >> 4));
1768 hunk[1] = (char)((b << 4) | (c >> 2));
1769 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1770 if (!checksum)
1771 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1772 len -= 3;
1773 }
1774 if (*s == '\n')
1775 s++;
1776 else /* possible checksum byte */
1777 if (s + 1 < strend && s[1] == '\n')
1778 s += 2;
a6ec74c1 1779 }
08ca2aa3 1780 }
858fe5e1
TC
1781 if (!checksum)
1782 XPUSHs(sv);
a6ec74c1
JH
1783 break;
1784 }
49704364 1785
a6ec74c1 1786 if (checksum) {
1109a392 1787 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1788 (checksum > bits_in_uv &&
08ca2aa3
TH
1789 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1790 NV trouble, anv;
a6ec74c1 1791
08ca2aa3 1792 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1793 while (checksum >= 16) {
1794 checksum -= 16;
08ca2aa3 1795 anv *= 65536.0;
a6ec74c1 1796 }
a6ec74c1 1797 while (cdouble < 0.0)
08ca2aa3
TH
1798 cdouble += anv;
1799 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1800 sv = newSVnv(cdouble);
a6ec74c1
JH
1801 }
1802 else {
fa8ec7c1
NC
1803 if (checksum < bits_in_uv) {
1804 UV mask = ((UV)1 << checksum) - 1;
92d41999 1805 cuv &= mask;
a6ec74c1 1806 }
c4c5f44a 1807 sv = newSVuv(cuv);
a6ec74c1 1808 }
6e449a3a 1809 mXPUSHs(sv);
a6ec74c1
JH
1810 checksum = 0;
1811 }
fc241834 1812
49704364
WL
1813 if (symptr->flags & FLAG_SLASH){
1814 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1815 break;
49704364
WL
1816 if( next_symbol(symptr) ){
1817 if( symptr->howlen == e_number )
1818 Perl_croak(aTHX_ "Count after length/code in unpack" );
1819 if( beyond ){
1820 /* ...end of char buffer then no decent length available */
1821 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1822 } else {
1823 /* take top of stack (hope it's numeric) */
1824 len = POPi;
1825 if( len < 0 )
1826 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1827 }
1828 } else {
1829 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1830 }
1831 datumtype = symptr->code;
21c16052 1832 explicit_length = FALSE;
49704364
WL
1833 goto redo_switch;
1834 }
a6ec74c1 1835 }
49704364 1836
18529408
IZ
1837 if (new_s)
1838 *new_s = s;
1839 PUTBACK;
1840 return SP - PL_stack_base - start_sp_offset;
1841}
1842
1843PP(pp_unpack)
1844{
97aff369 1845 dVAR;
18529408 1846 dSP;
bab9c0ac 1847 dPOPPOPssrl;
18529408
IZ
1848 I32 gimme = GIMME_V;
1849 STRLEN llen;
1850 STRLEN rlen;
5c144d81
NC
1851 const char *pat = SvPV_const(left, llen);
1852 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1853 const char *strend = s + rlen;
1854 const char *patend = pat + llen;
08ca2aa3 1855 I32 cnt;
18529408
IZ
1856
1857 PUTBACK;
7accc089 1858 cnt = unpackstring(pat, patend, s, strend,
49704364 1859 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1860 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1861
18529408
IZ
1862 SPAGAIN;
1863 if ( !cnt && gimme == G_SCALAR )
1864 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1865 RETURN;
1866}
1867
f337b084 1868STATIC U8 *
f7fe979e 1869doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1870{
f337b084 1871 *h++ = PL_uuemap[len];
a6ec74c1 1872 while (len > 2) {
f337b084
TH
1873 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1874 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1875 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1876 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1877 s += 3;
1878 len -= 3;
1879 }
1880 if (len > 0) {
f7fe979e 1881 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1882 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1883 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1884 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1885 *h++ = PL_uuemap[0];
a6ec74c1 1886 }
f337b084
TH
1887 *h++ = '\n';
1888 return h;
a6ec74c1
JH
1889}
1890
1891STATIC SV *
f7fe979e 1892S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1893{
8b6e33c7
AL
1894 SV *result = newSVpvn(s, l);
1895 char *const result_c = SvPV_nolen(result); /* convenience */
1896 char *out = result_c;
1897 bool skip = 1;
1898 bool ignore = 0;
a6ec74c1 1899
7918f24d
NC
1900 PERL_ARGS_ASSERT_IS_AN_INT;
1901
a6ec74c1
JH
1902 while (*s) {
1903 switch (*s) {
1904 case ' ':
1905 break;
1906 case '+':
1907 if (!skip) {
1908 SvREFCNT_dec(result);
1909 return (NULL);
1910 }
1911 break;
1912 case '0':
1913 case '1':
1914 case '2':
1915 case '3':
1916 case '4':
1917 case '5':
1918 case '6':
1919 case '7':
1920 case '8':
1921 case '9':
1922 skip = 0;
1923 if (!ignore) {
1924 *(out++) = *s;
1925 }
1926 break;
1927 case '.':
1928 ignore = 1;
1929 break;
1930 default:
1931 SvREFCNT_dec(result);
1932 return (NULL);
1933 }
1934 s++;
1935 }
1936 *(out++) = '\0';
1937 SvCUR_set(result, out - result_c);
1938 return (result);
1939}
1940
1941/* pnum must be '\0' terminated */
1942STATIC int
1943S_div128(pTHX_ SV *pnum, bool *done)
1944{
8b6e33c7
AL
1945 STRLEN len;
1946 char * const s = SvPV(pnum, len);
1947 char *t = s;
1948 int m = 0;
1949
7918f24d
NC
1950 PERL_ARGS_ASSERT_DIV128;
1951
8b6e33c7
AL
1952 *done = 1;
1953 while (*t) {
1954 const int i = m * 10 + (*t - '0');
1955 const int r = (i >> 7); /* r < 10 */
1956 m = i & 0x7F;
1957 if (r) {
1958 *done = 0;
1959 }
1960 *(t++) = '0' + r;
a6ec74c1 1961 }
8b6e33c7
AL
1962 *(t++) = '\0';
1963 SvCUR_set(pnum, (STRLEN) (t - s));
1964 return (m);
a6ec74c1
JH
1965}
1966
18529408 1967/*
7accc089
JH
1968=for apidoc packlist
1969
1970The engine implementing pack() Perl function.
1971
bfce84ec
AL
1972=cut
1973*/
7accc089
JH
1974
1975void
5aaab254 1976Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1977{
97aff369 1978 dVAR;
aadb217d
JH
1979 tempsym_t sym;
1980
7918f24d
NC
1981 PERL_ARGS_ASSERT_PACKLIST;
1982
f7fe979e 1983 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1984
f337b084
TH
1985 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1986 Also make sure any UTF8 flag is loaded */
56eb0262 1987 SvPV_force_nolen(cat);
bfce84ec
AL
1988 if (DO_UTF8(cat))
1989 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1990
49704364
WL
1991 (void)pack_rec( cat, &sym, beglist, endlist );
1992}
1993
f337b084
TH
1994/* like sv_utf8_upgrade, but also repoint the group start markers */
1995STATIC void
1996marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1997 STRLEN len;
1998 tempsym_t *group;
f7fe979e
AL
1999 const char *from_ptr, *from_start, *from_end, **marks, **m;
2000 char *to_start, *to_ptr;
f337b084
TH
2001
2002 if (SvUTF8(sv)) return;
2003
aa07b2f6 2004 from_start = SvPVX_const(sv);
f337b084
TH
2005 from_end = from_start + SvCUR(sv);
2006 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 2007 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2008 if (from_ptr == from_end) {
2009 /* Simple case: no character needs to be changed */
2010 SvUTF8_on(sv);
2011 return;
2012 }
2013
3473cf63 2014 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2015 Newx(to_start, len, char);
f337b084
TH
2016 Copy(from_start, to_start, from_ptr-from_start, char);
2017 to_ptr = to_start + (from_ptr-from_start);
2018
a02a5408 2019 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2020 for (group=sym_ptr; group; group = group->previous)
2021 marks[group->level] = from_start + group->strbeg;
2022 marks[sym_ptr->level+1] = from_end+1;
2023 for (m = marks; *m < from_ptr; m++)
2024 *m = to_start + (*m-from_start);
2025
2026 for (;from_ptr < from_end; from_ptr++) {
2027 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2028 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2029 }
2030 *to_ptr = 0;
2031
2032 while (*m == from_ptr) *m++ = to_ptr;
2033 if (m != marks + sym_ptr->level+1) {
2034 Safefree(marks);
2035 Safefree(to_start);
5637ef5b
NC
2036 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2037 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2038 }
2039 for (group=sym_ptr; group; group = group->previous)
2040 group->strbeg = marks[group->level] - to_start;
2041 Safefree(marks);
2042
2043 if (SvOOK(sv)) {
2044 if (SvIVX(sv)) {
b162af07 2045 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2046 from_start -= SvIVX(sv);
2047 SvIV_set(sv, 0);
2048 }
2049 SvFLAGS(sv) &= ~SVf_OOK;
2050 }
2051 if (SvLEN(sv) != 0)
2052 Safefree(from_start);
f880fe2f 2053 SvPV_set(sv, to_start);
b162af07
SP
2054 SvCUR_set(sv, to_ptr - to_start);
2055 SvLEN_set(sv, len);
f337b084
TH
2056 SvUTF8_on(sv);
2057}
2058
2059/* Exponential string grower. Makes string extension effectively O(n)
2060 needed says how many extra bytes we need (not counting the final '\0')
2061 Only grows the string if there is an actual lack of space
2062*/
2063STATIC char *
0bd48802 2064S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2065 const STRLEN cur = SvCUR(sv);
2066 const STRLEN len = SvLEN(sv);
f337b084 2067 STRLEN extend;
7918f24d
NC
2068
2069 PERL_ARGS_ASSERT_SV_EXP_GROW;
2070
f337b084
TH
2071 if (len - cur > needed) return SvPVX(sv);
2072 extend = needed > len ? needed : len;
2073 return SvGROW(sv, len+extend+1);
2074}
49704364
WL
2075
2076STATIC
2077SV **
f337b084 2078S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2079{
97aff369 2080 dVAR;
49704364 2081 tempsym_t lookahead;
f337b084
TH
2082 I32 items = endlist - beglist;
2083 bool found = next_symbol(symptr);
2084 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2085 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2086 char* from;
f337b084 2087
7918f24d
NC
2088 PERL_ARGS_ASSERT_PACK_REC;
2089
f337b084
TH
2090 if (symptr->level == 0 && found && symptr->code == 'U') {
2091 marked_upgrade(aTHX_ cat, symptr);
2092 symptr->flags |= FLAG_DO_UTF8;
2093 utf8 = 0;
49704364 2094 }
f337b084 2095 symptr->strbeg = SvCUR(cat);
49704364
WL
2096
2097 while (found) {
f337b084
TH
2098 SV *fromstr;
2099 STRLEN fromlen;
2100 I32 len;
a0714e2c 2101 SV *lengthcode = NULL;
49704364 2102 I32 datumtype = symptr->code;
f337b084
TH
2103 howlen_t howlen = symptr->howlen;
2104 char *start = SvPVX(cat);
2105 char *cur = start + SvCUR(cat);
a1219b5e 2106 bool needs_swap;
49704364 2107
f337b084
TH
2108#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2109
2110 switch (howlen) {
fc241834 2111 case e_star:
f337b084
TH
2112 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2113 0 : items;
2114 break;
2115 default:
2116 /* e_no_len and e_number */
2117 len = symptr->length;
49704364
WL
2118 break;
2119 }
2120
f337b084 2121 if (len) {
a7a3cfaa 2122 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2123
a7a3cfaa
TH
2124 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2125 /* We can process this letter. */
2126 STRLEN size = props & PACK_SIZE_MASK;
2127 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2128 }
f337b084
TH
2129 }
2130
49704364
WL
2131 /* Look ahead for next symbol. Do we have code/code? */
2132 lookahead = *symptr;
2133 found = next_symbol(&lookahead);
246f24af
TH
2134 if (symptr->flags & FLAG_SLASH) {
2135 IV count;
f337b084 2136 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2137 if (strchr("aAZ", lookahead.code)) {
2138 if (lookahead.howlen == e_number) count = lookahead.length;
2139 else {
ce399ba6 2140 if (items > 0) {
48a5da33 2141 count = sv_len_utf8(*beglist);
ce399ba6 2142 }
246f24af
TH
2143 else count = 0;
2144 if (lookahead.code == 'Z') count++;
2145 }
2146 } else {
2147 if (lookahead.howlen == e_number && lookahead.length < items)
2148 count = lookahead.length;
2149 else count = items;
2150 }
2151 lookahead.howlen = e_number;
2152 lookahead.length = count;
2153 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2154 }
49704364 2155
a1219b5e
NC
2156 needs_swap = NEEDS_SWAP(datumtype);
2157
fc241834
RGS
2158 /* Code inside the switch must take care to properly update
2159 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2160 doesn't simply leave using break */
1109a392 2161 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2162 default:
f337b084
TH
2163 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2164 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2165 case '%':
49704364 2166 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2167
28be1210 2168 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2169 case '.':
2170 if (howlen == e_star) from = start;
2171 else if (len == 0) from = cur;
2172 else {
2173 tempsym_t *group = symptr;
2174
2175 while (--len && group) group = group->previous;
2176 from = group ? start + group->strbeg : start;
2177 }
2178 fromstr = NEXTFROM;
2179 len = SvIV(fromstr);
2180 goto resize;
28be1210 2181 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2182 case '@':
28be1210
TH
2183 from = start + symptr->strbeg;
2184 resize:
28be1210 2185 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2186 if (len >= 0) {
2187 while (len && from < cur) {
2188 from += UTF8SKIP(from);
2189 len--;
2190 }
2191 if (from > cur)
2192 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2193 if (len) {
2194 /* Here we know from == cur */
2195 grow:
2196 GROWING(0, cat, start, cur, len);
2197 Zero(cur, len, char);
2198 cur += len;
2199 } else if (from < cur) {
2200 len = cur - from;
2201 goto shrink;
2202 } else goto no_change;
2203 } else {
2204 cur = from;
2205 len = -len;
2206 goto utf8_shrink;
f337b084 2207 }
28be1210
TH
2208 else {
2209 len -= cur - from;
f337b084 2210 if (len > 0) goto grow;
28be1210 2211 if (len == 0) goto no_change;
fc241834 2212 len = -len;
28be1210 2213 goto shrink;
f337b084 2214 }
a6ec74c1 2215 break;
81d52ecd 2216
fc241834 2217 case '(': {
49704364 2218 tempsym_t savsym = *symptr;
66c611c5
MHM
2219 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2220 symptr->flags |= group_modifiers;
49704364
WL
2221 symptr->patend = savsym.grpend;
2222 symptr->level++;
f337b084 2223 symptr->previous = &lookahead;
18529408 2224 while (len--) {
f337b084
TH
2225 U32 was_utf8;
2226 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2227 else symptr->flags &= ~FLAG_PARSE_UTF8;
2228 was_utf8 = SvUTF8(cat);
49704364 2229 symptr->patptr = savsym.grpbeg;
f337b084
TH
2230 beglist = pack_rec(cat, symptr, beglist, endlist);
2231 if (SvUTF8(cat) != was_utf8)
2232 /* This had better be an upgrade while in utf8==0 mode */
2233 utf8 = 1;
2234
49704364 2235 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2236 break; /* No way to continue */
2237 }
ee790063 2238 items = endlist - beglist;
f337b084
TH
2239 lookahead.flags = symptr->flags & ~group_modifiers;
2240 goto no_change;
18529408 2241 }
62f95557
IZ
2242 case 'X' | TYPE_IS_SHRIEKING:
2243 if (!len) /* Avoid division by 0 */
2244 len = 1;
f337b084
TH
2245 if (utf8) {
2246 char *hop, *last;
2247 I32 l = len;
2248 hop = last = start;
2249 while (hop < cur) {
2250 hop += UTF8SKIP(hop);
2251 if (--l == 0) {
2252 last = hop;
2253 l = len;
2254 }
2255 }
2256 if (last > cur)
2257 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2258 cur = last;
2259 break;
2260 }
2261 len = (cur-start) % len;
924ba076 2262 /* FALLTHROUGH */
a6ec74c1 2263 case 'X':
f337b084
TH
2264 if (utf8) {
2265 if (len < 1) goto no_change;
28be1210 2266 utf8_shrink:
f337b084
TH
2267 while (len > 0) {
2268 if (cur <= start)
28be1210
TH
2269 Perl_croak(aTHX_ "'%c' outside of string in pack",
2270 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2271 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2272 if (cur <= start)
28be1210
TH
2273 Perl_croak(aTHX_ "'%c' outside of string in pack",
2274 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2275 }
2276 len--;
2277 }
2278 } else {
fc241834 2279 shrink:
f337b084 2280 if (cur - start < len)
28be1210
TH
2281 Perl_croak(aTHX_ "'%c' outside of string in pack",
2282 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2283 cur -= len;
2284 }
2285 if (cur < start+symptr->strbeg) {
2286 /* Make sure group starts don't point into the void */
2287 tempsym_t *group;
9e27e96a 2288 const STRLEN length = cur-start;
f337b084
TH
2289 for (group = symptr;
2290 group && length < group->strbeg;
2291 group = group->previous) group->strbeg = length;
2292 lookahead.strbeg = length;
2293 }
a6ec74c1 2294 break;
fc241834
RGS
2295 case 'x' | TYPE_IS_SHRIEKING: {
2296 I32 ai32;
62f95557
IZ
2297 if (!len) /* Avoid division by 0 */
2298 len = 1;
230e1fce 2299 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2300 else ai32 = (cur - start) % len;
2301 if (ai32 == 0) goto no_change;
2302 len -= ai32;
2303 }
924ba076 2304 /* FALLTHROUGH */
a6ec74c1 2305 case 'x':
f337b084 2306 goto grow;
a6ec74c1
JH
2307 case 'A':
2308 case 'Z':
f337b084 2309 case 'a': {
f7fe979e 2310 const char *aptr;
f337b084 2311
a6ec74c1 2312 fromstr = NEXTFROM;
e62f0680 2313 aptr = SvPV_const(fromstr, fromlen);
f337b084 2314 if (DO_UTF8(fromstr)) {
f7fe979e 2315 const char *end, *s;
f337b084
TH
2316
2317 if (!utf8 && !SvUTF8(cat)) {
2318 marked_upgrade(aTHX_ cat, symptr);
2319 lookahead.flags |= FLAG_DO_UTF8;
2320 lookahead.strbeg = symptr->strbeg;
2321 utf8 = 1;
2322 start = SvPVX(cat);
2323 cur = start + SvCUR(cat);
2324 }
fc241834 2325 if (howlen == e_star) {
f337b084
TH
2326 if (utf8) goto string_copy;
2327 len = fromlen+1;
2328 }
2329 s = aptr;
2330 end = aptr + fromlen;
2331 fromlen = datumtype == 'Z' ? len-1 : len;
2332 while ((I32) fromlen > 0 && s < end) {
2333 s += UTF8SKIP(s);
2334 fromlen--;
2335 }
2336 if (s > end)
2337 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2338 if (utf8) {
fc241834 2339 len = fromlen;
f337b084
TH
2340 if (datumtype == 'Z') len++;
2341 fromlen = s-aptr;
2342 len += fromlen;
fc241834 2343
f337b084 2344 goto string_copy;
fc241834 2345 }
f337b084
TH
2346 fromlen = len - fromlen;
2347 if (datumtype == 'Z') fromlen--;
2348 if (howlen == e_star) {
2349 len = fromlen;
2350 if (datumtype == 'Z') len++;
fc241834 2351 }
f337b084 2352 GROWING(0, cat, start, cur, len);
fc241834 2353 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2354 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2355 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2356 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2357 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2358 cur += fromlen;
a6ec74c1 2359 len -= fromlen;
f337b084
TH
2360 } else if (utf8) {
2361 if (howlen == e_star) {
2362 len = fromlen;
2363 if (datumtype == 'Z') len++;
a6ec74c1 2364 }
f337b084
TH
2365 if (len <= (I32) fromlen) {
2366 fromlen = len;
2367 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2368 }
fc241834 2369 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2370 upgrade, so:
2371 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2372 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2373 len -= fromlen;
2374 while (fromlen > 0) {
230e1fce 2375 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2376 aptr++;
2377 fromlen--;
fc241834 2378 }
f337b084
TH
2379 } else {
2380 string_copy:
2381 if (howlen == e_star) {
2382 len = fromlen;
2383 if (datumtype == 'Z') len++;
2384 }
2385 if (len <= (I32) fromlen) {
2386 fromlen = len;
2387 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2388 }
f337b084
TH
2389 GROWING(0, cat, start, cur, len);
2390 Copy(aptr, cur, fromlen, char);
2391 cur += fromlen;
2392 len -= fromlen;
a6ec74c1 2393 }
f337b084
TH
2394 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2395 cur += len;
3c4fb04a 2396 SvTAINT(cat);
a6ec74c1 2397 break;
f337b084 2398 }
a6ec74c1 2399 case 'B':
f337b084 2400 case 'b': {
b83604b4 2401 const char *str, *end;
f337b084
TH
2402 I32 l, field_len;
2403 U8 bits;
2404 bool utf8_source;
2405 U32 utf8_flags;
a6ec74c1 2406
fc241834 2407 fromstr = NEXTFROM;
b83604b4 2408 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2409 end = str + fromlen;
2410 if (DO_UTF8(fromstr)) {
2411 utf8_source = TRUE;
041457d9 2412 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2413 } else {
2414 utf8_source = FALSE;
2415 utf8_flags = 0; /* Unused, but keep compilers happy */
2416 }
2417 if (howlen == e_star) len = fromlen;
2418 field_len = (len+7)/8;
2419 GROWING(utf8, cat, start, cur, field_len);
2420 if (len > (I32)fromlen) len = fromlen;
2421 bits = 0;
2422 l = 0;
2423 if (datumtype == 'B')
2424 while (l++ < len) {
2425 if (utf8_source) {
95b63a38 2426 UV val = 0;
f337b084
TH
2427 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2428 bits |= val & 1;
2429 } else bits |= *str++ & 1;
2430 if (l & 7) bits <<= 1;
fc241834 2431 else {
f337b084
TH
2432 PUSH_BYTE(utf8, cur, bits);
2433 bits = 0;
a6ec74c1
JH
2434 }
2435 }
f337b084
TH
2436 else
2437 /* datumtype == 'b' */
2438 while (l++ < len) {
2439 if (utf8_source) {
95b63a38 2440 UV val = 0;
f337b084
TH
2441 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2442 if (val & 1) bits |= 0x80;
2443 } else if (*str++ & 1)
2444 bits |= 0x80;
2445 if (l & 7) bits >>= 1;
fc241834 2446 else {
f337b084
TH
2447 PUSH_BYTE(utf8, cur, bits);
2448 bits = 0;
a6ec74c1
JH
2449 }
2450 }
f337b084
TH
2451 l--;
2452 if (l & 7) {
fc241834 2453 if (datumtype == 'B')
f337b084 2454 bits <<= 7 - (l & 7);
fc241834 2455 else
f337b084
TH
2456 bits >>= 7 - (l & 7);
2457 PUSH_BYTE(utf8, cur, bits);
2458 l += 7;
a6ec74c1 2459 }
f337b084
TH
2460 /* Determine how many chars are left in the requested field */
2461 l /= 8;
2462 if (howlen == e_star) field_len = 0;
2463 else field_len -= l;
2464 Zero(cur, field_len, char);
2465 cur += field_len;
a6ec74c1 2466 break;
f337b084 2467 }
a6ec74c1 2468 case 'H':
f337b084 2469 case 'h': {
10516c54 2470 const char *str, *end;
f337b084
TH
2471 I32 l, field_len;
2472 U8 bits;
2473 bool utf8_source;
2474 U32 utf8_flags;
a6ec74c1 2475
fc241834 2476 fromstr = NEXTFROM;
10516c54 2477 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2478 end = str + fromlen;
2479 if (DO_UTF8(fromstr)) {
2480 utf8_source = TRUE;
041457d9 2481 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2482 } else {
2483 utf8_source = FALSE;
2484 utf8_flags = 0; /* Unused, but keep compilers happy */
2485 }
2486 if (howlen == e_star) len = fromlen;
2487 field_len = (len+1)/2;
2488 GROWING(utf8, cat, start, cur, field_len);
2489 if (!utf8 && len > (I32)fromlen) len = fromlen;
2490 bits = 0;
2491 l = 0;
2492 if (datumtype == 'H')
2493 while (l++ < len) {
2494 if (utf8_source) {
95b63a38 2495 UV val = 0;
f337b084
TH
2496 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2497 if (val < 256 && isALPHA(val))
2498 bits |= (val + 9) & 0xf;
a6ec74c1 2499 else
f337b084
TH
2500 bits |= val & 0xf;
2501 } else if (isALPHA(*str))
2502 bits |= (*str++ + 9) & 0xf;
2503 else
2504 bits |= *str++ & 0xf;
2505 if (l & 1) bits <<= 4;
fc241834 2506 else {
f337b084
TH
2507 PUSH_BYTE(utf8, cur, bits);
2508 bits = 0;
a6ec74c1
JH
2509 }
2510 }
f337b084
TH
2511 else
2512 while (l++ < len) {
2513 if (utf8_source) {
95b63a38 2514 UV val = 0;
f337b084
TH
2515 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2516 if (val < 256 && isALPHA(val))
2517 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2518 else
f337b084
TH
2519 bits |= (val & 0xf) << 4;
2520 } else if (isALPHA(*str))
2521 bits |= ((*str++ + 9) & 0xf) << 4;
2522 else
2523 bits |= (*str++ & 0xf) << 4;
2524 if (l & 1) bits >>= 4;
fc241834 2525 else {
f337b084
TH
2526 PUSH_BYTE(utf8, cur, bits);
2527 bits = 0;
a6ec74c1 2528 }
fc241834 2529 }
f337b084
TH
2530 l--;
2531 if (l & 1) {
2532 PUSH_BYTE(utf8, cur, bits);
2533 l++;
2534 }
2535 /* Determine how many chars are left in the requested field */
2536 l /= 2;
2537 if (howlen == e_star) field_len = 0;
2538 else field_len -= l;
2539 Zero(cur, field_len, char);
2540 cur += field_len;
2541 break;
fc241834
RGS
2542 }
2543 case 'c':
f337b084
TH
2544 while (len-- > 0) {
2545 IV aiv;
2546 fromstr = NEXTFROM;
2547 aiv = SvIV(fromstr);
a2a5de95
NC
2548 if ((-128 > aiv || aiv > 127))
2549 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2550 "Character in 'c' format wrapped in pack");
585ec06d 2551 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2552 }
2553 break;
2554 case 'C':
f337b084
TH
2555 if (len == 0) {
2556 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2557 break;
2558 }
a6ec74c1 2559 while (len-- > 0) {
f337b084 2560 IV aiv;
a6ec74c1 2561 fromstr = NEXTFROM;
f337b084 2562 aiv = SvIV(fromstr);
a2a5de95
NC
2563 if ((0 > aiv || aiv > 0xff))
2564 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2565 "Character in 'C' format wrapped in pack");
1651fc44 2566 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2567 }
fc241834
RGS
2568 break;
2569 case 'W': {
2570 char *end;
670f1322 2571 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2572
2573 end = start+SvLEN(cat)-1;
2574 if (utf8) end -= UTF8_MAXLEN-1;
2575 while (len-- > 0) {
2576 UV auv;
2577 fromstr = NEXTFROM;
2578 auv = SvUV(fromstr);
2579 if (in_bytes) auv = auv % 0x100;
2580 if (utf8) {
2581 W_utf8:
2582 if (cur > end) {
2583 *cur = '\0';
b162af07 2584 SvCUR_set(cat, cur - start);
fc241834
RGS
2585
2586 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2587 end = start+SvLEN(cat)-UTF8_MAXLEN;
2588 }
c80e42f3
KW
2589 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2590 auv,
041457d9 2591 warn_utf8 ?
230e1fce 2592 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2593 } else {
2594 if (auv >= 0x100) {
2595 if (!SvUTF8(cat)) {
2596 *cur = '\0';
b162af07 2597 SvCUR_set(cat, cur - start);
fc241834
RGS
2598 marked_upgrade(aTHX_ cat, symptr);
2599 lookahead.flags |= FLAG_DO_UTF8;
2600 lookahead.strbeg = symptr->strbeg;
2601 utf8 = 1;
2602 start = SvPVX(cat);
2603 cur = start + SvCUR(cat);
2604 end = start+SvLEN(cat)-UTF8_MAXLEN;
2605 goto W_utf8;
2606 }
a2a5de95
NC
2607 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2608 "Character in 'W' format wrapped in pack");
fc241834
RGS
2609 auv &= 0xff;
2610 }
2611 if (cur >= end) {
2612 *cur = '\0';
b162af07 2613 SvCUR_set(cat, cur - start);
fc241834
RGS
2614 GROWING(0, cat, start, cur, len+1);
2615 end = start+SvLEN(cat)-1;
2616 }
fe2774ed 2617 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2618 }
2619 }
2620 break;
fc241834
RGS
2621 }
2622 case 'U': {
2623 char *end;
2624
2625 if (len == 0) {
2626 if (!(symptr->flags & FLAG_DO_UTF8)) {
2627 marked_upgrade(aTHX_ cat, symptr);
2628 lookahead.flags |= FLAG_DO_UTF8;
2629 lookahead.strbeg = symptr->strbeg;
2630 }
2631 utf8 = 0;
2632 goto no_change;
2633 }
2634
2635 end = start+SvLEN(cat);
2636 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2637 while (len-- > 0) {
fc241834 2638 UV auv;
a6ec74c1 2639 fromstr = NEXTFROM;
fc241834
RGS
2640 auv = SvUV(fromstr);
2641 if (utf8) {
230e1fce 2642 U8 buffer[UTF8_MAXLEN], *endb;
c80e42f3 2643 endb = uvchr_to_utf8_flags(buffer, auv,
041457d9 2644 warn_utf8 ?
fc241834
RGS
2645 0 : UNICODE_ALLOW_ANY);
2646 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2647 *cur = '\0';
b162af07 2648 SvCUR_set(cat, cur - start);
fc241834
RGS
2649 GROWING(0, cat, start, cur,
2650 len+(endb-buffer)*UTF8_EXPAND);
2651 end = start+SvLEN(cat);
2652 }
3a88beaa 2653 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
fc241834
RGS
2654 } else {
2655 if (cur >= end) {
2656 *cur = '\0';
b162af07 2657 SvCUR_set(cat, cur - start);
fc241834
RGS
2658 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2659 end = start+SvLEN(cat)-UTF8_MAXLEN;
2660 }
c80e42f3 2661 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
041457d9 2662 warn_utf8 ?
230e1fce 2663 0 : UNICODE_ALLOW_ANY);
fc241834 2664 }
a6ec74c1 2665 }
a6ec74c1 2666 break;
fc241834 2667 }
a6ec74c1
JH
2668 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2669 case 'f':
a6ec74c1 2670 while (len-- > 0) {
f337b084
TH
2671 float afloat;
2672 NV anv;
a6ec74c1 2673 fromstr = NEXTFROM;
f337b084 2674 anv = SvNV(fromstr);
85bba25f 2675# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2676 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2677 * on Alpha; fake it if we don't have them.
2678 */
f337b084 2679 if (anv > FLT_MAX)
fc241834 2680 afloat = FLT_MAX;
f337b084 2681 else if (anv < -FLT_MAX)
fc241834 2682 afloat = -FLT_MAX;
f337b084 2683 else afloat = (float)anv;
baf3cf9c 2684# else
f337b084 2685 afloat = (float)anv;
baf3cf9c 2686# endif
3a88beaa 2687 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2688 }
2689 break;
2690 case 'd':
a6ec74c1 2691 while (len-- > 0) {
f337b084
TH
2692 double adouble;
2693 NV anv;
a6ec74c1 2694 fromstr = NEXTFROM;
f337b084 2695 anv = SvNV(fromstr);
85bba25f 2696# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2697 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2698 * on Alpha; fake it if we don't have them.
2699 */
f337b084 2700 if (anv > DBL_MAX)
fc241834 2701 adouble = DBL_MAX;
f337b084 2702 else if (anv < -DBL_MAX)
fc241834 2703 adouble = -DBL_MAX;
f337b084 2704 else adouble = (double)anv;
baf3cf9c 2705# else
f337b084 2706 adouble = (double)anv;
baf3cf9c 2707# endif
3a88beaa 2708 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2709 }
2710 break;
fc241834 2711 case 'F': {
275663fa 2712 NV_bytes anv;
1109a392 2713 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2714 while (len-- > 0) {
2715 fromstr = NEXTFROM;
cd07c537
DM
2716#ifdef __GNUC__
2717 /* to work round a gcc/x86 bug; don't use SvNV */
2718 anv.nv = sv_2nv(fromstr);
2719#else
275663fa 2720 anv.nv = SvNV(fromstr);
cd07c537 2721#endif
3a88beaa 2722 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2723 }
2724 break;
fc241834 2725 }
92d41999 2726#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2727 case 'D': {
275663fa 2728 ld_bytes aldouble;
1109a392
MHM
2729 /* long doubles can have unused bits, which may be nonzero */
2730 Zero(&aldouble, 1, long double);
92d41999
JH
2731 while (len-- > 0) {
2732 fromstr = NEXTFROM;
cd07c537
DM
2733# ifdef __GNUC__
2734 /* to work round a gcc/x86 bug; don't use SvNV */
2735 aldouble.ld = (long double)sv_2nv(fromstr);
2736# else
275663fa 2737 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2738# endif
3a88beaa
NC
2739 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2740 needs_swap);
92d41999
JH
2741 }
2742 break;
fc241834 2743 }
92d41999 2744#endif
068bd2e7 2745 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2746 case 'n':
2747 while (len-- > 0) {
f337b084 2748 I16 ai16;
a6ec74c1 2749 fromstr = NEXTFROM;
ef108786 2750 ai16 = (I16)SvIV(fromstr);
ef108786 2751 ai16 = PerlSock_htons(ai16);
3a88beaa 2752 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2753 }
2754 break;
068bd2e7 2755 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2756 case 'v':
2757 while (len-- > 0) {
f337b084 2758 I16 ai16;
a6ec74c1 2759 fromstr = NEXTFROM;
ef108786 2760 ai16 = (I16)SvIV(fromstr);
ef108786 2761 ai16 = htovs(ai16);
3a88beaa 2762 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2763 }
2764 break;
49704364 2765 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2766#if SHORTSIZE != SIZE16
fc241834 2767 while (len-- > 0) {
f337b084 2768 unsigned short aushort;
fc241834
RGS
2769 fromstr = NEXTFROM;
2770 aushort = SvUV(fromstr);
3a88beaa 2771 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2772 }
49704364
WL
2773 break;
2774#else
924ba076 2775 /* FALLTHROUGH */
a6ec74c1 2776#endif
49704364 2777 case 'S':
fc241834 2778 while (len-- > 0) {
f337b084 2779 U16 au16;
fc241834
RGS
2780 fromstr = NEXTFROM;
2781 au16 = (U16)SvUV(fromstr);
3a88beaa 2782 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2783 }
2784 break;
49704364 2785 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2786#if SHORTSIZE != SIZE16
fc241834 2787 while (len-- > 0) {
f337b084 2788 short ashort;
fc241834
RGS
2789 fromstr = NEXTFROM;
2790 ashort = SvIV(fromstr);
3a88beaa 2791 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2792 }
49704364
WL
2793 break;
2794#else
924ba076 2795 /* FALLTHROUGH */
a6ec74c1 2796#endif
49704364
WL
2797 case 's':
2798 while (len-- > 0) {
f337b084 2799 I16 ai16;
49704364 2800 fromstr = NEXTFROM;
ef108786 2801 ai16 = (I16)SvIV(fromstr);
3a88beaa 2802 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2803 }
2804 break;
2805 case 'I':
49704364 2806 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2807 while (len-- > 0) {
f337b084 2808 unsigned int auint;
a6ec74c1
JH
2809 fromstr = NEXTFROM;
2810 auint = SvUV(fromstr);
3a88beaa 2811 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2812 }
2813 break;
92d41999
JH
2814 case 'j':
2815 while (len-- > 0) {
f337b084 2816 IV aiv;
92d41999
JH
2817 fromstr = NEXTFROM;
2818 aiv = SvIV(fromstr);
3a88beaa 2819 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2820 }
2821 break;
2822 case 'J':
2823 while (len-- > 0) {
f337b084 2824 UV auv;
92d41999
JH
2825 fromstr = NEXTFROM;
2826 auv = SvUV(fromstr);
3a88beaa 2827 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2828 }
2829 break;
a6ec74c1
JH
2830 case 'w':
2831 while (len-- > 0) {
f337b084 2832 NV anv;
a6ec74c1 2833 fromstr = NEXTFROM;
15e9f109 2834 anv = SvNV(fromstr);
a6ec74c1 2835
f337b084
TH
2836 if (anv < 0) {
2837 *cur = '\0';
b162af07 2838 SvCUR_set(cat, cur - start);
49704364 2839 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2840 }
a6ec74c1 2841
196b62db
NC
2842 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2843 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2844 any negative IVs will have already been got by the croak()
2845 above. IOK is untrue for fractions, so we test them
2846 against UV_MAX_P1. */
f337b084
TH
2847 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2848 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2849 char *in = buf + sizeof(buf);
196b62db 2850 UV auv = SvUV(fromstr);
a6ec74c1
JH
2851
2852 do {
eb160463 2853 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2854 auv >>= 7;
2855 } while (auv);
2856 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2857 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2858 in, (buf + sizeof(buf)) - in);
2859 } else if (SvPOKp(fromstr))
2860 goto w_string;
a6ec74c1 2861 else if (SvNOKp(fromstr)) {
0258719b 2862 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2863 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2864 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2865 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2866 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2867 Some C compilers are strict about integral constant
2868 expressions so we conservatively divide by a slightly
2869 smaller integer instead of multiplying by the exact
2870 floating-point value.
0258719b
NC
2871 */
2872#ifdef NV_MAX_10_EXP
f337b084 2873 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2874 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2875#else
f337b084 2876 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2877 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2878#endif
a6ec74c1
JH
2879 char *in = buf + sizeof(buf);
2880
8b6e33c7 2881 anv = Perl_floor(anv);
a6ec74c1 2882 do {
8b6e33c7 2883 const NV next = Perl_floor(anv / 128);
a6ec74c1 2884 if (in <= buf) /* this cannot happen ;-) */
49704364 2885 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2886 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2887 anv = next;
2888 } while (anv > 0);
a6ec74c1 2889 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2890 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2891 in, (buf + sizeof(buf)) - in);
2892 } else {
8b6e33c7
AL
2893 const char *from;
2894 char *result, *in;
735b914b
JH
2895 SV *norm;
2896 STRLEN len;
2897 bool done;
2898
f337b084 2899 w_string:
735b914b 2900 /* Copy string and check for compliance */
349d4f2f 2901 from = SvPV_const(fromstr, len);
735b914b 2902 if ((norm = is_an_int(from, len)) == NULL)
49704364 2903 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2904
a02a5408 2905 Newx(result, len, char);
735b914b
JH
2906 in = result + len;
2907 done = FALSE;
f337b084 2908 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2909 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2910 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2911 in, (result + len) - in);
735b914b
JH
2912 Safefree(result);
2913 SvREFCNT_dec(norm); /* free norm */
fc241834 2914 }
a6ec74c1
JH
2915 }
2916 break;
2917 case 'i':
49704364 2918 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2919 while (len-- > 0) {
f337b084 2920 int aint;
a6ec74c1
JH
2921 fromstr = NEXTFROM;
2922 aint = SvIV(fromstr);
3a88beaa 2923 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2924 }
2925 break;
068bd2e7 2926 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2927 case 'N':
2928 while (len-- > 0) {
f337b084 2929 U32 au32;
a6ec74c1 2930 fromstr = NEXTFROM;
ef108786 2931 au32 = SvUV(fromstr);
ef108786 2932 au32 = PerlSock_htonl(au32);
3a88beaa 2933 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2934 }
2935 break;
068bd2e7 2936 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2937 case 'V':
2938 while (len-- > 0) {
f337b084 2939 U32 au32;
a6ec74c1 2940 fromstr = NEXTFROM;
ef108786 2941 au32 = SvUV(fromstr);
ef108786 2942 au32 = htovl(au32);
3a88beaa 2943 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2944 }
2945 break;
49704364 2946 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2947#if LONGSIZE != SIZE32
fc241834 2948 while (len-- > 0) {
f337b084 2949 unsigned long aulong;
fc241834
RGS
2950 fromstr = NEXTFROM;
2951 aulong = SvUV(fromstr);
3a88beaa 2952 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2953 }
49704364
WL
2954 break;
2955#else
2956 /* Fall though! */
a6ec74c1 2957#endif
49704364 2958 case 'L':
fc241834 2959 while (len-- > 0) {
f337b084 2960 U32 au32;
fc241834
RGS
2961 fromstr = NEXTFROM;
2962 au32 = SvUV(fromstr);
3a88beaa 2963 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2964 }
2965 break;
49704364 2966 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2967#if LONGSIZE != SIZE32
fc241834 2968 while (len-- > 0) {
f337b084 2969 long along;
fc241834
RGS
2970 fromstr = NEXTFROM;
2971 along = SvIV(fromstr);
3a88beaa 2972 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 2973 }
49704364
WL
2974 break;
2975#else
2976 /* Fall though! */
a6ec74c1 2977#endif
49704364
WL
2978 case 'l':
2979 while (len-- > 0) {
f337b084 2980 I32 ai32;
49704364 2981 fromstr = NEXTFROM;
ef108786 2982 ai32 = SvIV(fromstr);
3a88beaa 2983 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
2984 }
2985 break;
c174bf3b 2986#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
2987 case 'Q':
2988 while (len-- > 0) {
f337b084 2989 Uquad_t auquad;
a6ec74c1 2990 fromstr = NEXTFROM;
f337b084 2991 auquad = (Uquad_t) SvUV(fromstr);
3a88beaa 2992 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
2993 }
2994 break;
2995 case 'q':
2996 while (len-- > 0) {
f337b084 2997 Quad_t aquad;
a6ec74c1
JH
2998 fromstr = NEXTFROM;
2999 aquad = (Quad_t)SvIV(fromstr);
3a88beaa 3000 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3001 }
3002 break;
1640b983 3003#endif
a6ec74c1
JH
3004 case 'P':
3005 len = 1; /* assume SV is correct length */
f337b084 3006 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3007 /* FALLTHROUGH */
a6ec74c1
JH
3008 case 'p':
3009 while (len-- > 0) {
83003860 3010 const char *aptr;
f337b084 3011
a6ec74c1 3012 fromstr = NEXTFROM;
28a4f200
TH
3013 SvGETMAGIC(fromstr);
3014 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3015 else {
a6ec74c1
JH
3016 /* XXX better yet, could spirit away the string to
3017 * a safe spot and hang on to it until the result
3018 * of pack() (and all copies of the result) are
3019 * gone.
3020 */
041457d9 3021 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3022 !SvREADONLY(fromstr)))) {
3023 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3024 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3025 }
3026 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3027 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3028 else
2596d9fe 3029 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3030 }
3a88beaa 3031 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3032 }
3033 break;
fc241834 3034 case 'u': {
f7fe979e 3035 const char *aptr, *aend;
fc241834 3036 bool from_utf8;
f337b084 3037
a6ec74c1 3038 fromstr = NEXTFROM;
fc241834
RGS
3039 if (len <= 2) len = 45;
3040 else len = len / 3 * 3;
3041 if (len >= 64) {
a2a5de95
NC
3042 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3043 "Field too wide in 'u' format in pack");
fc241834
RGS
3044 len = 63;
3045 }
83003860 3046 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3047 from_utf8 = DO_UTF8(fromstr);
3048 if (from_utf8) {
3049 aend = aptr + fromlen;
3f63b0e5 3050 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3051 } else aend = NULL; /* Unused, but keep compilers happy */
3052 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3053 while (fromlen > 0) {
fc241834 3054 U8 *end;
a6ec74c1 3055 I32 todo;
fc241834 3056 U8 hunk[1+63/3*4+1];
a6ec74c1 3057
eb160463 3058 if ((I32)fromlen > len)
a6ec74c1
JH
3059 todo = len;
3060 else
3061 todo = fromlen;
fc241834
RGS
3062 if (from_utf8) {
3063 char buffer[64];
3064 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3065 'u' | TYPE_IS_PACK)) {
3066 *cur = '\0';
b162af07 3067 SvCUR_set(cat, cur - start);
5637ef5b
NC
3068 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3069 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3070 aptr, aend, buffer, (long) todo);
fc241834
RGS
3071 }
3072 end = doencodes(hunk, buffer, todo);
3073 } else {
3074 end = doencodes(hunk, aptr, todo);
3075 aptr += todo;
3076 }
3a88beaa 3077 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3078 fromlen -= todo;
3079 }
a6ec74c1
JH
3080 break;
3081 }
f337b084
TH
3082 }
3083 *cur = '\0';
b162af07 3084 SvCUR_set(cat, cur - start);
f337b084 3085 no_change:
49704364 3086 *symptr = lookahead;
a6ec74c1 3087 }
49704364 3088 return beglist;
18529408
IZ
3089}
3090#undef NEXTFROM
3091
3092
3093PP(pp_pack)
3094{
97aff369 3095 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3096 SV *cat = TARG;
18529408 3097 STRLEN fromlen;
349d4f2f 3098 SV *pat_sv = *++MARK;
eb578fdb
KW
3099 const char *pat = SvPV_const(pat_sv, fromlen);
3100 const char *patend = pat + fromlen;
18529408
IZ
3101
3102 MARK++;
76f68e9b 3103 sv_setpvs(cat, "");
f337b084 3104 SvUTF8_off(cat);
18529408 3105
7accc089 3106 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3107
a6ec74c1
JH
3108 SvSETMAGIC(cat);
3109 SP = ORIGMARK;
3110 PUSHs(cat);
3111 RETURN;
3112}
a6ec74c1 3113
73cb7263
NC
3114/*
3115 * Local variables:
3116 * c-indentation-style: bsd
3117 * c-basic-offset: 4
14d04a33 3118 * indent-tabs-mode: nil
73cb7263
NC
3119 * End:
3120 *
14d04a33 3121 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3122 */