This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Annotate the "cold" paths in pp_pack.c with UNLIKELY().
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
f337b084 154 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
WL
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
219#if 'I' == 73 && 'J' == 74
220/* On an ASCII/ISO kind of system */
221#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222#else
223/*
224 Some other sort of character set - use memchr() so we don't match
225 the null byte.
226 */
227#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
228#endif
229
66c611c5 230/* type modifiers */
62f95557 231#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
232#define TYPE_IS_BIG_ENDIAN 0x200
233#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 234#define TYPE_IS_PACK 0x800
1109a392 235#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 236#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
237#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
238
7212898e
NC
239# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
241
242# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
243
78d46eaa 244#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 245#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
246#define PACK_SIZE_MASK 0x3F
247
298bc19c 248#include "packsizetables.c"
78d46eaa 249
20aa3a38
NC
250static void
251S_reverse_copy(const char *src, char *dest, STRLEN len)
252{
253 dest += len;
254 while (len--)
255 *--dest = *src++;
256}
257
08ca2aa3 258STATIC U8
f7fe979e 259uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 260{
08ca2aa3 261 STRLEN retlen;
0bcc34c2 262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 264 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 270 if (val >= 0x100) {
a2a5de95
NC
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
274 val &= 0xff;
275 }
276 *s += retlen;
fe2774ed 277 return (U8)val;
08ca2aa3
TH
278}
279
f337b084
TH
280#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
282 *(U8 *)(s)++)
283
284STATIC bool
f7fe979e 285uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
286{
287 UV val;
288 STRLEN retlen;
f7fe979e 289 const char *from = *s;
08ca2aa3 290 int bad = 0;
f7fe979e 291 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
293 const bool needs_swap = NEEDS_SWAP(datumtype);
294
228e69a7 295 if (UNLIKELY(needs_swap))
20aa3a38
NC
296 buf += buf_len;
297
08ca2aa3
TH
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
f337b084 300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
303 bad |= 1;
304 } else from += retlen;
305 if (val >= 0x100) {
306 bad |= 2;
307 val &= 0xff;
308 }
228e69a7 309 if (UNLIKELY(needs_swap))
20aa3a38
NC
310 *(U8 *)--buf = (U8)val;
311 else
312 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
313 }
314 /* We have enough characters for the buffer. Did we have problems ? */
315 if (bad) {
316 if (bad & 1) {
317 /* Rewalk the string fragment while warning */
f7fe979e 318 const char *ptr;
9e27e96a 319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
f337b084 322 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
323 }
324 if (from > end) from = end;
325 }
a2a5de95
NC
326 if ((bad & 2))
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 328 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
332 }
333 *s = from;
334 return TRUE;
335}
336
337STATIC bool
f7fe979e 338next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 339{
97aff369 340 dVAR;
08ca2aa3 341 STRLEN retlen;
0bcc34c2 342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
343 if (val >= 0x100 || !ISUUCHAR(val) ||
344 retlen == (STRLEN) -1 || retlen == 0) {
345 *out = 0;
346 return FALSE;
347 }
348 *out = PL_uudmap[val] & 077;
f337b084 349 *s += retlen;
08ca2aa3
TH
350 return TRUE;
351}
78d46eaa 352
64844641 353STATIC char *
3a88beaa 354S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
7918f24d
NC
355 PERL_ARGS_ASSERT_BYTES_TO_UNI;
356
228e69a7 357 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
358 const U8 *p = start + len;
359 while (p-- > start) {
360 const UV uv = NATIVE_TO_ASCII(*p);
361 if (UNI_IS_INVARIANT(uv))
362 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
363 else {
364 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
365 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
366 }
367 }
368 } else {
369 const U8 * const end = start + len;
370 while (start < end) {
371 const UV uv = NATIVE_TO_ASCII(*start);
372 if (UNI_IS_INVARIANT(uv))
373 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
374 else {
375 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
376 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
377 }
378 start++;
379 }
f337b084 380 }
64844641 381 return dest;
f337b084
TH
382}
383
3a88beaa 384#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 385STMT_START { \
228e69a7 386 if (UNLIKELY(utf8)) \
3a88beaa 387 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
230e1fce 388 else { \
228e69a7 389 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
390 S_reverse_copy((char *)(buf), cur, len); \
391 else \
392 Copy(buf, cur, len, char); \
230e1fce
NC
393 (cur) += (len); \
394 } \
f337b084
TH
395} STMT_END
396
397#define GROWING(utf8, cat, start, cur, in_len) \
398STMT_START { \
399 STRLEN glen = (in_len); \
3473cf63 400 if (utf8) glen *= UTF8_EXPAND; \
f337b084 401 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 402 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
403 (cur) = (start) + SvCUR(cat); \
404 } \
405} STMT_END
406
407#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
408STMT_START { \
f7fe979e 409 const STRLEN glen = (in_len); \
f337b084 410 STRLEN gl = glen; \
3473cf63 411 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
412 if ((cur) + gl >= (start) + SvLEN(cat)) { \
413 *cur = '\0'; \
b162af07 414 SvCUR_set((cat), (cur) - (start)); \
0bd48802 415 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
416 (cur) = (start) + SvCUR(cat); \
417 } \
3a88beaa 418 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
419} STMT_END
420
421#define PUSH_BYTE(utf8, s, byte) \
422STMT_START { \
423 if (utf8) { \
f7fe979e 424 const U8 au8 = (byte); \
3a88beaa 425 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
f337b084
TH
426 } else *(U8 *)(s)++ = (byte); \
427} STMT_END
428
429/* Only to be used inside a loop (see the break) */
430#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
431STMT_START { \
432 STRLEN retlen; \
433 if (str >= end) break; \
434 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
435 if (retlen == (STRLEN) -1 || retlen == 0) { \
436 *cur = '\0'; \
437 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
438 } \
439 str += retlen; \
440} STMT_END
441
f7fe979e
AL
442static const char *_action( const tempsym_t* symptr )
443{
10edeb5d 444 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
445}
446
206947d2 447/* Returns the sizeof() struct described by pat */
028d1f6d 448STATIC I32
f337b084 449S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 450{
f337b084 451 I32 total = 0;
206947d2 452
7918f24d
NC
453 PERL_ARGS_ASSERT_MEASURE_STRUCT;
454
49704364 455 while (next_symbol(symptr)) {
f337b084 456 I32 len;
f7fe979e 457 int size;
f337b084
TH
458
459 switch (symptr->howlen) {
fc241834 460 case e_star:
49704364 461 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 462 _action( symptr ) );
49704364 463 break;
f337b084
TH
464 default:
465 /* e_no_len and e_number */
466 len = symptr->length;
467 break;
49704364
WL
468 }
469
a7a3cfaa 470 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 471 if (!size) {
f7fe979e 472 int star;
80a13697
NC
473 /* endianness doesn't influence the size of a type */
474 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
475 default:
476 Perl_croak(aTHX_ "Invalid type '%c' in %s",
477 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 478 _action( symptr ) );
28be1210
TH
479 case '.' | TYPE_IS_SHRIEKING:
480 case '@' | TYPE_IS_SHRIEKING:
80a13697 481 case '@':
28be1210 482 case '.':
80a13697
NC
483 case '/':
484 case 'U': /* XXXX Is it correct? */
485 case 'w':
486 case 'u':
487 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 488 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 489 _action( symptr ) );
80a13697
NC
490 case '%':
491 size = 0;
492 break;
493 case '(':
fc241834
RGS
494 {
495 tempsym_t savsym = *symptr;
496 symptr->patptr = savsym.grpbeg;
497 symptr->patend = savsym.grpend;
498 /* XXXX Theoretically, we need to measure many times at
499 different positions, since the subexpression may contain
500 alignment commands, but be not of aligned length.
501 Need to detect this and croak(). */
502 size = measure_struct(symptr);
503 *symptr = savsym;
504 break;
505 }
80a13697
NC
506 case 'X' | TYPE_IS_SHRIEKING:
507 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
508 */
509 if (!len) /* Avoid division by 0 */
510 len = 1;
511 len = total % len; /* Assumed: the start is aligned. */
512 /* FALL THROUGH */
513 case 'X':
514 size = -1;
515 if (total < len)
f7fe979e 516 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
517 break;
518 case 'x' | TYPE_IS_SHRIEKING:
519 if (!len) /* Avoid division by 0 */
520 len = 1;
521 star = total % len; /* Assumed: the start is aligned. */
522 if (star) /* Other portable ways? */
523 len = len - star;
524 else
525 len = 0;
526 /* FALL THROUGH */
527 case 'x':
528 case 'A':
529 case 'Z':
530 case 'a':
80a13697
NC
531 size = 1;
532 break;
533 case 'B':
534 case 'b':
535 len = (len + 7)/8;
536 size = 1;
537 break;
538 case 'H':
539 case 'h':
540 len = (len + 1)/2;
541 size = 1;
542 break;
78d46eaa 543
80a13697
NC
544 case 'P':
545 len = 1;
546 size = sizeof(char*);
78d46eaa
NC
547 break;
548 }
206947d2
IZ
549 }
550 total += len * size;
551 }
552 return total;
553}
554
49704364
WL
555
556/* locate matching closing parenthesis or bracket
557 * returns char pointer to char after match, or NULL
558 */
f7fe979e 559STATIC const char *
5aaab254 560S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 561{
7918f24d
NC
562 PERL_ARGS_ASSERT_GROUP_END;
563
49704364 564 while (patptr < patend) {
f7fe979e 565 const char c = *patptr++;
49704364
WL
566
567 if (isSPACE(c))
568 continue;
569 else if (c == ender)
570 return patptr-1;
571 else if (c == '#') {
572 while (patptr < patend && *patptr != '\n')
573 patptr++;
574 continue;
575 } else if (c == '(')
576 patptr = group_end(patptr, patend, ')') + 1;
577 else if (c == '[')
578 patptr = group_end(patptr, patend, ']') + 1;
18529408 579 }
49704364
WL
580 Perl_croak(aTHX_ "No group ending character '%c' found in template",
581 ender);
582 return 0;
18529408
IZ
583}
584
49704364
WL
585
586/* Convert unsigned decimal number to binary.
587 * Expects a pointer to the first digit and address of length variable
588 * Advances char pointer to 1st non-digit char and returns number
fc241834 589 */
f7fe979e 590STATIC const char *
5aaab254 591S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
592{
593 I32 len = *patptr++ - '0';
7918f24d
NC
594
595 PERL_ARGS_ASSERT_GET_NUM;
596
49704364
WL
597 while (isDIGIT(*patptr)) {
598 if (len >= 0x7FFFFFFF/10)
599 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
600 len = (len * 10) + (*patptr++ - '0');
601 }
602 *lenptr = len;
603 return patptr;
604}
605
606/* The marvellous template parsing routine: Using state stored in *symptr,
607 * locates next template code and count
608 */
609STATIC bool
f337b084 610S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 611{
f7fe979e 612 const char* patptr = symptr->patptr;
0bcc34c2 613 const char* const patend = symptr->patend;
49704364 614
7918f24d
NC
615 PERL_ARGS_ASSERT_NEXT_SYMBOL;
616
49704364
WL
617 symptr->flags &= ~FLAG_SLASH;
618
619 while (patptr < patend) {
620 if (isSPACE(*patptr))
621 patptr++;
622 else if (*patptr == '#') {
623 patptr++;
624 while (patptr < patend && *patptr != '\n')
625 patptr++;
626 if (patptr < patend)
627 patptr++;
628 } else {
fc241834 629 /* We should have found a template code */
49704364 630 I32 code = *patptr++ & 0xFF;
66c611c5 631 U32 inherited_modifiers = 0;
49704364
WL
632
633 if (code == ','){ /* grandfather in commas but with a warning */
634 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
635 symptr->flags |= FLAG_COMMA;
636 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 637 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
638 }
639 continue;
640 }
fc241834 641
49704364 642 /* for '(', skip to ')' */
fc241834 643 if (code == '(') {
49704364
WL
644 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
645 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 646 _action( symptr ) );
49704364
WL
647 symptr->grpbeg = patptr;
648 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
649 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
650 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 651 _action( symptr ) );
49704364
WL
652 }
653
66c611c5
MHM
654 /* look for group modifiers to inherit */
655 if (TYPE_ENDIANNESS(symptr->flags)) {
656 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
657 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
658 }
659
1109a392
MHM
660 /* look for modifiers */
661 while (patptr < patend) {
b7787f18
AL
662 const char *allowed;
663 I32 modifier;
1109a392
MHM
664 switch (*patptr) {
665 case '!':
666 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 667 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
668 break;
669 case '>':
670 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 671 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
672 break;
673 case '<':
674 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 675 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
676 break;
677 default:
b7787f18
AL
678 allowed = "";
679 modifier = 0;
1109a392
MHM
680 break;
681 }
66c611c5 682
1109a392
MHM
683 if (modifier == 0)
684 break;
66c611c5 685
1109a392
MHM
686 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
687 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 688 allowed, _action( symptr ) );
66c611c5
MHM
689
690 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 691 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 692 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
693 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
694 TYPE_ENDIANNESS_MASK)
695 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 696 *patptr, _action( symptr ) );
66c611c5 697
a2a5de95
NC
698 if ((code & modifier)) {
699 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
700 "Duplicate modifier '%c' after '%c' in %s",
701 *patptr, (int) TYPE_NO_MODIFIERS(code),
702 _action( symptr ) );
1109a392 703 }
66c611c5 704
1109a392
MHM
705 code |= modifier;
706 patptr++;
49704364
WL
707 }
708
66c611c5
MHM
709 /* inherit modifiers */
710 code |= inherited_modifiers;
711
fc241834 712 /* look for count and/or / */
49704364
WL
713 if (patptr < patend) {
714 if (isDIGIT(*patptr)) {
715 patptr = get_num( patptr, &symptr->length );
716 symptr->howlen = e_number;
717
718 } else if (*patptr == '*') {
719 patptr++;
720 symptr->howlen = e_star;
721
722 } else if (*patptr == '[') {
f7fe979e 723 const char* lenptr = ++patptr;
49704364
WL
724 symptr->howlen = e_number;
725 patptr = group_end( patptr, patend, ']' ) + 1;
726 /* what kind of [] is it? */
727 if (isDIGIT(*lenptr)) {
728 lenptr = get_num( lenptr, &symptr->length );
729 if( *lenptr != ']' )
730 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 731 _action( symptr ) );
49704364
WL
732 } else {
733 tempsym_t savsym = *symptr;
734 symptr->patend = patptr-1;
735 symptr->patptr = lenptr;
736 savsym.length = measure_struct(symptr);
737 *symptr = savsym;
738 }
739 } else {
740 symptr->howlen = e_no_len;
741 symptr->length = 1;
742 }
743
744 /* try to find / */
745 while (patptr < patend) {
746 if (isSPACE(*patptr))
747 patptr++;
748 else if (*patptr == '#') {
749 patptr++;
750 while (patptr < patend && *patptr != '\n')
751 patptr++;
752 if (patptr < patend)
753 patptr++;
754 } else {
66c611c5 755 if (*patptr == '/') {
49704364
WL
756 symptr->flags |= FLAG_SLASH;
757 patptr++;
66c611c5
MHM
758 if (patptr < patend &&
759 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 760 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 761 _action( symptr ) );
49704364
WL
762 }
763 break;
764 }
18529408 765 }
49704364
WL
766 } else {
767 /* at end - no count, no / */
768 symptr->howlen = e_no_len;
769 symptr->length = 1;
770 }
771
772 symptr->code = code;
fc241834 773 symptr->patptr = patptr;
49704364 774 return TRUE;
18529408 775 }
49704364 776 }
fc241834 777 symptr->patptr = patptr;
49704364 778 return FALSE;
18529408
IZ
779}
780
18529408 781/*
fc241834 782 There is no way to cleanly handle the case where we should process the
08ca2aa3 783 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
784 (e.g. estimates like strend-s as an upper bound for the number of
785 characters left wouldn't work). So if we foresee the need of this
786 (pattern starts with U or contains U0), we want to work on the encoded
787 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
788 themselves if they need to do a lot of unpacks like this on it
789*/
fc241834 790STATIC bool
08ca2aa3
TH
791need_utf8(const char *pat, const char *patend)
792{
793 bool first = TRUE;
7918f24d
NC
794
795 PERL_ARGS_ASSERT_NEED_UTF8;
796
08ca2aa3
TH
797 while (pat < patend) {
798 if (pat[0] == '#') {
799 pat++;
f7fe979e 800 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
801 if (!pat) return FALSE;
802 } else if (pat[0] == 'U') {
803 if (first || pat[1] == '0') return TRUE;
804 } else first = FALSE;
805 pat++;
806 }
807 return FALSE;
808}
809
810STATIC char
811first_symbol(const char *pat, const char *patend) {
7918f24d
NC
812 PERL_ARGS_ASSERT_FIRST_SYMBOL;
813
08ca2aa3
TH
814 while (pat < patend) {
815 if (pat[0] != '#') return pat[0];
816 pat++;
f7fe979e 817 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
818 if (!pat) return 0;
819 pat++;
820 }
821 return 0;
822}
823
824/*
7accc089
JH
825=for apidoc unpackstring
826
21ebfc7a
DM
827The engine implementing the unpack() Perl function.
828
829Using the template pat..patend, this function unpacks the string
830s..strend into a number of mortal SVs, which it pushes onto the perl
831argument (@_) stack (so you will need to issue a C<PUTBACK> before and
832C<SPAGAIN> after the call to this function). It returns the number of
833pushed elements.
834
835The strend and patend pointers should point to the byte following the last
836character of each string.
837
838Although this function returns its values on the perl argument stack, it
839doesn't take any parameters from that stack (and thus in particular
840there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
841example).
7accc089
JH
842
843=cut */
844
845I32
f7fe979e 846Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 847{
f7fe979e 848 tempsym_t sym;
08ca2aa3 849
7918f24d
NC
850 PERL_ARGS_ASSERT_UNPACKSTRING;
851
f337b084 852 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
853 else if (need_utf8(pat, patend)) {
854 /* We probably should try to avoid this in case a scalar context call
855 wouldn't get to the "U0" */
856 STRLEN len = strend - s;
230e1fce 857 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
858 SAVEFREEPV(s);
859 strend = s + len;
f337b084 860 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
861 }
862
f337b084
TH
863 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
864 flags |= FLAG_PARSE_UTF8;
08ca2aa3 865
f7fe979e 866 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
867
868 return unpack_rec(&sym, s, s, strend, NULL );
869}
870
4136a0f7 871STATIC I32
f7fe979e 872S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 873{
27da23d5 874 dVAR; dSP;
3297d27d 875 SV *sv = NULL;
f7fe979e 876 const I32 start_sp_offset = SP - PL_stack_base;
49704364 877 howlen_t howlen;
a6ec74c1 878 I32 checksum = 0;
92d41999 879 UV cuv = 0;
a6ec74c1 880 NV cdouble = 0.0;
f337b084 881 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 882 bool beyond = FALSE;
21c16052 883 bool explicit_length;
9e27e96a 884 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 885 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
886
887 PERL_ARGS_ASSERT_UNPACK_REC;
888
28be1210 889 symptr->strbeg = s - strbeg;
49704364 890
49704364 891 while (next_symbol(symptr)) {
a7a3cfaa 892 packprops_t props;
9e27e96a 893 I32 len;
f337b084 894 I32 datumtype = symptr->code;
a1219b5e 895 bool needs_swap;
206947d2 896 /* do first one only unless in list context
08ca2aa3 897 / is implemented by unpacking the count, then popping it from the
206947d2 898 stack, so must check that we're not in the middle of a / */
49704364 899 if ( unpack_only_one
206947d2 900 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 901 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 902 break;
49704364 903
f337b084 904 switch (howlen = symptr->howlen) {
fc241834
RGS
905 case e_star:
906 len = strend - strbeg; /* long enough */
49704364 907 break;
f337b084
TH
908 default:
909 /* e_no_len and e_number */
910 len = symptr->length;
911 break;
49704364 912 }
18529408 913
21c16052 914 explicit_length = TRUE;
a6ec74c1 915 redo_switch:
49704364 916 beyond = s >= strend;
a7a3cfaa
TH
917
918 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
919 if (props) {
920 /* props nonzero means we can process this letter. */
9e27e96a
AL
921 const long size = props & PACK_SIZE_MASK;
922 const long howmany = (strend - s) / size;
a7a3cfaa
TH
923 if (len > howmany)
924 len = howmany;
925
926 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
927 if (len && unpack_only_one) len = 1;
928 EXTEND(SP, len);
929 EXTEND_MORTAL(len);
78d46eaa
NC
930 }
931 }
a7a3cfaa 932
a1219b5e
NC
933 needs_swap = NEEDS_SWAP(datumtype);
934
1109a392 935 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 936 default:
1109a392 937 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 938
a6ec74c1 939 case '%':
49704364 940 if (howlen == e_no_len)
18529408 941 len = 16; /* len is not specified */
a6ec74c1 942 checksum = len;
92d41999 943 cuv = 0;
a6ec74c1 944 cdouble = 0;
18529408 945 continue;
a6ec74c1 946 break;
18529408
IZ
947 case '(':
948 {
49704364 949 tempsym_t savsym = *symptr;
9e27e96a 950 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 951 symptr->flags |= group_modifiers;
49704364 952 symptr->patend = savsym.grpend;
28be1210 953 symptr->previous = &savsym;
49704364 954 symptr->level++;
18529408 955 PUTBACK;
c6f750d1 956 if (len && unpack_only_one) len = 1;
18529408 957 while (len--) {
49704364 958 symptr->patptr = savsym.grpbeg;
f337b084
TH
959 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
960 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
961 unpack_rec(symptr, s, strbeg, strend, &s);
962 if (s == strend && savsym.howlen == e_star)
49704364 963 break; /* No way to continue */
18529408
IZ
964 }
965 SPAGAIN;
28be1210 966 savsym.flags = symptr->flags & ~group_modifiers;
49704364 967 *symptr = savsym;
18529408
IZ
968 break;
969 }
28be1210 970 case '.' | TYPE_IS_SHRIEKING:
28be1210 971 case '.': {
9e27e96a 972 const char *from;
28be1210 973 SV *sv;
9e27e96a 974 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
975 if (howlen == e_star) from = strbeg;
976 else if (len <= 0) from = s;
977 else {
978 tempsym_t *group = symptr;
979
980 while (--len && group) group = group->previous;
981 from = group ? strbeg + group->strbeg : strbeg;
982 }
983 sv = from <= s ?
00646304
CB
984 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
985 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 986 mXPUSHs(sv);
28be1210
TH
987 break;
988 }
28be1210 989 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 990 case '@':
28be1210 991 s = strbeg + symptr->strbeg;
28be1210 992 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 993 {
08ca2aa3
TH
994 while (len > 0) {
995 if (s >= strend)
996 Perl_croak(aTHX_ "'@' outside of string in unpack");
997 s += UTF8SKIP(s);
998 len--;
999 }
1000 if (s > strend)
1001 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1002 } else {
28be1210 1003 if (strend-s < len)
fc241834 1004 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1005 s += len;
08ca2aa3 1006 }
a6ec74c1 1007 break;
62f95557
IZ
1008 case 'X' | TYPE_IS_SHRIEKING:
1009 if (!len) /* Avoid division by 0 */
1010 len = 1;
08ca2aa3 1011 if (utf8) {
f7fe979e 1012 const char *hop, *last;
f337b084
TH
1013 I32 l = len;
1014 hop = last = strbeg;
1015 while (hop < s) {
1016 hop += UTF8SKIP(hop);
1017 if (--l == 0) {
08ca2aa3 1018 last = hop;
f337b084
TH
1019 l = len;
1020 }
fc241834 1021 }
f337b084
TH
1022 if (last > s)
1023 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1024 s = last;
1025 break;
f337b084
TH
1026 }
1027 len = (s - strbeg) % len;
62f95557 1028 /* FALL THROUGH */
a6ec74c1 1029 case 'X':
08ca2aa3
TH
1030 if (utf8) {
1031 while (len > 0) {
1032 if (s <= strbeg)
1033 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1034 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1035 if (s <= strbeg)
1036 Perl_croak(aTHX_ "'X' outside of string in unpack");
1037 }
1038 len--;
1039 }
1040 } else {
fc241834
RGS
1041 if (len > s - strbeg)
1042 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1043 s -= len;
08ca2aa3 1044 }
a6ec74c1 1045 break;
9e27e96a
AL
1046 case 'x' | TYPE_IS_SHRIEKING: {
1047 I32 ai32;
62f95557
IZ
1048 if (!len) /* Avoid division by 0 */
1049 len = 1;
230e1fce
NC
1050 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1051 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1052 if (ai32 == 0) break;
1053 len -= ai32;
9e27e96a 1054 }
62f95557 1055 /* FALL THROUGH */
a6ec74c1 1056 case 'x':
08ca2aa3
TH
1057 if (utf8) {
1058 while (len>0) {
1059 if (s >= strend)
1060 Perl_croak(aTHX_ "'x' outside of string in unpack");
1061 s += UTF8SKIP(s);
1062 len--;
1063 }
1064 } else {
fc241834
RGS
1065 if (len > strend - s)
1066 Perl_croak(aTHX_ "'x' outside of string in unpack");
1067 s += len;
f337b084 1068 }
a6ec74c1
JH
1069 break;
1070 case '/':
49704364
WL
1071 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1072 break;
a6ec74c1
JH
1073 case 'A':
1074 case 'Z':
1075 case 'a':
08ca2aa3
TH
1076 if (checksum) {
1077 /* Preliminary length estimate is assumed done in 'W' */
1078 if (len > strend - s) len = strend - s;
1079 goto W_checksum;
1080 }
1081 if (utf8) {
1082 I32 l;
f7fe979e 1083 const char *hop;
08ca2aa3
TH
1084 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1085 if (hop >= strend) {
1086 if (hop > strend)
1087 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1088 break;
fc241834 1089 }
a6ec74c1 1090 }
08ca2aa3
TH
1091 if (hop > strend)
1092 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1093 len = hop - s;
1094 } else if (len > strend - s)
1095 len = strend - s;
1096
1097 if (datumtype == 'Z') {
1098 /* 'Z' strips stuff after first null */
f7fe979e 1099 const char *ptr, *end;
f337b084
TH
1100 end = s + len;
1101 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1102 sv = newSVpvn(s, ptr-s);
1103 if (howlen == e_star) /* exact for 'Z*' */
1104 len = ptr-s + (ptr != strend ? 1 : 0);
1105 } else if (datumtype == 'A') {
1106 /* 'A' strips both nulls and spaces */
f7fe979e 1107 const char *ptr;
18bdf90a
TH
1108 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1109 for (ptr = s+len-1; ptr >= s; ptr--)
1110 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1111 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1112 if (ptr >= s) ptr += UTF8SKIP(ptr);
1113 else ptr++;
28be1210 1114 if (ptr > s+len)
18bdf90a
TH
1115 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1116 } else {
1117 for (ptr = s+len-1; ptr >= s; ptr--)
1118 if (*ptr != 0 && !isSPACE(*ptr)) break;
1119 ptr++;
1120 }
08ca2aa3
TH
1121 sv = newSVpvn(s, ptr-s);
1122 } else sv = newSVpvn(s, len);
1123
1124 if (utf8) {
1125 SvUTF8_on(sv);
1126 /* Undo any upgrade done due to need_utf8() */
f337b084 1127 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1128 sv_utf8_downgrade(sv, 0);
a6ec74c1 1129 }
6e449a3a 1130 mXPUSHs(sv);
08ca2aa3 1131 s += len;
a6ec74c1
JH
1132 break;
1133 case 'B':
08ca2aa3
TH
1134 case 'b': {
1135 char *str;
49704364 1136 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1137 len = (strend - s) * 8;
1138 if (checksum) {
f337b084 1139 if (utf8)
08ca2aa3 1140 while (len >= 8 && s < strend) {
f337b084 1141 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1142 len -= 8;
1143 }
f337b084 1144 else
fc241834 1145 while (len >= 8) {
08ca2aa3 1146 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1147 len -= 8;
1148 }
08ca2aa3
TH
1149 if (len && s < strend) {
1150 U8 bits;
f337b084
TH
1151 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1152 if (datumtype == 'b')
a6ec74c1 1153 while (len-- > 0) {
92d41999 1154 if (bits & 1) cuv++;
a6ec74c1
JH
1155 bits >>= 1;
1156 }
f337b084 1157 else
a6ec74c1 1158 while (len-- > 0) {
08ca2aa3 1159 if (bits & 0x80) cuv++;
a6ec74c1
JH
1160 bits <<= 1;
1161 }
fc241834 1162 }
a6ec74c1
JH
1163 break;
1164 }
08ca2aa3 1165
561b68a9 1166 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1167 SvPOK_on(sv);
1168 str = SvPVX(sv);
1169 if (datumtype == 'b') {
f337b084 1170 U8 bits = 0;
f7fe979e 1171 const I32 ai32 = len;
08ca2aa3
TH
1172 for (len = 0; len < ai32; len++) {
1173 if (len & 7) bits >>= 1;
1174 else if (utf8) {
1175 if (s >= strend) break;
f337b084 1176 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1177 } else bits = *(U8 *) s++;
1178 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1179 }
08ca2aa3 1180 } else {
f337b084 1181 U8 bits = 0;
f7fe979e 1182 const I32 ai32 = len;
08ca2aa3
TH
1183 for (len = 0; len < ai32; len++) {
1184 if (len & 7) bits <<= 1;
1185 else if (utf8) {
1186 if (s >= strend) break;
f337b084 1187 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1188 } else bits = *(U8 *) s++;
1189 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1190 }
1191 }
1192 *str = '\0';
aa07b2f6 1193 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1194 XPUSHs(sv);
a6ec74c1 1195 break;
08ca2aa3 1196 }
a6ec74c1 1197 case 'H':
08ca2aa3 1198 case 'h': {
3297d27d 1199 char *str = NULL;
fc241834 1200 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1201 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1202 len = (strend - s) * 2;
858fe5e1
TC
1203 if (!checksum) {
1204 sv = sv_2mortal(newSV(len ? len : 1));
1205 SvPOK_on(sv);
1206 str = SvPVX(sv);
1207 }
a6ec74c1 1208 if (datumtype == 'h') {
f337b084 1209 U8 bits = 0;
9e27e96a 1210 I32 ai32 = len;
fc241834
RGS
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits >>= 4;
1213 else if (utf8) {
1214 if (s >= strend) break;
f337b084 1215 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1216 } else bits = * (U8 *) s++;
858fe5e1
TC
1217 if (!checksum)
1218 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1219 }
08ca2aa3 1220 } else {
f337b084 1221 U8 bits = 0;
f7fe979e 1222 const I32 ai32 = len;
08ca2aa3
TH
1223 for (len = 0; len < ai32; len++) {
1224 if (len & 1) bits <<= 4;
1225 else if (utf8) {
1226 if (s >= strend) break;
f337b084 1227 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1228 } else bits = *(U8 *) s++;
858fe5e1
TC
1229 if (!checksum)
1230 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1231 }
1232 }
858fe5e1
TC
1233 if (!checksum) {
1234 *str = '\0';
1235 SvCUR_set(sv, str - SvPVX_const(sv));
1236 XPUSHs(sv);
1237 }
a6ec74c1 1238 break;
08ca2aa3 1239 }
1651fc44
ML
1240 case 'C':
1241 if (len == 0) {
1242 if (explicit_length)
1243 /* Switch to "character" mode */
1244 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1245 break;
1246 }
1247 /* FALL THROUGH */
a6ec74c1 1248 case 'c':
1651fc44
ML
1249 while (len-- > 0 && s < strend) {
1250 int aint;
1251 if (utf8)
1252 {
1253 STRLEN retlen;
1254 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1255 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1256 if (retlen == (STRLEN) -1 || retlen == 0)
1257 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1258 s += retlen;
1259 }
1260 else
1261 aint = *(U8 *)(s)++;
1262 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1263 aint -= 256;
08ca2aa3 1264 if (!checksum)
6e449a3a 1265 mPUSHi(aint);
73cb7263
NC
1266 else if (checksum > bits_in_uv)
1267 cdouble += (NV)aint;
1268 else
1269 cuv += aint;
a6ec74c1
JH
1270 }
1271 break;
08ca2aa3
TH
1272 case 'W':
1273 W_checksum:
1651fc44 1274 if (utf8) {
08ca2aa3 1275 while (len-- > 0 && s < strend) {
08ca2aa3 1276 STRLEN retlen;
f7fe979e 1277 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1278 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1279 if (retlen == (STRLEN) -1 || retlen == 0)
1280 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1281 s += retlen;
1282 if (!checksum)
6e449a3a 1283 mPUSHu(val);
08ca2aa3
TH
1284 else if (checksum > bits_in_uv)
1285 cdouble += (NV) val;
d6d3e8bd 1286 else
08ca2aa3 1287 cuv += val;
fc241834 1288 }
08ca2aa3 1289 } else if (!checksum)
a6ec74c1 1290 while (len-- > 0) {
f7fe979e 1291 const U8 ch = *(U8 *) s++;
6e449a3a 1292 mPUSHu(ch);
a6ec74c1 1293 }
08ca2aa3
TH
1294 else if (checksum > bits_in_uv)
1295 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1296 else
1297 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1298 break;
1299 case 'U':
35bcd338 1300 if (len == 0) {
c5333953 1301 if (explicit_length && howlen != e_star) {
08ca2aa3 1302 /* Switch to "bytes in UTF-8" mode */
f337b084 1303 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1304 else
1305 /* Should be impossible due to the need_utf8() test */
1306 Perl_croak(aTHX_ "U0 mode on a byte string");
1307 }
35bcd338
JH
1308 break;
1309 }
08ca2aa3 1310 if (len > strend - s) len = strend - s;
fc241834 1311 if (!checksum) {
08ca2aa3
TH
1312 if (len && unpack_only_one) len = 1;
1313 EXTEND(SP, len);
1314 EXTEND_MORTAL(len);
fc241834 1315 }
08ca2aa3
TH
1316 while (len-- > 0 && s < strend) {
1317 STRLEN retlen;
1318 UV auv;
1319 if (utf8) {
1320 U8 result[UTF8_MAXLEN];
f7fe979e 1321 const char *ptr = s;
08ca2aa3 1322 STRLEN len;
08ca2aa3
TH
1323 /* Bug: warns about bad utf8 even if we are short on bytes
1324 and will break out of the loop */
230e1fce
NC
1325 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1326 'U'))
08ca2aa3
TH
1327 break;
1328 len = UTF8SKIP(result);
fc241834 1329 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1330 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1331 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1332 s = ptr;
1333 } else {
cde84f2a 1334 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1335 if (retlen == (STRLEN) -1 || retlen == 0)
1336 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1337 s += retlen;
1338 }
1339 if (!checksum)
6e449a3a 1340 mPUSHu(auv);
73cb7263 1341 else if (checksum > bits_in_uv)
08ca2aa3 1342 cdouble += (NV) auv;
73cb7263 1343 else
08ca2aa3 1344 cuv += auv;
a6ec74c1
JH
1345 }
1346 break;
49704364
WL
1347 case 's' | TYPE_IS_SHRIEKING:
1348#if SHORTSIZE != SIZE16
73cb7263 1349 while (len-- > 0) {
08ca2aa3 1350 short ashort;
aaec8192 1351 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1352 if (!checksum)
6e449a3a 1353 mPUSHi(ashort);
73cb7263
NC
1354 else if (checksum > bits_in_uv)
1355 cdouble += (NV)ashort;
1356 else
1357 cuv += ashort;
49704364
WL
1358 }
1359 break;
1360#else
1361 /* Fallthrough! */
a6ec74c1 1362#endif
49704364 1363 case 's':
73cb7263 1364 while (len-- > 0) {
08ca2aa3
TH
1365 I16 ai16;
1366
1367#if U16SIZE > SIZE16
1368 ai16 = 0;
1369#endif
aaec8192 1370 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1371#if U16SIZE > SIZE16
73cb7263
NC
1372 if (ai16 > 32767)
1373 ai16 -= 65536;
a6ec74c1 1374#endif
08ca2aa3 1375 if (!checksum)
6e449a3a 1376 mPUSHi(ai16);
73cb7263
NC
1377 else if (checksum > bits_in_uv)
1378 cdouble += (NV)ai16;
1379 else
1380 cuv += ai16;
a6ec74c1
JH
1381 }
1382 break;
49704364
WL
1383 case 'S' | TYPE_IS_SHRIEKING:
1384#if SHORTSIZE != SIZE16
73cb7263 1385 while (len-- > 0) {
08ca2aa3 1386 unsigned short aushort;
aaec8192
NC
1387 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1388 needs_swap);
08ca2aa3 1389 if (!checksum)
6e449a3a 1390 mPUSHu(aushort);
73cb7263
NC
1391 else if (checksum > bits_in_uv)
1392 cdouble += (NV)aushort;
1393 else
1394 cuv += aushort;
49704364
WL
1395 }
1396 break;
1397#else
486ec47a 1398 /* Fallthrough! */
49704364 1399#endif
a6ec74c1
JH
1400 case 'v':
1401 case 'n':
1402 case 'S':
73cb7263 1403 while (len-- > 0) {
08ca2aa3
TH
1404 U16 au16;
1405#if U16SIZE > SIZE16
1406 au16 = 0;
1407#endif
aaec8192 1408 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1409 if (datumtype == 'n')
1410 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1411 if (datumtype == 'v')
1412 au16 = vtohs(au16);
08ca2aa3 1413 if (!checksum)
6e449a3a 1414 mPUSHu(au16);
73cb7263 1415 else if (checksum > bits_in_uv)
f337b084 1416 cdouble += (NV) au16;
73cb7263
NC
1417 else
1418 cuv += au16;
a6ec74c1
JH
1419 }
1420 break;
068bd2e7
MHM
1421 case 'v' | TYPE_IS_SHRIEKING:
1422 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1423 while (len-- > 0) {
08ca2aa3
TH
1424 I16 ai16;
1425# if U16SIZE > SIZE16
1426 ai16 = 0;
1427# endif
aaec8192 1428 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1429 /* There should never be any byte-swapping here. */
1430 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1431 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1432 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1433 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1434 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1435 if (!checksum)
6e449a3a 1436 mPUSHi(ai16);
73cb7263 1437 else if (checksum > bits_in_uv)
08ca2aa3 1438 cdouble += (NV) ai16;
73cb7263
NC
1439 else
1440 cuv += ai16;
068bd2e7
MHM
1441 }
1442 break;
a6ec74c1 1443 case 'i':
49704364 1444 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1445 while (len-- > 0) {
08ca2aa3 1446 int aint;
aaec8192 1447 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1448 if (!checksum)
6e449a3a 1449 mPUSHi(aint);
73cb7263
NC
1450 else if (checksum > bits_in_uv)
1451 cdouble += (NV)aint;
1452 else
1453 cuv += aint;
a6ec74c1
JH
1454 }
1455 break;
1456 case 'I':
49704364 1457 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1458 while (len-- > 0) {
08ca2aa3 1459 unsigned int auint;
aaec8192 1460 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1461 if (!checksum)
6e449a3a 1462 mPUSHu(auint);
73cb7263
NC
1463 else if (checksum > bits_in_uv)
1464 cdouble += (NV)auint;
1465 else
1466 cuv += auint;
a6ec74c1
JH
1467 }
1468 break;
92d41999 1469 case 'j':
73cb7263 1470 while (len-- > 0) {
08ca2aa3 1471 IV aiv;
aaec8192 1472 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1473 if (!checksum)
6e449a3a 1474 mPUSHi(aiv);
73cb7263
NC
1475 else if (checksum > bits_in_uv)
1476 cdouble += (NV)aiv;
1477 else
1478 cuv += aiv;
92d41999
JH
1479 }
1480 break;
1481 case 'J':
73cb7263 1482 while (len-- > 0) {
08ca2aa3 1483 UV auv;
aaec8192 1484 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1485 if (!checksum)
6e449a3a 1486 mPUSHu(auv);
73cb7263
NC
1487 else if (checksum > bits_in_uv)
1488 cdouble += (NV)auv;
1489 else
1490 cuv += auv;
92d41999
JH
1491 }
1492 break;
49704364
WL
1493 case 'l' | TYPE_IS_SHRIEKING:
1494#if LONGSIZE != SIZE32
73cb7263 1495 while (len-- > 0) {
08ca2aa3 1496 long along;
aaec8192 1497 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1498 if (!checksum)
6e449a3a 1499 mPUSHi(along);
73cb7263
NC
1500 else if (checksum > bits_in_uv)
1501 cdouble += (NV)along;
1502 else
1503 cuv += along;
49704364
WL
1504 }
1505 break;
1506#else
1507 /* Fallthrough! */
a6ec74c1 1508#endif
49704364 1509 case 'l':
73cb7263 1510 while (len-- > 0) {
08ca2aa3
TH
1511 I32 ai32;
1512#if U32SIZE > SIZE32
1513 ai32 = 0;
1514#endif
aaec8192 1515 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1516#if U32SIZE > SIZE32
08ca2aa3 1517 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1518#endif
08ca2aa3 1519 if (!checksum)
6e449a3a 1520 mPUSHi(ai32);
73cb7263
NC
1521 else if (checksum > bits_in_uv)
1522 cdouble += (NV)ai32;
1523 else
1524 cuv += ai32;
a6ec74c1
JH
1525 }
1526 break;
49704364
WL
1527 case 'L' | TYPE_IS_SHRIEKING:
1528#if LONGSIZE != SIZE32
73cb7263 1529 while (len-- > 0) {
08ca2aa3 1530 unsigned long aulong;
aaec8192 1531 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1532 if (!checksum)
6e449a3a 1533 mPUSHu(aulong);
73cb7263
NC
1534 else if (checksum > bits_in_uv)
1535 cdouble += (NV)aulong;
1536 else
1537 cuv += aulong;
49704364
WL
1538 }
1539 break;
1540#else
1541 /* Fall through! */
1542#endif
a6ec74c1
JH
1543 case 'V':
1544 case 'N':
1545 case 'L':
73cb7263 1546 while (len-- > 0) {
08ca2aa3
TH
1547 U32 au32;
1548#if U32SIZE > SIZE32
1549 au32 = 0;
1550#endif
aaec8192 1551 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1552 if (datumtype == 'N')
1553 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1554 if (datumtype == 'V')
1555 au32 = vtohl(au32);
08ca2aa3 1556 if (!checksum)
6e449a3a 1557 mPUSHu(au32);
fc241834
RGS
1558 else if (checksum > bits_in_uv)
1559 cdouble += (NV)au32;
1560 else
1561 cuv += au32;
a6ec74c1
JH
1562 }
1563 break;
068bd2e7
MHM
1564 case 'V' | TYPE_IS_SHRIEKING:
1565 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1566 while (len-- > 0) {
08ca2aa3 1567 I32 ai32;
f8e5a5db 1568#if U32SIZE > SIZE32
08ca2aa3 1569 ai32 = 0;
f8e5a5db 1570#endif
aaec8192 1571 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1572 /* There should never be any byte swapping here. */
1573 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1574 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1575 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1576 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1577 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1578 if (!checksum)
6e449a3a 1579 mPUSHi(ai32);
73cb7263
NC
1580 else if (checksum > bits_in_uv)
1581 cdouble += (NV)ai32;
1582 else
1583 cuv += ai32;
068bd2e7
MHM
1584 }
1585 break;
a6ec74c1 1586 case 'p':
a6ec74c1 1587 while (len-- > 0) {
f7fe979e 1588 const char *aptr;
aaec8192 1589 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1590 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1591 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1592 }
1593 break;
1594 case 'w':
a6ec74c1
JH
1595 {
1596 UV auv = 0;
1597 U32 bytes = 0;
fc241834 1598
08ca2aa3
TH
1599 while (len > 0 && s < strend) {
1600 U8 ch;
f337b084 1601 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1602 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1603 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1604 if (ch < 0x80) {
a6ec74c1 1605 bytes = 0;
6e449a3a 1606 mPUSHu(auv);
a6ec74c1
JH
1607 len--;
1608 auv = 0;
08ca2aa3 1609 continue;
a6ec74c1 1610 }
08ca2aa3 1611 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1612 const char *t;
a6ec74c1 1613
f5992bc4 1614 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1615 while (s < strend) {
f337b084 1616 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1617 sv = mul128(sv, (U8)(ch & 0x7f));
1618 if (!(ch & 0x80)) {
a6ec74c1
JH
1619 bytes = 0;
1620 break;
1621 }
1622 }
10516c54 1623 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1624 while (*t == '0')
1625 t++;
1626 sv_chop(sv, t);
6e449a3a 1627 mPUSHs(sv);
a6ec74c1
JH
1628 len--;
1629 auv = 0;
1630 }
1631 }
1632 if ((s >= strend) && bytes)
49704364 1633 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1634 }
1635 break;
1636 case 'P':
49704364
WL
1637 if (symptr->howlen == e_star)
1638 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1639 EXTEND(SP, 1);
2d3e0934 1640 if (s + sizeof(char*) <= strend) {
08ca2aa3 1641 char *aptr;
aaec8192 1642 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1643 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1644 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1645 }
a6ec74c1
JH
1646 break;
1647#ifdef HAS_QUAD
1648 case 'q':
73cb7263 1649 while (len-- > 0) {
08ca2aa3 1650 Quad_t aquad;
aaec8192 1651 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1652 if (!checksum)
6e449a3a
MHM
1653 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1654 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1655 else if (checksum > bits_in_uv)
1656 cdouble += (NV)aquad;
1657 else
1658 cuv += aquad;
1659 }
a6ec74c1
JH
1660 break;
1661 case 'Q':
73cb7263 1662 while (len-- > 0) {
08ca2aa3 1663 Uquad_t auquad;
aaec8192 1664 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1665 if (!checksum)
6e449a3a
MHM
1666 mPUSHs(auquad <= UV_MAX ?
1667 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1668 else if (checksum > bits_in_uv)
1669 cdouble += (NV)auquad;
1670 else
1671 cuv += auquad;
a6ec74c1
JH
1672 }
1673 break;
08ca2aa3 1674#endif /* HAS_QUAD */
a6ec74c1
JH
1675 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1676 case 'f':
73cb7263 1677 while (len-- > 0) {
08ca2aa3 1678 float afloat;
aaec8192 1679 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1680 if (!checksum)
6e449a3a 1681 mPUSHn(afloat);
08ca2aa3 1682 else
73cb7263 1683 cdouble += afloat;
fc241834 1684 }
a6ec74c1
JH
1685 break;
1686 case 'd':
73cb7263 1687 while (len-- > 0) {
08ca2aa3 1688 double adouble;
aaec8192 1689 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1690 if (!checksum)
6e449a3a 1691 mPUSHn(adouble);
08ca2aa3 1692 else
73cb7263 1693 cdouble += adouble;
fc241834 1694 }
a6ec74c1 1695 break;
92d41999 1696 case 'F':
73cb7263 1697 while (len-- > 0) {
275663fa 1698 NV_bytes anv;
aaec8192
NC
1699 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1700 datumtype, needs_swap);
08ca2aa3 1701 if (!checksum)
275663fa 1702 mPUSHn(anv.nv);
08ca2aa3 1703 else
275663fa 1704 cdouble += anv.nv;
fc241834 1705 }
92d41999
JH
1706 break;
1707#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1708 case 'D':
73cb7263 1709 while (len-- > 0) {
275663fa 1710 ld_bytes aldouble;
aaec8192
NC
1711 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1712 sizeof(aldouble.bytes), datumtype, needs_swap);
08ca2aa3 1713 if (!checksum)
275663fa 1714 mPUSHn(aldouble.ld);
08ca2aa3 1715 else
275663fa 1716 cdouble += aldouble.ld;
92d41999
JH
1717 }
1718 break;
1719#endif
a6ec74c1 1720 case 'u':
858fe5e1 1721 if (!checksum) {
f7fe979e 1722 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1723 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1724 if (l) SvPOK_on(sv);
1725 }
1726 if (utf8) {
1727 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1728 I32 a, b, c, d;
db187877 1729 char hunk[3];
08ca2aa3 1730
08ca2aa3
TH
1731 while (len > 0) {
1732 next_uni_uu(aTHX_ &s, strend, &a);
1733 next_uni_uu(aTHX_ &s, strend, &b);
1734 next_uni_uu(aTHX_ &s, strend, &c);
1735 next_uni_uu(aTHX_ &s, strend, &d);
1736 hunk[0] = (char)((a << 2) | (b >> 4));
1737 hunk[1] = (char)((b << 4) | (c >> 2));
1738 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1739 if (!checksum)
1740 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1741 len -= 3;
1742 }
1743 if (s < strend) {
f7fe979e
AL
1744 if (*s == '\n') {
1745 s++;
1746 }
08ca2aa3
TH
1747 else {
1748 /* possible checksum byte */
f7fe979e
AL
1749 const char *skip = s+UTF8SKIP(s);
1750 if (skip < strend && *skip == '\n')
1751 s = skip+1;
08ca2aa3
TH
1752 }
1753 }
1754 }
1755 } else {
fc241834
RGS
1756 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1757 I32 a, b, c, d;
db187877 1758 char hunk[3];
a6ec74c1 1759
fc241834
RGS
1760 len = PL_uudmap[*(U8*)s++] & 077;
1761 while (len > 0) {
1762 if (s < strend && ISUUCHAR(*s))
1763 a = PL_uudmap[*(U8*)s++] & 077;
1764 else
1765 a = 0;
1766 if (s < strend && ISUUCHAR(*s))
1767 b = PL_uudmap[*(U8*)s++] & 077;
1768 else
1769 b = 0;
1770 if (s < strend && ISUUCHAR(*s))
1771 c = PL_uudmap[*(U8*)s++] & 077;
1772 else
1773 c = 0;
1774 if (s < strend && ISUUCHAR(*s))
1775 d = PL_uudmap[*(U8*)s++] & 077;
1776 else
1777 d = 0;
1778 hunk[0] = (char)((a << 2) | (b >> 4));
1779 hunk[1] = (char)((b << 4) | (c >> 2));
1780 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1781 if (!checksum)
1782 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1783 len -= 3;
1784 }
1785 if (*s == '\n')
1786 s++;
1787 else /* possible checksum byte */
1788 if (s + 1 < strend && s[1] == '\n')
1789 s += 2;
a6ec74c1 1790 }
08ca2aa3 1791 }
858fe5e1
TC
1792 if (!checksum)
1793 XPUSHs(sv);
a6ec74c1
JH
1794 break;
1795 }
49704364 1796
a6ec74c1 1797 if (checksum) {
1109a392 1798 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1799 (checksum > bits_in_uv &&
08ca2aa3
TH
1800 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1801 NV trouble, anv;
a6ec74c1 1802
08ca2aa3 1803 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1804 while (checksum >= 16) {
1805 checksum -= 16;
08ca2aa3 1806 anv *= 65536.0;
a6ec74c1 1807 }
a6ec74c1 1808 while (cdouble < 0.0)
08ca2aa3
TH
1809 cdouble += anv;
1810 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1811 sv = newSVnv(cdouble);
a6ec74c1
JH
1812 }
1813 else {
fa8ec7c1
NC
1814 if (checksum < bits_in_uv) {
1815 UV mask = ((UV)1 << checksum) - 1;
92d41999 1816 cuv &= mask;
a6ec74c1 1817 }
c4c5f44a 1818 sv = newSVuv(cuv);
a6ec74c1 1819 }
6e449a3a 1820 mXPUSHs(sv);
a6ec74c1
JH
1821 checksum = 0;
1822 }
fc241834 1823
49704364
WL
1824 if (symptr->flags & FLAG_SLASH){
1825 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1826 break;
49704364
WL
1827 if( next_symbol(symptr) ){
1828 if( symptr->howlen == e_number )
1829 Perl_croak(aTHX_ "Count after length/code in unpack" );
1830 if( beyond ){
1831 /* ...end of char buffer then no decent length available */
1832 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1833 } else {
1834 /* take top of stack (hope it's numeric) */
1835 len = POPi;
1836 if( len < 0 )
1837 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1838 }
1839 } else {
1840 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1841 }
1842 datumtype = symptr->code;
21c16052 1843 explicit_length = FALSE;
49704364
WL
1844 goto redo_switch;
1845 }
a6ec74c1 1846 }
49704364 1847
18529408
IZ
1848 if (new_s)
1849 *new_s = s;
1850 PUTBACK;
1851 return SP - PL_stack_base - start_sp_offset;
1852}
1853
1854PP(pp_unpack)
1855{
97aff369 1856 dVAR;
18529408 1857 dSP;
bab9c0ac 1858 dPOPPOPssrl;
18529408
IZ
1859 I32 gimme = GIMME_V;
1860 STRLEN llen;
1861 STRLEN rlen;
5c144d81
NC
1862 const char *pat = SvPV_const(left, llen);
1863 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1864 const char *strend = s + rlen;
1865 const char *patend = pat + llen;
08ca2aa3 1866 I32 cnt;
18529408
IZ
1867
1868 PUTBACK;
7accc089 1869 cnt = unpackstring(pat, patend, s, strend,
49704364 1870 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1871 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1872
18529408
IZ
1873 SPAGAIN;
1874 if ( !cnt && gimme == G_SCALAR )
1875 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1876 RETURN;
1877}
1878
f337b084 1879STATIC U8 *
f7fe979e 1880doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1881{
f337b084 1882 *h++ = PL_uuemap[len];
a6ec74c1 1883 while (len > 2) {
f337b084
TH
1884 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1885 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1886 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1887 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1888 s += 3;
1889 len -= 3;
1890 }
1891 if (len > 0) {
f7fe979e 1892 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1893 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1894 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1895 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1896 *h++ = PL_uuemap[0];
a6ec74c1 1897 }
f337b084
TH
1898 *h++ = '\n';
1899 return h;
a6ec74c1
JH
1900}
1901
1902STATIC SV *
f7fe979e 1903S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1904{
8b6e33c7
AL
1905 SV *result = newSVpvn(s, l);
1906 char *const result_c = SvPV_nolen(result); /* convenience */
1907 char *out = result_c;
1908 bool skip = 1;
1909 bool ignore = 0;
a6ec74c1 1910
7918f24d
NC
1911 PERL_ARGS_ASSERT_IS_AN_INT;
1912
a6ec74c1
JH
1913 while (*s) {
1914 switch (*s) {
1915 case ' ':
1916 break;
1917 case '+':
1918 if (!skip) {
1919 SvREFCNT_dec(result);
1920 return (NULL);
1921 }
1922 break;
1923 case '0':
1924 case '1':
1925 case '2':
1926 case '3':
1927 case '4':
1928 case '5':
1929 case '6':
1930 case '7':
1931 case '8':
1932 case '9':
1933 skip = 0;
1934 if (!ignore) {
1935 *(out++) = *s;
1936 }
1937 break;
1938 case '.':
1939 ignore = 1;
1940 break;
1941 default:
1942 SvREFCNT_dec(result);
1943 return (NULL);
1944 }
1945 s++;
1946 }
1947 *(out++) = '\0';
1948 SvCUR_set(result, out - result_c);
1949 return (result);
1950}
1951
1952/* pnum must be '\0' terminated */
1953STATIC int
1954S_div128(pTHX_ SV *pnum, bool *done)
1955{
8b6e33c7
AL
1956 STRLEN len;
1957 char * const s = SvPV(pnum, len);
1958 char *t = s;
1959 int m = 0;
1960
7918f24d
NC
1961 PERL_ARGS_ASSERT_DIV128;
1962
8b6e33c7
AL
1963 *done = 1;
1964 while (*t) {
1965 const int i = m * 10 + (*t - '0');
1966 const int r = (i >> 7); /* r < 10 */
1967 m = i & 0x7F;
1968 if (r) {
1969 *done = 0;
1970 }
1971 *(t++) = '0' + r;
a6ec74c1 1972 }
8b6e33c7
AL
1973 *(t++) = '\0';
1974 SvCUR_set(pnum, (STRLEN) (t - s));
1975 return (m);
a6ec74c1
JH
1976}
1977
18529408 1978/*
7accc089
JH
1979=for apidoc packlist
1980
1981The engine implementing pack() Perl function.
1982
bfce84ec
AL
1983=cut
1984*/
7accc089
JH
1985
1986void
5aaab254 1987Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1988{
97aff369 1989 dVAR;
aadb217d
JH
1990 tempsym_t sym;
1991
7918f24d
NC
1992 PERL_ARGS_ASSERT_PACKLIST;
1993
f7fe979e 1994 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1995
f337b084
TH
1996 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1997 Also make sure any UTF8 flag is loaded */
56eb0262 1998 SvPV_force_nolen(cat);
bfce84ec
AL
1999 if (DO_UTF8(cat))
2000 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2001
49704364
WL
2002 (void)pack_rec( cat, &sym, beglist, endlist );
2003}
2004
f337b084
TH
2005/* like sv_utf8_upgrade, but also repoint the group start markers */
2006STATIC void
2007marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2008 STRLEN len;
2009 tempsym_t *group;
f7fe979e
AL
2010 const char *from_ptr, *from_start, *from_end, **marks, **m;
2011 char *to_start, *to_ptr;
f337b084
TH
2012
2013 if (SvUTF8(sv)) return;
2014
aa07b2f6 2015 from_start = SvPVX_const(sv);
f337b084
TH
2016 from_end = from_start + SvCUR(sv);
2017 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2018 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2019 if (from_ptr == from_end) {
2020 /* Simple case: no character needs to be changed */
2021 SvUTF8_on(sv);
2022 return;
2023 }
2024
3473cf63 2025 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2026 Newx(to_start, len, char);
f337b084
TH
2027 Copy(from_start, to_start, from_ptr-from_start, char);
2028 to_ptr = to_start + (from_ptr-from_start);
2029
a02a5408 2030 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2031 for (group=sym_ptr; group; group = group->previous)
2032 marks[group->level] = from_start + group->strbeg;
2033 marks[sym_ptr->level+1] = from_end+1;
2034 for (m = marks; *m < from_ptr; m++)
2035 *m = to_start + (*m-from_start);
2036
2037 for (;from_ptr < from_end; from_ptr++) {
2038 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2039 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2040 }
2041 *to_ptr = 0;
2042
2043 while (*m == from_ptr) *m++ = to_ptr;
2044 if (m != marks + sym_ptr->level+1) {
2045 Safefree(marks);
2046 Safefree(to_start);
5637ef5b
NC
2047 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2048 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2049 }
2050 for (group=sym_ptr; group; group = group->previous)
2051 group->strbeg = marks[group->level] - to_start;
2052 Safefree(marks);
2053
2054 if (SvOOK(sv)) {
2055 if (SvIVX(sv)) {
b162af07 2056 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2057 from_start -= SvIVX(sv);
2058 SvIV_set(sv, 0);
2059 }
2060 SvFLAGS(sv) &= ~SVf_OOK;
2061 }
2062 if (SvLEN(sv) != 0)
2063 Safefree(from_start);
f880fe2f 2064 SvPV_set(sv, to_start);
b162af07
SP
2065 SvCUR_set(sv, to_ptr - to_start);
2066 SvLEN_set(sv, len);
f337b084
TH
2067 SvUTF8_on(sv);
2068}
2069
2070/* Exponential string grower. Makes string extension effectively O(n)
2071 needed says how many extra bytes we need (not counting the final '\0')
2072 Only grows the string if there is an actual lack of space
2073*/
2074STATIC char *
0bd48802 2075S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2076 const STRLEN cur = SvCUR(sv);
2077 const STRLEN len = SvLEN(sv);
f337b084 2078 STRLEN extend;
7918f24d
NC
2079
2080 PERL_ARGS_ASSERT_SV_EXP_GROW;
2081
f337b084
TH
2082 if (len - cur > needed) return SvPVX(sv);
2083 extend = needed > len ? needed : len;
2084 return SvGROW(sv, len+extend+1);
2085}
49704364
WL
2086
2087STATIC
2088SV **
f337b084 2089S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2090{
97aff369 2091 dVAR;
49704364 2092 tempsym_t lookahead;
f337b084
TH
2093 I32 items = endlist - beglist;
2094 bool found = next_symbol(symptr);
2095 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2096 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2097
7918f24d
NC
2098 PERL_ARGS_ASSERT_PACK_REC;
2099
f337b084
TH
2100 if (symptr->level == 0 && found && symptr->code == 'U') {
2101 marked_upgrade(aTHX_ cat, symptr);
2102 symptr->flags |= FLAG_DO_UTF8;
2103 utf8 = 0;
49704364 2104 }
f337b084 2105 symptr->strbeg = SvCUR(cat);
49704364
WL
2106
2107 while (found) {
f337b084
TH
2108 SV *fromstr;
2109 STRLEN fromlen;
2110 I32 len;
a0714e2c 2111 SV *lengthcode = NULL;
49704364 2112 I32 datumtype = symptr->code;
f337b084
TH
2113 howlen_t howlen = symptr->howlen;
2114 char *start = SvPVX(cat);
2115 char *cur = start + SvCUR(cat);
a1219b5e 2116 bool needs_swap;
49704364 2117
f337b084
TH
2118#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2119
2120 switch (howlen) {
fc241834 2121 case e_star:
f337b084
TH
2122 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2123 0 : items;
2124 break;
2125 default:
2126 /* e_no_len and e_number */
2127 len = symptr->length;
49704364
WL
2128 break;
2129 }
2130
f337b084 2131 if (len) {
a7a3cfaa 2132 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2133
a7a3cfaa
TH
2134 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2135 /* We can process this letter. */
2136 STRLEN size = props & PACK_SIZE_MASK;
2137 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2138 }
f337b084
TH
2139 }
2140
49704364
WL
2141 /* Look ahead for next symbol. Do we have code/code? */
2142 lookahead = *symptr;
2143 found = next_symbol(&lookahead);
246f24af
TH
2144 if (symptr->flags & FLAG_SLASH) {
2145 IV count;
f337b084 2146 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2147 if (strchr("aAZ", lookahead.code)) {
2148 if (lookahead.howlen == e_number) count = lookahead.length;
2149 else {
ce399ba6 2150 if (items > 0) {
48a5da33 2151 count = sv_len_utf8(*beglist);
ce399ba6 2152 }
246f24af
TH
2153 else count = 0;
2154 if (lookahead.code == 'Z') count++;
2155 }
2156 } else {
2157 if (lookahead.howlen == e_number && lookahead.length < items)
2158 count = lookahead.length;
2159 else count = items;
2160 }
2161 lookahead.howlen = e_number;
2162 lookahead.length = count;
2163 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2164 }
49704364 2165
a1219b5e
NC
2166 needs_swap = NEEDS_SWAP(datumtype);
2167
fc241834
RGS
2168 /* Code inside the switch must take care to properly update
2169 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2170 doesn't simply leave using break */
1109a392 2171 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2172 default:
f337b084
TH
2173 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2174 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2175 case '%':
49704364 2176 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2177 {
2178 char *from;
28be1210 2179 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2180 case '.':
2181 if (howlen == e_star) from = start;
2182 else if (len == 0) from = cur;
2183 else {
2184 tempsym_t *group = symptr;
2185
2186 while (--len && group) group = group->previous;
2187 from = group ? start + group->strbeg : start;
2188 }
2189 fromstr = NEXTFROM;
2190 len = SvIV(fromstr);
2191 goto resize;
28be1210 2192 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2193 case '@':
28be1210
TH
2194 from = start + symptr->strbeg;
2195 resize:
28be1210 2196 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2197 if (len >= 0) {
2198 while (len && from < cur) {
2199 from += UTF8SKIP(from);
2200 len--;
2201 }
2202 if (from > cur)
2203 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2204 if (len) {
2205 /* Here we know from == cur */
2206 grow:
2207 GROWING(0, cat, start, cur, len);
2208 Zero(cur, len, char);
2209 cur += len;
2210 } else if (from < cur) {
2211 len = cur - from;
2212 goto shrink;
2213 } else goto no_change;
2214 } else {
2215 cur = from;
2216 len = -len;
2217 goto utf8_shrink;
f337b084 2218 }
28be1210
TH
2219 else {
2220 len -= cur - from;
f337b084 2221 if (len > 0) goto grow;
28be1210 2222 if (len == 0) goto no_change;
fc241834 2223 len = -len;
28be1210 2224 goto shrink;
f337b084 2225 }
a6ec74c1 2226 break;
28be1210 2227 }
fc241834 2228 case '(': {
49704364 2229 tempsym_t savsym = *symptr;
66c611c5
MHM
2230 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2231 symptr->flags |= group_modifiers;
49704364
WL
2232 symptr->patend = savsym.grpend;
2233 symptr->level++;
f337b084 2234 symptr->previous = &lookahead;
18529408 2235 while (len--) {
f337b084
TH
2236 U32 was_utf8;
2237 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2238 else symptr->flags &= ~FLAG_PARSE_UTF8;
2239 was_utf8 = SvUTF8(cat);
49704364 2240 symptr->patptr = savsym.grpbeg;
f337b084
TH
2241 beglist = pack_rec(cat, symptr, beglist, endlist);
2242 if (SvUTF8(cat) != was_utf8)
2243 /* This had better be an upgrade while in utf8==0 mode */
2244 utf8 = 1;
2245
49704364 2246 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2247 break; /* No way to continue */
2248 }
ee790063 2249 items = endlist - beglist;
f337b084
TH
2250 lookahead.flags = symptr->flags & ~group_modifiers;
2251 goto no_change;
18529408 2252 }
62f95557
IZ
2253 case 'X' | TYPE_IS_SHRIEKING:
2254 if (!len) /* Avoid division by 0 */
2255 len = 1;
f337b084
TH
2256 if (utf8) {
2257 char *hop, *last;
2258 I32 l = len;
2259 hop = last = start;
2260 while (hop < cur) {
2261 hop += UTF8SKIP(hop);
2262 if (--l == 0) {
2263 last = hop;
2264 l = len;
2265 }
2266 }
2267 if (last > cur)
2268 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2269 cur = last;
2270 break;
2271 }
2272 len = (cur-start) % len;
62f95557 2273 /* FALL THROUGH */
a6ec74c1 2274 case 'X':
f337b084
TH
2275 if (utf8) {
2276 if (len < 1) goto no_change;
28be1210 2277 utf8_shrink:
f337b084
TH
2278 while (len > 0) {
2279 if (cur <= start)
28be1210
TH
2280 Perl_croak(aTHX_ "'%c' outside of string in pack",
2281 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2282 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2283 if (cur <= start)
28be1210
TH
2284 Perl_croak(aTHX_ "'%c' outside of string in pack",
2285 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2286 }
2287 len--;
2288 }
2289 } else {
fc241834 2290 shrink:
f337b084 2291 if (cur - start < len)
28be1210
TH
2292 Perl_croak(aTHX_ "'%c' outside of string in pack",
2293 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2294 cur -= len;
2295 }
2296 if (cur < start+symptr->strbeg) {
2297 /* Make sure group starts don't point into the void */
2298 tempsym_t *group;
9e27e96a 2299 const STRLEN length = cur-start;
f337b084
TH
2300 for (group = symptr;
2301 group && length < group->strbeg;
2302 group = group->previous) group->strbeg = length;
2303 lookahead.strbeg = length;
2304 }
a6ec74c1 2305 break;
fc241834
RGS
2306 case 'x' | TYPE_IS_SHRIEKING: {
2307 I32 ai32;
62f95557
IZ
2308 if (!len) /* Avoid division by 0 */
2309 len = 1;
230e1fce 2310 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2311 else ai32 = (cur - start) % len;
2312 if (ai32 == 0) goto no_change;
2313 len -= ai32;
2314 }
2315 /* FALL THROUGH */
a6ec74c1 2316 case 'x':
f337b084 2317 goto grow;
a6ec74c1
JH
2318 case 'A':
2319 case 'Z':
f337b084 2320 case 'a': {
f7fe979e 2321 const char *aptr;
f337b084 2322
a6ec74c1 2323 fromstr = NEXTFROM;
e62f0680 2324 aptr = SvPV_const(fromstr, fromlen);
f337b084 2325 if (DO_UTF8(fromstr)) {
f7fe979e 2326 const char *end, *s;
f337b084
TH
2327
2328 if (!utf8 && !SvUTF8(cat)) {
2329 marked_upgrade(aTHX_ cat, symptr);
2330 lookahead.flags |= FLAG_DO_UTF8;
2331 lookahead.strbeg = symptr->strbeg;
2332 utf8 = 1;
2333 start = SvPVX(cat);
2334 cur = start + SvCUR(cat);
2335 }
fc241834 2336 if (howlen == e_star) {
f337b084
TH
2337 if (utf8) goto string_copy;
2338 len = fromlen+1;
2339 }
2340 s = aptr;
2341 end = aptr + fromlen;
2342 fromlen = datumtype == 'Z' ? len-1 : len;
2343 while ((I32) fromlen > 0 && s < end) {
2344 s += UTF8SKIP(s);
2345 fromlen--;
2346 }
2347 if (s > end)
2348 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2349 if (utf8) {
fc241834 2350 len = fromlen;
f337b084
TH
2351 if (datumtype == 'Z') len++;
2352 fromlen = s-aptr;
2353 len += fromlen;
fc241834 2354
f337b084 2355 goto string_copy;
fc241834 2356 }
f337b084
TH
2357 fromlen = len - fromlen;
2358 if (datumtype == 'Z') fromlen--;
2359 if (howlen == e_star) {
2360 len = fromlen;
2361 if (datumtype == 'Z') len++;
fc241834 2362 }
f337b084 2363 GROWING(0, cat, start, cur, len);
fc241834 2364 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2365 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2366 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2367 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2368 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2369 cur += fromlen;
a6ec74c1 2370 len -= fromlen;
f337b084
TH
2371 } else if (utf8) {
2372 if (howlen == e_star) {
2373 len = fromlen;
2374 if (datumtype == 'Z') len++;
a6ec74c1 2375 }
f337b084
TH
2376 if (len <= (I32) fromlen) {
2377 fromlen = len;
2378 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2379 }
fc241834 2380 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2381 upgrade, so:
2382 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2383 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2384 len -= fromlen;
2385 while (fromlen > 0) {
230e1fce 2386 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2387 aptr++;
2388 fromlen--;
fc241834 2389 }
f337b084
TH
2390 } else {
2391 string_copy:
2392 if (howlen == e_star) {
2393 len = fromlen;
2394 if (datumtype == 'Z') len++;
2395 }
2396 if (len <= (I32) fromlen) {
2397 fromlen = len;
2398 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2399 }
f337b084
TH
2400 GROWING(0, cat, start, cur, len);
2401 Copy(aptr, cur, fromlen, char);
2402 cur += fromlen;
2403 len -= fromlen;
a6ec74c1 2404 }
f337b084
TH
2405 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2406 cur += len;
3c4fb04a 2407 SvTAINT(cat);
a6ec74c1 2408 break;
f337b084 2409 }
a6ec74c1 2410 case 'B':
f337b084 2411 case 'b': {
b83604b4 2412 const char *str, *end;
f337b084
TH
2413 I32 l, field_len;
2414 U8 bits;
2415 bool utf8_source;
2416 U32 utf8_flags;
a6ec74c1 2417
fc241834 2418 fromstr = NEXTFROM;
b83604b4 2419 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2420 end = str + fromlen;
2421 if (DO_UTF8(fromstr)) {
2422 utf8_source = TRUE;
041457d9 2423 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2424 } else {
2425 utf8_source = FALSE;
2426 utf8_flags = 0; /* Unused, but keep compilers happy */
2427 }
2428 if (howlen == e_star) len = fromlen;
2429 field_len = (len+7)/8;
2430 GROWING(utf8, cat, start, cur, field_len);
2431 if (len > (I32)fromlen) len = fromlen;
2432 bits = 0;
2433 l = 0;
2434 if (datumtype == 'B')
2435 while (l++ < len) {
2436 if (utf8_source) {
95b63a38 2437 UV val = 0;
f337b084
TH
2438 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2439 bits |= val & 1;
2440 } else bits |= *str++ & 1;
2441 if (l & 7) bits <<= 1;
fc241834 2442 else {
f337b084
TH
2443 PUSH_BYTE(utf8, cur, bits);
2444 bits = 0;
a6ec74c1
JH
2445 }
2446 }
f337b084
TH
2447 else
2448 /* datumtype == 'b' */
2449 while (l++ < len) {
2450 if (utf8_source) {
95b63a38 2451 UV val = 0;
f337b084
TH
2452 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2453 if (val & 1) bits |= 0x80;
2454 } else if (*str++ & 1)
2455 bits |= 0x80;
2456 if (l & 7) bits >>= 1;
fc241834 2457 else {
f337b084
TH
2458 PUSH_BYTE(utf8, cur, bits);
2459 bits = 0;
a6ec74c1
JH
2460 }
2461 }
f337b084
TH
2462 l--;
2463 if (l & 7) {
fc241834 2464 if (datumtype == 'B')
f337b084 2465 bits <<= 7 - (l & 7);
fc241834 2466 else
f337b084
TH
2467 bits >>= 7 - (l & 7);
2468 PUSH_BYTE(utf8, cur, bits);
2469 l += 7;
a6ec74c1 2470 }
f337b084
TH
2471 /* Determine how many chars are left in the requested field */
2472 l /= 8;
2473 if (howlen == e_star) field_len = 0;
2474 else field_len -= l;
2475 Zero(cur, field_len, char);
2476 cur += field_len;
a6ec74c1 2477 break;
f337b084 2478 }
a6ec74c1 2479 case 'H':
f337b084 2480 case 'h': {
10516c54 2481 const char *str, *end;
f337b084
TH
2482 I32 l, field_len;
2483 U8 bits;
2484 bool utf8_source;
2485 U32 utf8_flags;
a6ec74c1 2486
fc241834 2487 fromstr = NEXTFROM;
10516c54 2488 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2489 end = str + fromlen;
2490 if (DO_UTF8(fromstr)) {
2491 utf8_source = TRUE;
041457d9 2492 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2493 } else {
2494 utf8_source = FALSE;
2495 utf8_flags = 0; /* Unused, but keep compilers happy */
2496 }
2497 if (howlen == e_star) len = fromlen;
2498 field_len = (len+1)/2;
2499 GROWING(utf8, cat, start, cur, field_len);
2500 if (!utf8 && len > (I32)fromlen) len = fromlen;
2501 bits = 0;
2502 l = 0;
2503 if (datumtype == 'H')
2504 while (l++ < len) {
2505 if (utf8_source) {
95b63a38 2506 UV val = 0;
f337b084
TH
2507 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2508 if (val < 256 && isALPHA(val))
2509 bits |= (val + 9) & 0xf;
a6ec74c1 2510 else
f337b084
TH
2511 bits |= val & 0xf;
2512 } else if (isALPHA(*str))
2513 bits |= (*str++ + 9) & 0xf;
2514 else
2515 bits |= *str++ & 0xf;
2516 if (l & 1) bits <<= 4;
fc241834 2517 else {
f337b084
TH
2518 PUSH_BYTE(utf8, cur, bits);
2519 bits = 0;
a6ec74c1
JH
2520 }
2521 }
f337b084
TH
2522 else
2523 while (l++ < len) {
2524 if (utf8_source) {
95b63a38 2525 UV val = 0;
f337b084
TH
2526 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2527 if (val < 256 && isALPHA(val))
2528 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2529 else
f337b084
TH
2530 bits |= (val & 0xf) << 4;
2531 } else if (isALPHA(*str))
2532 bits |= ((*str++ + 9) & 0xf) << 4;
2533 else
2534 bits |= (*str++ & 0xf) << 4;
2535 if (l & 1) bits >>= 4;
fc241834 2536 else {
f337b084
TH
2537 PUSH_BYTE(utf8, cur, bits);
2538 bits = 0;
a6ec74c1 2539 }
fc241834 2540 }
f337b084
TH
2541 l--;
2542 if (l & 1) {
2543 PUSH_BYTE(utf8, cur, bits);
2544 l++;
2545 }
2546 /* Determine how many chars are left in the requested field */
2547 l /= 2;
2548 if (howlen == e_star) field_len = 0;
2549 else field_len -= l;
2550 Zero(cur, field_len, char);
2551 cur += field_len;
2552 break;
fc241834
RGS
2553 }
2554 case 'c':
f337b084
TH
2555 while (len-- > 0) {
2556 IV aiv;
2557 fromstr = NEXTFROM;
2558 aiv = SvIV(fromstr);
a2a5de95
NC
2559 if ((-128 > aiv || aiv > 127))
2560 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2561 "Character in 'c' format wrapped in pack");
585ec06d 2562 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2563 }
2564 break;
2565 case 'C':
f337b084
TH
2566 if (len == 0) {
2567 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2568 break;
2569 }
a6ec74c1 2570 while (len-- > 0) {
f337b084 2571 IV aiv;
a6ec74c1 2572 fromstr = NEXTFROM;
f337b084 2573 aiv = SvIV(fromstr);
a2a5de95
NC
2574 if ((0 > aiv || aiv > 0xff))
2575 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2576 "Character in 'C' format wrapped in pack");
1651fc44 2577 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2578 }
fc241834
RGS
2579 break;
2580 case 'W': {
2581 char *end;
670f1322 2582 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2583
2584 end = start+SvLEN(cat)-1;
2585 if (utf8) end -= UTF8_MAXLEN-1;
2586 while (len-- > 0) {
2587 UV auv;
2588 fromstr = NEXTFROM;
2589 auv = SvUV(fromstr);
2590 if (in_bytes) auv = auv % 0x100;
2591 if (utf8) {
2592 W_utf8:
2593 if (cur > end) {
2594 *cur = '\0';
b162af07 2595 SvCUR_set(cat, cur - start);
fc241834
RGS
2596
2597 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2598 end = start+SvLEN(cat)-UTF8_MAXLEN;
2599 }
230e1fce
NC
2600 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2601 NATIVE_TO_UNI(auv),
041457d9 2602 warn_utf8 ?
230e1fce 2603 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2604 } else {
2605 if (auv >= 0x100) {
2606 if (!SvUTF8(cat)) {
2607 *cur = '\0';
b162af07 2608 SvCUR_set(cat, cur - start);
fc241834
RGS
2609 marked_upgrade(aTHX_ cat, symptr);
2610 lookahead.flags |= FLAG_DO_UTF8;
2611 lookahead.strbeg = symptr->strbeg;
2612 utf8 = 1;
2613 start = SvPVX(cat);
2614 cur = start + SvCUR(cat);
2615 end = start+SvLEN(cat)-UTF8_MAXLEN;
2616 goto W_utf8;
2617 }
a2a5de95
NC
2618 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2619 "Character in 'W' format wrapped in pack");
fc241834
RGS
2620 auv &= 0xff;
2621 }
2622 if (cur >= end) {
2623 *cur = '\0';
b162af07 2624 SvCUR_set(cat, cur - start);
fc241834
RGS
2625 GROWING(0, cat, start, cur, len+1);
2626 end = start+SvLEN(cat)-1;
2627 }
fe2774ed 2628 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2629 }
2630 }
2631 break;
fc241834
RGS
2632 }
2633 case 'U': {
2634 char *end;
2635
2636 if (len == 0) {
2637 if (!(symptr->flags & FLAG_DO_UTF8)) {
2638 marked_upgrade(aTHX_ cat, symptr);
2639 lookahead.flags |= FLAG_DO_UTF8;
2640 lookahead.strbeg = symptr->strbeg;
2641 }
2642 utf8 = 0;
2643 goto no_change;
2644 }
2645
2646 end = start+SvLEN(cat);
2647 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2648 while (len-- > 0) {
fc241834 2649 UV auv;
a6ec74c1 2650 fromstr = NEXTFROM;
fc241834
RGS
2651 auv = SvUV(fromstr);
2652 if (utf8) {
230e1fce 2653 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2654 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2655 warn_utf8 ?
fc241834
RGS
2656 0 : UNICODE_ALLOW_ANY);
2657 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2658 *cur = '\0';
b162af07 2659 SvCUR_set(cat, cur - start);
fc241834
RGS
2660 GROWING(0, cat, start, cur,
2661 len+(endb-buffer)*UTF8_EXPAND);
2662 end = start+SvLEN(cat);
2663 }
3a88beaa 2664 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
fc241834
RGS
2665 } else {
2666 if (cur >= end) {
2667 *cur = '\0';
b162af07 2668 SvCUR_set(cat, cur - start);
fc241834
RGS
2669 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2670 end = start+SvLEN(cat)-UTF8_MAXLEN;
2671 }
230e1fce 2672 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2673 warn_utf8 ?
230e1fce 2674 0 : UNICODE_ALLOW_ANY);
fc241834 2675 }
a6ec74c1 2676 }
a6ec74c1 2677 break;
fc241834 2678 }
a6ec74c1
JH
2679 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2680 case 'f':
a6ec74c1 2681 while (len-- > 0) {
f337b084
TH
2682 float afloat;
2683 NV anv;
a6ec74c1 2684 fromstr = NEXTFROM;
f337b084 2685 anv = SvNV(fromstr);
85bba25f 2686# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2687 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2688 * on Alpha; fake it if we don't have them.
2689 */
f337b084 2690 if (anv > FLT_MAX)
fc241834 2691 afloat = FLT_MAX;
f337b084 2692 else if (anv < -FLT_MAX)
fc241834 2693 afloat = -FLT_MAX;
f337b084 2694 else afloat = (float)anv;
baf3cf9c 2695# else
f337b084 2696 afloat = (float)anv;
baf3cf9c 2697# endif
3a88beaa 2698 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2699 }
2700 break;
2701 case 'd':
a6ec74c1 2702 while (len-- > 0) {
f337b084
TH
2703 double adouble;
2704 NV anv;
a6ec74c1 2705 fromstr = NEXTFROM;
f337b084 2706 anv = SvNV(fromstr);
85bba25f 2707# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2708 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2709 * on Alpha; fake it if we don't have them.
2710 */
f337b084 2711 if (anv > DBL_MAX)
fc241834 2712 adouble = DBL_MAX;
f337b084 2713 else if (anv < -DBL_MAX)
fc241834 2714 adouble = -DBL_MAX;
f337b084 2715 else adouble = (double)anv;
baf3cf9c 2716# else
f337b084 2717 adouble = (double)anv;
baf3cf9c 2718# endif
3a88beaa 2719 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2720 }
2721 break;
fc241834 2722 case 'F': {
275663fa 2723 NV_bytes anv;
1109a392 2724 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2725 while (len-- > 0) {
2726 fromstr = NEXTFROM;
cd07c537
DM
2727#ifdef __GNUC__
2728 /* to work round a gcc/x86 bug; don't use SvNV */
2729 anv.nv = sv_2nv(fromstr);
2730#else
275663fa 2731 anv.nv = SvNV(fromstr);
cd07c537 2732#endif
3a88beaa 2733 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2734 }
2735 break;
fc241834 2736 }
92d41999 2737#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2738 case 'D': {
275663fa 2739 ld_bytes aldouble;
1109a392
MHM
2740 /* long doubles can have unused bits, which may be nonzero */
2741 Zero(&aldouble, 1, long double);
92d41999
JH
2742 while (len-- > 0) {
2743 fromstr = NEXTFROM;
cd07c537
DM
2744# ifdef __GNUC__
2745 /* to work round a gcc/x86 bug; don't use SvNV */
2746 aldouble.ld = (long double)sv_2nv(fromstr);
2747# else
275663fa 2748 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2749# endif
3a88beaa
NC
2750 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2751 needs_swap);
92d41999
JH
2752 }
2753 break;
fc241834 2754 }
92d41999 2755#endif
068bd2e7 2756 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2757 case 'n':
2758 while (len-- > 0) {
f337b084 2759 I16 ai16;
a6ec74c1 2760 fromstr = NEXTFROM;
ef108786 2761 ai16 = (I16)SvIV(fromstr);
ef108786 2762 ai16 = PerlSock_htons(ai16);
3a88beaa 2763 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2764 }
2765 break;
068bd2e7 2766 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2767 case 'v':
2768 while (len-- > 0) {
f337b084 2769 I16 ai16;
a6ec74c1 2770 fromstr = NEXTFROM;
ef108786 2771 ai16 = (I16)SvIV(fromstr);
ef108786 2772 ai16 = htovs(ai16);
3a88beaa 2773 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2774 }
2775 break;
49704364 2776 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2777#if SHORTSIZE != SIZE16
fc241834 2778 while (len-- > 0) {
f337b084 2779 unsigned short aushort;
fc241834
RGS
2780 fromstr = NEXTFROM;
2781 aushort = SvUV(fromstr);
3a88beaa 2782 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2783 }
49704364
WL
2784 break;
2785#else
2786 /* Fall through! */
a6ec74c1 2787#endif
49704364 2788 case 'S':
fc241834 2789 while (len-- > 0) {
f337b084 2790 U16 au16;
fc241834
RGS
2791 fromstr = NEXTFROM;
2792 au16 = (U16)SvUV(fromstr);
3a88beaa 2793 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2794 }
2795 break;
49704364 2796 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2797#if SHORTSIZE != SIZE16
fc241834 2798 while (len-- > 0) {
f337b084 2799 short ashort;
fc241834
RGS
2800 fromstr = NEXTFROM;
2801 ashort = SvIV(fromstr);
3a88beaa 2802 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2803 }
49704364
WL
2804 break;
2805#else
2806 /* Fall through! */
a6ec74c1 2807#endif
49704364
WL
2808 case 's':
2809 while (len-- > 0) {
f337b084 2810 I16 ai16;
49704364 2811 fromstr = NEXTFROM;
ef108786 2812 ai16 = (I16)SvIV(fromstr);
3a88beaa 2813 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2814 }
2815 break;
2816 case 'I':
49704364 2817 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2818 while (len-- > 0) {
f337b084 2819 unsigned int auint;
a6ec74c1
JH
2820 fromstr = NEXTFROM;
2821 auint = SvUV(fromstr);
3a88beaa 2822 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2823 }
2824 break;
92d41999
JH
2825 case 'j':
2826 while (len-- > 0) {
f337b084 2827 IV aiv;
92d41999
JH
2828 fromstr = NEXTFROM;
2829 aiv = SvIV(fromstr);
3a88beaa 2830 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2831 }
2832 break;
2833 case 'J':
2834 while (len-- > 0) {
f337b084 2835 UV auv;
92d41999
JH
2836 fromstr = NEXTFROM;
2837 auv = SvUV(fromstr);
3a88beaa 2838 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2839 }
2840 break;
a6ec74c1
JH
2841 case 'w':
2842 while (len-- > 0) {
f337b084 2843 NV anv;
a6ec74c1 2844 fromstr = NEXTFROM;
15e9f109 2845 anv = SvNV(fromstr);
a6ec74c1 2846
f337b084
TH
2847 if (anv < 0) {
2848 *cur = '\0';
b162af07 2849 SvCUR_set(cat, cur - start);
49704364 2850 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2851 }
a6ec74c1 2852
196b62db
NC
2853 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2854 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2855 any negative IVs will have already been got by the croak()
2856 above. IOK is untrue for fractions, so we test them
2857 against UV_MAX_P1. */
f337b084
TH
2858 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2859 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2860 char *in = buf + sizeof(buf);
196b62db 2861 UV auv = SvUV(fromstr);
a6ec74c1
JH
2862
2863 do {
eb160463 2864 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2865 auv >>= 7;
2866 } while (auv);
2867 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2868 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2869 in, (buf + sizeof(buf)) - in);
2870 } else if (SvPOKp(fromstr))
2871 goto w_string;
a6ec74c1 2872 else if (SvNOKp(fromstr)) {
0258719b 2873 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2874 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2875 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2876 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2877 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2878 Some C compilers are strict about integral constant
2879 expressions so we conservatively divide by a slightly
2880 smaller integer instead of multiplying by the exact
2881 floating-point value.
0258719b
NC
2882 */
2883#ifdef NV_MAX_10_EXP
f337b084 2884 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2885 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2886#else
f337b084 2887 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2888 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2889#endif
a6ec74c1
JH
2890 char *in = buf + sizeof(buf);
2891
8b6e33c7 2892 anv = Perl_floor(anv);
a6ec74c1 2893 do {
8b6e33c7 2894 const NV next = Perl_floor(anv / 128);
a6ec74c1 2895 if (in <= buf) /* this cannot happen ;-) */
49704364 2896 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2897 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2898 anv = next;
2899 } while (anv > 0);
a6ec74c1 2900 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2901 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2902 in, (buf + sizeof(buf)) - in);
2903 } else {
8b6e33c7
AL
2904 const char *from;
2905 char *result, *in;
735b914b
JH
2906 SV *norm;
2907 STRLEN len;
2908 bool done;
2909
f337b084 2910 w_string:
735b914b 2911 /* Copy string and check for compliance */
349d4f2f 2912 from = SvPV_const(fromstr, len);
735b914b 2913 if ((norm = is_an_int(from, len)) == NULL)
49704364 2914 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2915
a02a5408 2916 Newx(result, len, char);
735b914b
JH
2917 in = result + len;
2918 done = FALSE;
f337b084 2919 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2920 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2921 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2922 in, (result + len) - in);
735b914b
JH
2923 Safefree(result);
2924 SvREFCNT_dec(norm); /* free norm */
fc241834 2925 }
a6ec74c1
JH
2926 }
2927 break;
2928 case 'i':
49704364 2929 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2930 while (len-- > 0) {
f337b084 2931 int aint;
a6ec74c1
JH
2932 fromstr = NEXTFROM;
2933 aint = SvIV(fromstr);
3a88beaa 2934 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2935 }
2936 break;
068bd2e7 2937 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2938 case 'N':
2939 while (len-- > 0) {
f337b084 2940 U32 au32;
a6ec74c1 2941 fromstr = NEXTFROM;
ef108786 2942 au32 = SvUV(fromstr);
ef108786 2943 au32 = PerlSock_htonl(au32);
3a88beaa 2944 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2945 }
2946 break;
068bd2e7 2947 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2948 case 'V':
2949 while (len-- > 0) {
f337b084 2950 U32 au32;
a6ec74c1 2951 fromstr = NEXTFROM;
ef108786 2952 au32 = SvUV(fromstr);
ef108786 2953 au32 = htovl(au32);
3a88beaa 2954 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2955 }
2956 break;
49704364 2957 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2958#if LONGSIZE != SIZE32
fc241834 2959 while (len-- > 0) {
f337b084 2960 unsigned long aulong;
fc241834
RGS
2961 fromstr = NEXTFROM;
2962 aulong = SvUV(fromstr);
3a88beaa 2963 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2964 }
49704364
WL
2965 break;
2966#else
2967 /* Fall though! */
a6ec74c1 2968#endif
49704364 2969 case 'L':
fc241834 2970 while (len-- > 0) {
f337b084 2971 U32 au32;
fc241834
RGS
2972 fromstr = NEXTFROM;
2973 au32 = SvUV(fromstr);
3a88beaa 2974 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2975 }
2976 break;
49704364 2977 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2978#if LONGSIZE != SIZE32
fc241834 2979 while (len-- > 0) {
f337b084 2980 long along;
fc241834
RGS
2981 fromstr = NEXTFROM;
2982 along = SvIV(fromstr);
3a88beaa 2983 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 2984 }
49704364
WL
2985 break;
2986#else
2987 /* Fall though! */
a6ec74c1 2988#endif
49704364
WL
2989 case 'l':
2990 while (len-- > 0) {
f337b084 2991 I32 ai32;
49704364 2992 fromstr = NEXTFROM;
ef108786 2993 ai32 = SvIV(fromstr);
3a88beaa 2994 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
2995 }
2996 break;
2997#ifdef HAS_QUAD
2998 case 'Q':
2999 while (len-- > 0) {
f337b084 3000 Uquad_t auquad;
a6ec74c1 3001 fromstr = NEXTFROM;
f337b084 3002 auquad = (Uquad_t) SvUV(fromstr);
3a88beaa 3003 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3004 }
3005 break;
3006 case 'q':
3007 while (len-- > 0) {
f337b084 3008 Quad_t aquad;
a6ec74c1
JH
3009 fromstr = NEXTFROM;
3010 aquad = (Quad_t)SvIV(fromstr);
3a88beaa 3011 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3012 }
3013 break;
f337b084 3014#endif /* HAS_QUAD */
a6ec74c1
JH
3015 case 'P':
3016 len = 1; /* assume SV is correct length */
f337b084 3017 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3018 /* Fall through! */
a6ec74c1
JH
3019 case 'p':
3020 while (len-- > 0) {
83003860 3021 const char *aptr;
f337b084 3022
a6ec74c1 3023 fromstr = NEXTFROM;
28a4f200
TH
3024 SvGETMAGIC(fromstr);
3025 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3026 else {
a6ec74c1
JH
3027 /* XXX better yet, could spirit away the string to
3028 * a safe spot and hang on to it until the result
3029 * of pack() (and all copies of the result) are
3030 * gone.
3031 */
041457d9 3032 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3033 !SvREADONLY(fromstr)))) {
3034 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3035 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3036 }
3037 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3038 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3039 else
2596d9fe 3040 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3041 }
3a88beaa 3042 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3043 }
3044 break;
fc241834 3045 case 'u': {
f7fe979e 3046 const char *aptr, *aend;
fc241834 3047 bool from_utf8;
f337b084 3048
a6ec74c1 3049 fromstr = NEXTFROM;
fc241834
RGS
3050 if (len <= 2) len = 45;
3051 else len = len / 3 * 3;
3052 if (len >= 64) {
a2a5de95
NC
3053 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3054 "Field too wide in 'u' format in pack");
fc241834
RGS
3055 len = 63;
3056 }
83003860 3057 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3058 from_utf8 = DO_UTF8(fromstr);
3059 if (from_utf8) {
3060 aend = aptr + fromlen;
3f63b0e5 3061 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3062 } else aend = NULL; /* Unused, but keep compilers happy */
3063 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3064 while (fromlen > 0) {
fc241834 3065 U8 *end;
a6ec74c1 3066 I32 todo;
fc241834 3067 U8 hunk[1+63/3*4+1];
a6ec74c1 3068
eb160463 3069 if ((I32)fromlen > len)
a6ec74c1
JH
3070 todo = len;
3071 else
3072 todo = fromlen;
fc241834
RGS
3073 if (from_utf8) {
3074 char buffer[64];
3075 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3076 'u' | TYPE_IS_PACK)) {
3077 *cur = '\0';
b162af07 3078 SvCUR_set(cat, cur - start);
5637ef5b
NC
3079 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3080 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3081 aptr, aend, buffer, (long) todo);
fc241834
RGS
3082 }
3083 end = doencodes(hunk, buffer, todo);
3084 } else {
3085 end = doencodes(hunk, aptr, todo);
3086 aptr += todo;
3087 }
3a88beaa 3088 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3089 fromlen -= todo;
3090 }
a6ec74c1
JH
3091 break;
3092 }
f337b084
TH
3093 }
3094 *cur = '\0';
b162af07 3095 SvCUR_set(cat, cur - start);
f337b084 3096 no_change:
49704364 3097 *symptr = lookahead;
a6ec74c1 3098 }
49704364 3099 return beglist;
18529408
IZ
3100}
3101#undef NEXTFROM
3102
3103
3104PP(pp_pack)
3105{
97aff369 3106 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3107 SV *cat = TARG;
18529408 3108 STRLEN fromlen;
349d4f2f 3109 SV *pat_sv = *++MARK;
eb578fdb
KW
3110 const char *pat = SvPV_const(pat_sv, fromlen);
3111 const char *patend = pat + fromlen;
18529408
IZ
3112
3113 MARK++;
76f68e9b 3114 sv_setpvs(cat, "");
f337b084 3115 SvUTF8_off(cat);
18529408 3116
7accc089 3117 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3118
a6ec74c1
JH
3119 SvSETMAGIC(cat);
3120 SP = ORIGMARK;
3121 PUSHs(cat);
3122 RETURN;
3123}
a6ec74c1 3124
73cb7263
NC
3125/*
3126 * Local variables:
3127 * c-indentation-style: bsd
3128 * c-basic-offset: 4
14d04a33 3129 * indent-tabs-mode: nil
73cb7263
NC
3130 * End:
3131 *
14d04a33 3132 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3133 */