This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove Module-Build remnants
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
f337b084 154 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
WL
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
219#if 'I' == 73 && 'J' == 74
220/* On an ASCII/ISO kind of system */
221#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222#else
223/*
224 Some other sort of character set - use memchr() so we don't match
225 the null byte.
226 */
227#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
228#endif
229
66c611c5 230/* type modifiers */
62f95557 231#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
232#define TYPE_IS_BIG_ENDIAN 0x200
233#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 234#define TYPE_IS_PACK 0x800
1109a392 235#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 236#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
237#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
238
7212898e
NC
239# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
241
242# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
243
78d46eaa 244#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 245#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
246#define PACK_SIZE_MASK 0x3F
247
298bc19c 248#include "packsizetables.c"
78d46eaa 249
20aa3a38
NC
250static void
251S_reverse_copy(const char *src, char *dest, STRLEN len)
252{
253 dest += len;
254 while (len--)
255 *--dest = *src++;
256}
257
08ca2aa3 258STATIC U8
f7fe979e 259uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 260{
08ca2aa3 261 STRLEN retlen;
0bcc34c2 262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 264 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 270 if (val >= 0x100) {
a2a5de95
NC
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
274 val &= 0xff;
275 }
276 *s += retlen;
fe2774ed 277 return (U8)val;
08ca2aa3
TH
278}
279
f337b084
TH
280#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
282 *(U8 *)(s)++)
283
284STATIC bool
f7fe979e 285uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
286{
287 UV val;
288 STRLEN retlen;
f7fe979e 289 const char *from = *s;
08ca2aa3 290 int bad = 0;
f7fe979e 291 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
293 const bool needs_swap = NEEDS_SWAP(datumtype);
294
228e69a7 295 if (UNLIKELY(needs_swap))
20aa3a38
NC
296 buf += buf_len;
297
08ca2aa3
TH
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
f337b084 300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
303 bad |= 1;
304 } else from += retlen;
305 if (val >= 0x100) {
306 bad |= 2;
307 val &= 0xff;
308 }
228e69a7 309 if (UNLIKELY(needs_swap))
20aa3a38
NC
310 *(U8 *)--buf = (U8)val;
311 else
312 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
313 }
314 /* We have enough characters for the buffer. Did we have problems ? */
315 if (bad) {
316 if (bad & 1) {
317 /* Rewalk the string fragment while warning */
f7fe979e 318 const char *ptr;
9e27e96a 319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
c80e42f3 322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
323 }
324 if (from > end) from = end;
325 }
a2a5de95
NC
326 if ((bad & 2))
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 328 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
332 }
333 *s = from;
334 return TRUE;
335}
336
337STATIC bool
f7fe979e 338next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 339{
97aff369 340 dVAR;
08ca2aa3 341 STRLEN retlen;
0bcc34c2 342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
343 if (val >= 0x100 || !ISUUCHAR(val) ||
344 retlen == (STRLEN) -1 || retlen == 0) {
345 *out = 0;
346 return FALSE;
347 }
348 *out = PL_uudmap[val] & 077;
f337b084 349 *s += retlen;
08ca2aa3
TH
350 return TRUE;
351}
78d46eaa 352
64844641 353STATIC char *
3a88beaa 354S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
7918f24d
NC
355 PERL_ARGS_ASSERT_BYTES_TO_UNI;
356
228e69a7 357 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
358 const U8 *p = start + len;
359 while (p-- > start) {
55d09dc8 360 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
361 }
362 } else {
363 const U8 * const end = start + len;
364 while (start < end) {
55d09dc8 365 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
366 start++;
367 }
f337b084 368 }
64844641 369 return dest;
f337b084
TH
370}
371
3a88beaa 372#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 373STMT_START { \
228e69a7 374 if (UNLIKELY(utf8)) \
3a88beaa 375 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
230e1fce 376 else { \
228e69a7 377 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
378 S_reverse_copy((char *)(buf), cur, len); \
379 else \
380 Copy(buf, cur, len, char); \
230e1fce
NC
381 (cur) += (len); \
382 } \
f337b084
TH
383} STMT_END
384
385#define GROWING(utf8, cat, start, cur, in_len) \
386STMT_START { \
387 STRLEN glen = (in_len); \
3473cf63 388 if (utf8) glen *= UTF8_EXPAND; \
f337b084 389 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 390 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
391 (cur) = (start) + SvCUR(cat); \
392 } \
393} STMT_END
394
395#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
396STMT_START { \
f7fe979e 397 const STRLEN glen = (in_len); \
f337b084 398 STRLEN gl = glen; \
3473cf63 399 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
400 if ((cur) + gl >= (start) + SvLEN(cat)) { \
401 *cur = '\0'; \
b162af07 402 SvCUR_set((cat), (cur) - (start)); \
0bd48802 403 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
404 (cur) = (start) + SvCUR(cat); \
405 } \
3a88beaa 406 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
407} STMT_END
408
409#define PUSH_BYTE(utf8, s, byte) \
410STMT_START { \
411 if (utf8) { \
f7fe979e 412 const U8 au8 = (byte); \
3a88beaa 413 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
f337b084
TH
414 } else *(U8 *)(s)++ = (byte); \
415} STMT_END
416
417/* Only to be used inside a loop (see the break) */
418#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
419STMT_START { \
420 STRLEN retlen; \
421 if (str >= end) break; \
422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
423 if (retlen == (STRLEN) -1 || retlen == 0) { \
424 *cur = '\0'; \
425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
426 } \
427 str += retlen; \
428} STMT_END
429
f7fe979e
AL
430static const char *_action( const tempsym_t* symptr )
431{
10edeb5d 432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
433}
434
206947d2 435/* Returns the sizeof() struct described by pat */
028d1f6d 436STATIC I32
f337b084 437S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 438{
f337b084 439 I32 total = 0;
206947d2 440
7918f24d
NC
441 PERL_ARGS_ASSERT_MEASURE_STRUCT;
442
49704364 443 while (next_symbol(symptr)) {
f337b084 444 I32 len;
f7fe979e 445 int size;
f337b084
TH
446
447 switch (symptr->howlen) {
fc241834 448 case e_star:
49704364 449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 450 _action( symptr ) );
49704364 451 break;
f337b084
TH
452 default:
453 /* e_no_len and e_number */
454 len = symptr->length;
455 break;
49704364
WL
456 }
457
a7a3cfaa 458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 459 if (!size) {
f7fe979e 460 int star;
80a13697
NC
461 /* endianness doesn't influence the size of a type */
462 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
463 default:
464 Perl_croak(aTHX_ "Invalid type '%c' in %s",
465 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 466 _action( symptr ) );
28be1210
TH
467 case '.' | TYPE_IS_SHRIEKING:
468 case '@' | TYPE_IS_SHRIEKING:
80a13697 469 case '@':
28be1210 470 case '.':
80a13697
NC
471 case '/':
472 case 'U': /* XXXX Is it correct? */
473 case 'w':
474 case 'u':
475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 476 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 477 _action( symptr ) );
80a13697
NC
478 case '%':
479 size = 0;
480 break;
481 case '(':
fc241834
RGS
482 {
483 tempsym_t savsym = *symptr;
484 symptr->patptr = savsym.grpbeg;
485 symptr->patend = savsym.grpend;
486 /* XXXX Theoretically, we need to measure many times at
487 different positions, since the subexpression may contain
488 alignment commands, but be not of aligned length.
489 Need to detect this and croak(). */
490 size = measure_struct(symptr);
491 *symptr = savsym;
492 break;
493 }
80a13697
NC
494 case 'X' | TYPE_IS_SHRIEKING:
495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
496 */
497 if (!len) /* Avoid division by 0 */
498 len = 1;
499 len = total % len; /* Assumed: the start is aligned. */
924ba076 500 /* FALLTHROUGH */
80a13697
NC
501 case 'X':
502 size = -1;
503 if (total < len)
f7fe979e 504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
505 break;
506 case 'x' | TYPE_IS_SHRIEKING:
507 if (!len) /* Avoid division by 0 */
508 len = 1;
509 star = total % len; /* Assumed: the start is aligned. */
510 if (star) /* Other portable ways? */
511 len = len - star;
512 else
513 len = 0;
924ba076 514 /* FALLTHROUGH */
80a13697
NC
515 case 'x':
516 case 'A':
517 case 'Z':
518 case 'a':
80a13697
NC
519 size = 1;
520 break;
521 case 'B':
522 case 'b':
523 len = (len + 7)/8;
524 size = 1;
525 break;
526 case 'H':
527 case 'h':
528 len = (len + 1)/2;
529 size = 1;
530 break;
78d46eaa 531
80a13697
NC
532 case 'P':
533 len = 1;
534 size = sizeof(char*);
78d46eaa
NC
535 break;
536 }
206947d2
IZ
537 }
538 total += len * size;
539 }
540 return total;
541}
542
49704364
WL
543
544/* locate matching closing parenthesis or bracket
545 * returns char pointer to char after match, or NULL
546 */
f7fe979e 547STATIC const char *
5aaab254 548S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 549{
7918f24d
NC
550 PERL_ARGS_ASSERT_GROUP_END;
551
49704364 552 while (patptr < patend) {
f7fe979e 553 const char c = *patptr++;
49704364
WL
554
555 if (isSPACE(c))
556 continue;
557 else if (c == ender)
558 return patptr-1;
559 else if (c == '#') {
560 while (patptr < patend && *patptr != '\n')
561 patptr++;
562 continue;
563 } else if (c == '(')
564 patptr = group_end(patptr, patend, ')') + 1;
565 else if (c == '[')
566 patptr = group_end(patptr, patend, ']') + 1;
18529408 567 }
49704364
WL
568 Perl_croak(aTHX_ "No group ending character '%c' found in template",
569 ender);
570 return 0;
18529408
IZ
571}
572
49704364
WL
573
574/* Convert unsigned decimal number to binary.
575 * Expects a pointer to the first digit and address of length variable
576 * Advances char pointer to 1st non-digit char and returns number
fc241834 577 */
f7fe979e 578STATIC const char *
5aaab254 579S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
580{
581 I32 len = *patptr++ - '0';
7918f24d
NC
582
583 PERL_ARGS_ASSERT_GET_NUM;
584
49704364
WL
585 while (isDIGIT(*patptr)) {
586 if (len >= 0x7FFFFFFF/10)
587 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
588 len = (len * 10) + (*patptr++ - '0');
589 }
590 *lenptr = len;
591 return patptr;
592}
593
594/* The marvellous template parsing routine: Using state stored in *symptr,
595 * locates next template code and count
596 */
597STATIC bool
f337b084 598S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 599{
f7fe979e 600 const char* patptr = symptr->patptr;
0bcc34c2 601 const char* const patend = symptr->patend;
49704364 602
7918f24d
NC
603 PERL_ARGS_ASSERT_NEXT_SYMBOL;
604
49704364
WL
605 symptr->flags &= ~FLAG_SLASH;
606
607 while (patptr < patend) {
608 if (isSPACE(*patptr))
609 patptr++;
610 else if (*patptr == '#') {
611 patptr++;
612 while (patptr < patend && *patptr != '\n')
613 patptr++;
614 if (patptr < patend)
615 patptr++;
616 } else {
fc241834 617 /* We should have found a template code */
49704364 618 I32 code = *patptr++ & 0xFF;
66c611c5 619 U32 inherited_modifiers = 0;
49704364
WL
620
621 if (code == ','){ /* grandfather in commas but with a warning */
622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
623 symptr->flags |= FLAG_COMMA;
624 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 625 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
626 }
627 continue;
628 }
fc241834 629
49704364 630 /* for '(', skip to ')' */
fc241834 631 if (code == '(') {
49704364
WL
632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
633 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 634 _action( symptr ) );
49704364
WL
635 symptr->grpbeg = patptr;
636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 639 _action( symptr ) );
49704364
WL
640 }
641
66c611c5
MHM
642 /* look for group modifiers to inherit */
643 if (TYPE_ENDIANNESS(symptr->flags)) {
644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
646 }
647
1109a392
MHM
648 /* look for modifiers */
649 while (patptr < patend) {
b7787f18
AL
650 const char *allowed;
651 I32 modifier;
1109a392
MHM
652 switch (*patptr) {
653 case '!':
654 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 655 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
656 break;
657 case '>':
658 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 659 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
660 break;
661 case '<':
662 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 663 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
664 break;
665 default:
b7787f18
AL
666 allowed = "";
667 modifier = 0;
1109a392
MHM
668 break;
669 }
66c611c5 670
1109a392
MHM
671 if (modifier == 0)
672 break;
66c611c5 673
1109a392
MHM
674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 676 allowed, _action( symptr ) );
66c611c5
MHM
677
678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
682 TYPE_ENDIANNESS_MASK)
683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 684 *patptr, _action( symptr ) );
66c611c5 685
a2a5de95
NC
686 if ((code & modifier)) {
687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
688 "Duplicate modifier '%c' after '%c' in %s",
689 *patptr, (int) TYPE_NO_MODIFIERS(code),
690 _action( symptr ) );
1109a392 691 }
66c611c5 692
1109a392
MHM
693 code |= modifier;
694 patptr++;
49704364
WL
695 }
696
66c611c5
MHM
697 /* inherit modifiers */
698 code |= inherited_modifiers;
699
fc241834 700 /* look for count and/or / */
49704364
WL
701 if (patptr < patend) {
702 if (isDIGIT(*patptr)) {
703 patptr = get_num( patptr, &symptr->length );
704 symptr->howlen = e_number;
705
706 } else if (*patptr == '*') {
707 patptr++;
708 symptr->howlen = e_star;
709
710 } else if (*patptr == '[') {
f7fe979e 711 const char* lenptr = ++patptr;
49704364
WL
712 symptr->howlen = e_number;
713 patptr = group_end( patptr, patend, ']' ) + 1;
714 /* what kind of [] is it? */
715 if (isDIGIT(*lenptr)) {
716 lenptr = get_num( lenptr, &symptr->length );
717 if( *lenptr != ']' )
718 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 719 _action( symptr ) );
49704364
WL
720 } else {
721 tempsym_t savsym = *symptr;
722 symptr->patend = patptr-1;
723 symptr->patptr = lenptr;
724 savsym.length = measure_struct(symptr);
725 *symptr = savsym;
726 }
727 } else {
728 symptr->howlen = e_no_len;
729 symptr->length = 1;
730 }
731
732 /* try to find / */
733 while (patptr < patend) {
734 if (isSPACE(*patptr))
735 patptr++;
736 else if (*patptr == '#') {
737 patptr++;
738 while (patptr < patend && *patptr != '\n')
739 patptr++;
740 if (patptr < patend)
741 patptr++;
742 } else {
66c611c5 743 if (*patptr == '/') {
49704364
WL
744 symptr->flags |= FLAG_SLASH;
745 patptr++;
66c611c5
MHM
746 if (patptr < patend &&
747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 749 _action( symptr ) );
49704364
WL
750 }
751 break;
752 }
18529408 753 }
49704364
WL
754 } else {
755 /* at end - no count, no / */
756 symptr->howlen = e_no_len;
757 symptr->length = 1;
758 }
759
760 symptr->code = code;
fc241834 761 symptr->patptr = patptr;
49704364 762 return TRUE;
18529408 763 }
49704364 764 }
fc241834 765 symptr->patptr = patptr;
49704364 766 return FALSE;
18529408
IZ
767}
768
18529408 769/*
fc241834 770 There is no way to cleanly handle the case where we should process the
08ca2aa3 771 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
772 (e.g. estimates like strend-s as an upper bound for the number of
773 characters left wouldn't work). So if we foresee the need of this
774 (pattern starts with U or contains U0), we want to work on the encoded
775 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
776 themselves if they need to do a lot of unpacks like this on it
777*/
fc241834 778STATIC bool
08ca2aa3
TH
779need_utf8(const char *pat, const char *patend)
780{
781 bool first = TRUE;
7918f24d
NC
782
783 PERL_ARGS_ASSERT_NEED_UTF8;
784
08ca2aa3
TH
785 while (pat < patend) {
786 if (pat[0] == '#') {
787 pat++;
f7fe979e 788 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
789 if (!pat) return FALSE;
790 } else if (pat[0] == 'U') {
791 if (first || pat[1] == '0') return TRUE;
792 } else first = FALSE;
793 pat++;
794 }
795 return FALSE;
796}
797
798STATIC char
799first_symbol(const char *pat, const char *patend) {
7918f24d
NC
800 PERL_ARGS_ASSERT_FIRST_SYMBOL;
801
08ca2aa3
TH
802 while (pat < patend) {
803 if (pat[0] != '#') return pat[0];
804 pat++;
f7fe979e 805 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
806 if (!pat) return 0;
807 pat++;
808 }
809 return 0;
810}
811
812/*
7accc089
JH
813=for apidoc unpackstring
814
21ebfc7a
DM
815The engine implementing the unpack() Perl function.
816
817Using the template pat..patend, this function unpacks the string
818s..strend into a number of mortal SVs, which it pushes onto the perl
819argument (@_) stack (so you will need to issue a C<PUTBACK> before and
72d33970 820C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
821pushed elements.
822
823The strend and patend pointers should point to the byte following the last
824character of each string.
825
826Although this function returns its values on the perl argument stack, it
827doesn't take any parameters from that stack (and thus in particular
828there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
829example).
7accc089
JH
830
831=cut */
832
833I32
f7fe979e 834Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 835{
f7fe979e 836 tempsym_t sym;
08ca2aa3 837
7918f24d
NC
838 PERL_ARGS_ASSERT_UNPACKSTRING;
839
f337b084 840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
841 else if (need_utf8(pat, patend)) {
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
230e1fce 845 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
846 SAVEFREEPV(s);
847 strend = s + len;
f337b084 848 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
849 }
850
f337b084
TH
851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
852 flags |= FLAG_PARSE_UTF8;
08ca2aa3 853
f7fe979e 854 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
855
856 return unpack_rec(&sym, s, s, strend, NULL );
857}
858
4136a0f7 859STATIC I32
f7fe979e 860S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 861{
27da23d5 862 dVAR; dSP;
3297d27d 863 SV *sv = NULL;
f7fe979e 864 const I32 start_sp_offset = SP - PL_stack_base;
49704364 865 howlen_t howlen;
a6ec74c1 866 I32 checksum = 0;
92d41999 867 UV cuv = 0;
a6ec74c1 868 NV cdouble = 0.0;
f337b084 869 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 870 bool beyond = FALSE;
21c16052 871 bool explicit_length;
9e27e96a 872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
874
875 PERL_ARGS_ASSERT_UNPACK_REC;
876
28be1210 877 symptr->strbeg = s - strbeg;
49704364 878
49704364 879 while (next_symbol(symptr)) {
a7a3cfaa 880 packprops_t props;
9e27e96a 881 I32 len;
f337b084 882 I32 datumtype = symptr->code;
a1219b5e 883 bool needs_swap;
206947d2 884 /* do first one only unless in list context
08ca2aa3 885 / is implemented by unpacking the count, then popping it from the
206947d2 886 stack, so must check that we're not in the middle of a / */
49704364 887 if ( unpack_only_one
206947d2 888 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 889 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 890 break;
49704364 891
f337b084 892 switch (howlen = symptr->howlen) {
fc241834
RGS
893 case e_star:
894 len = strend - strbeg; /* long enough */
49704364 895 break;
f337b084
TH
896 default:
897 /* e_no_len and e_number */
898 len = symptr->length;
899 break;
49704364 900 }
18529408 901
21c16052 902 explicit_length = TRUE;
a6ec74c1 903 redo_switch:
49704364 904 beyond = s >= strend;
a7a3cfaa
TH
905
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
907 if (props) {
908 /* props nonzero means we can process this letter. */
9e27e96a
AL
909 const long size = props & PACK_SIZE_MASK;
910 const long howmany = (strend - s) / size;
a7a3cfaa
TH
911 if (len > howmany)
912 len = howmany;
913
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
916 EXTEND(SP, len);
917 EXTEND_MORTAL(len);
78d46eaa
NC
918 }
919 }
a7a3cfaa 920
a1219b5e
NC
921 needs_swap = NEEDS_SWAP(datumtype);
922
1109a392 923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 924 default:
1109a392 925 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 926
a6ec74c1 927 case '%':
49704364 928 if (howlen == e_no_len)
18529408 929 len = 16; /* len is not specified */
a6ec74c1 930 checksum = len;
92d41999 931 cuv = 0;
a6ec74c1 932 cdouble = 0;
18529408 933 continue;
a6ec74c1 934 break;
18529408
IZ
935 case '(':
936 {
49704364 937 tempsym_t savsym = *symptr;
9e27e96a 938 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 939 symptr->flags |= group_modifiers;
49704364 940 symptr->patend = savsym.grpend;
28be1210 941 symptr->previous = &savsym;
49704364 942 symptr->level++;
18529408 943 PUTBACK;
c6f750d1 944 if (len && unpack_only_one) len = 1;
18529408 945 while (len--) {
49704364 946 symptr->patptr = savsym.grpbeg;
f337b084
TH
947 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
948 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
949 unpack_rec(symptr, s, strbeg, strend, &s);
950 if (s == strend && savsym.howlen == e_star)
49704364 951 break; /* No way to continue */
18529408
IZ
952 }
953 SPAGAIN;
28be1210 954 savsym.flags = symptr->flags & ~group_modifiers;
49704364 955 *symptr = savsym;
18529408
IZ
956 break;
957 }
28be1210 958 case '.' | TYPE_IS_SHRIEKING:
28be1210 959 case '.': {
9e27e96a 960 const char *from;
28be1210 961 SV *sv;
9e27e96a 962 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
963 if (howlen == e_star) from = strbeg;
964 else if (len <= 0) from = s;
965 else {
966 tempsym_t *group = symptr;
967
968 while (--len && group) group = group->previous;
969 from = group ? strbeg + group->strbeg : strbeg;
970 }
971 sv = from <= s ?
00646304
CB
972 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
973 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 974 mXPUSHs(sv);
28be1210
TH
975 break;
976 }
28be1210 977 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 978 case '@':
28be1210 979 s = strbeg + symptr->strbeg;
28be1210 980 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 981 {
08ca2aa3
TH
982 while (len > 0) {
983 if (s >= strend)
984 Perl_croak(aTHX_ "'@' outside of string in unpack");
985 s += UTF8SKIP(s);
986 len--;
987 }
988 if (s > strend)
989 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
990 } else {
28be1210 991 if (strend-s < len)
fc241834 992 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 993 s += len;
08ca2aa3 994 }
a6ec74c1 995 break;
62f95557
IZ
996 case 'X' | TYPE_IS_SHRIEKING:
997 if (!len) /* Avoid division by 0 */
998 len = 1;
08ca2aa3 999 if (utf8) {
f7fe979e 1000 const char *hop, *last;
f337b084
TH
1001 I32 l = len;
1002 hop = last = strbeg;
1003 while (hop < s) {
1004 hop += UTF8SKIP(hop);
1005 if (--l == 0) {
08ca2aa3 1006 last = hop;
f337b084
TH
1007 l = len;
1008 }
fc241834 1009 }
f337b084
TH
1010 if (last > s)
1011 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1012 s = last;
1013 break;
f337b084
TH
1014 }
1015 len = (s - strbeg) % len;
924ba076 1016 /* FALLTHROUGH */
a6ec74c1 1017 case 'X':
08ca2aa3
TH
1018 if (utf8) {
1019 while (len > 0) {
1020 if (s <= strbeg)
1021 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1022 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1023 if (s <= strbeg)
1024 Perl_croak(aTHX_ "'X' outside of string in unpack");
1025 }
1026 len--;
1027 }
1028 } else {
fc241834
RGS
1029 if (len > s - strbeg)
1030 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1031 s -= len;
08ca2aa3 1032 }
a6ec74c1 1033 break;
9e27e96a
AL
1034 case 'x' | TYPE_IS_SHRIEKING: {
1035 I32 ai32;
62f95557
IZ
1036 if (!len) /* Avoid division by 0 */
1037 len = 1;
230e1fce
NC
1038 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1039 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1040 if (ai32 == 0) break;
1041 len -= ai32;
9e27e96a 1042 }
924ba076 1043 /* FALLTHROUGH */
a6ec74c1 1044 case 'x':
08ca2aa3
TH
1045 if (utf8) {
1046 while (len>0) {
1047 if (s >= strend)
1048 Perl_croak(aTHX_ "'x' outside of string in unpack");
1049 s += UTF8SKIP(s);
1050 len--;
1051 }
1052 } else {
fc241834
RGS
1053 if (len > strend - s)
1054 Perl_croak(aTHX_ "'x' outside of string in unpack");
1055 s += len;
f337b084 1056 }
a6ec74c1
JH
1057 break;
1058 case '/':
49704364
WL
1059 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1060 break;
a6ec74c1
JH
1061 case 'A':
1062 case 'Z':
1063 case 'a':
08ca2aa3
TH
1064 if (checksum) {
1065 /* Preliminary length estimate is assumed done in 'W' */
1066 if (len > strend - s) len = strend - s;
1067 goto W_checksum;
1068 }
1069 if (utf8) {
1070 I32 l;
f7fe979e 1071 const char *hop;
08ca2aa3
TH
1072 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1073 if (hop >= strend) {
1074 if (hop > strend)
1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1076 break;
fc241834 1077 }
a6ec74c1 1078 }
08ca2aa3
TH
1079 if (hop > strend)
1080 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1081 len = hop - s;
1082 } else if (len > strend - s)
1083 len = strend - s;
1084
1085 if (datumtype == 'Z') {
1086 /* 'Z' strips stuff after first null */
f7fe979e 1087 const char *ptr, *end;
f337b084
TH
1088 end = s + len;
1089 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1090 sv = newSVpvn(s, ptr-s);
1091 if (howlen == e_star) /* exact for 'Z*' */
1092 len = ptr-s + (ptr != strend ? 1 : 0);
1093 } else if (datumtype == 'A') {
1094 /* 'A' strips both nulls and spaces */
f7fe979e 1095 const char *ptr;
18bdf90a
TH
1096 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1097 for (ptr = s+len-1; ptr >= s; ptr--)
1098 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1099 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1100 if (ptr >= s) ptr += UTF8SKIP(ptr);
1101 else ptr++;
28be1210 1102 if (ptr > s+len)
18bdf90a
TH
1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1104 } else {
1105 for (ptr = s+len-1; ptr >= s; ptr--)
1106 if (*ptr != 0 && !isSPACE(*ptr)) break;
1107 ptr++;
1108 }
08ca2aa3
TH
1109 sv = newSVpvn(s, ptr-s);
1110 } else sv = newSVpvn(s, len);
1111
1112 if (utf8) {
1113 SvUTF8_on(sv);
1114 /* Undo any upgrade done due to need_utf8() */
f337b084 1115 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1116 sv_utf8_downgrade(sv, 0);
a6ec74c1 1117 }
6e449a3a 1118 mXPUSHs(sv);
08ca2aa3 1119 s += len;
a6ec74c1
JH
1120 break;
1121 case 'B':
08ca2aa3
TH
1122 case 'b': {
1123 char *str;
49704364 1124 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1125 len = (strend - s) * 8;
1126 if (checksum) {
f337b084 1127 if (utf8)
08ca2aa3 1128 while (len >= 8 && s < strend) {
f337b084 1129 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1130 len -= 8;
1131 }
f337b084 1132 else
fc241834 1133 while (len >= 8) {
08ca2aa3 1134 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1135 len -= 8;
1136 }
08ca2aa3
TH
1137 if (len && s < strend) {
1138 U8 bits;
f337b084
TH
1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 if (datumtype == 'b')
a6ec74c1 1141 while (len-- > 0) {
92d41999 1142 if (bits & 1) cuv++;
a6ec74c1
JH
1143 bits >>= 1;
1144 }
f337b084 1145 else
a6ec74c1 1146 while (len-- > 0) {
08ca2aa3 1147 if (bits & 0x80) cuv++;
a6ec74c1
JH
1148 bits <<= 1;
1149 }
fc241834 1150 }
a6ec74c1
JH
1151 break;
1152 }
08ca2aa3 1153
561b68a9 1154 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1155 SvPOK_on(sv);
1156 str = SvPVX(sv);
1157 if (datumtype == 'b') {
f337b084 1158 U8 bits = 0;
f7fe979e 1159 const I32 ai32 = len;
08ca2aa3
TH
1160 for (len = 0; len < ai32; len++) {
1161 if (len & 7) bits >>= 1;
1162 else if (utf8) {
1163 if (s >= strend) break;
f337b084 1164 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1165 } else bits = *(U8 *) s++;
1166 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1167 }
08ca2aa3 1168 } else {
f337b084 1169 U8 bits = 0;
f7fe979e 1170 const I32 ai32 = len;
08ca2aa3
TH
1171 for (len = 0; len < ai32; len++) {
1172 if (len & 7) bits <<= 1;
1173 else if (utf8) {
1174 if (s >= strend) break;
f337b084 1175 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1176 } else bits = *(U8 *) s++;
1177 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1178 }
1179 }
1180 *str = '\0';
aa07b2f6 1181 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1182 XPUSHs(sv);
a6ec74c1 1183 break;
08ca2aa3 1184 }
a6ec74c1 1185 case 'H':
08ca2aa3 1186 case 'h': {
3297d27d 1187 char *str = NULL;
fc241834 1188 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1189 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1190 len = (strend - s) * 2;
858fe5e1
TC
1191 if (!checksum) {
1192 sv = sv_2mortal(newSV(len ? len : 1));
1193 SvPOK_on(sv);
1194 str = SvPVX(sv);
1195 }
a6ec74c1 1196 if (datumtype == 'h') {
f337b084 1197 U8 bits = 0;
9e27e96a 1198 I32 ai32 = len;
fc241834
RGS
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 1) bits >>= 4;
1201 else if (utf8) {
1202 if (s >= strend) break;
f337b084 1203 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1204 } else bits = * (U8 *) s++;
858fe5e1
TC
1205 if (!checksum)
1206 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1207 }
08ca2aa3 1208 } else {
f337b084 1209 U8 bits = 0;
f7fe979e 1210 const I32 ai32 = len;
08ca2aa3
TH
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits <<= 4;
1213 else if (utf8) {
1214 if (s >= strend) break;
f337b084 1215 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1216 } else bits = *(U8 *) s++;
858fe5e1
TC
1217 if (!checksum)
1218 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1219 }
1220 }
858fe5e1
TC
1221 if (!checksum) {
1222 *str = '\0';
1223 SvCUR_set(sv, str - SvPVX_const(sv));
1224 XPUSHs(sv);
1225 }
a6ec74c1 1226 break;
08ca2aa3 1227 }
1651fc44
ML
1228 case 'C':
1229 if (len == 0) {
1230 if (explicit_length)
1231 /* Switch to "character" mode */
1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1233 break;
1234 }
924ba076 1235 /* FALLTHROUGH */
a6ec74c1 1236 case 'c':
1651fc44
ML
1237 while (len-- > 0 && s < strend) {
1238 int aint;
1239 if (utf8)
1240 {
1241 STRLEN retlen;
1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1244 if (retlen == (STRLEN) -1 || retlen == 0)
1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1246 s += retlen;
1247 }
1248 else
1249 aint = *(U8 *)(s)++;
1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1251 aint -= 256;
08ca2aa3 1252 if (!checksum)
6e449a3a 1253 mPUSHi(aint);
73cb7263
NC
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)aint;
1256 else
1257 cuv += aint;
a6ec74c1
JH
1258 }
1259 break;
08ca2aa3
TH
1260 case 'W':
1261 W_checksum:
1651fc44 1262 if (utf8) {
08ca2aa3 1263 while (len-- > 0 && s < strend) {
08ca2aa3 1264 STRLEN retlen;
f7fe979e 1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1267 if (retlen == (STRLEN) -1 || retlen == 0)
1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1269 s += retlen;
1270 if (!checksum)
6e449a3a 1271 mPUSHu(val);
08ca2aa3
TH
1272 else if (checksum > bits_in_uv)
1273 cdouble += (NV) val;
d6d3e8bd 1274 else
08ca2aa3 1275 cuv += val;
fc241834 1276 }
08ca2aa3 1277 } else if (!checksum)
a6ec74c1 1278 while (len-- > 0) {
f7fe979e 1279 const U8 ch = *(U8 *) s++;
6e449a3a 1280 mPUSHu(ch);
a6ec74c1 1281 }
08ca2aa3
TH
1282 else if (checksum > bits_in_uv)
1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1284 else
1285 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1286 break;
1287 case 'U':
35bcd338 1288 if (len == 0) {
c5333953 1289 if (explicit_length && howlen != e_star) {
08ca2aa3 1290 /* Switch to "bytes in UTF-8" mode */
f337b084 1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1292 else
1293 /* Should be impossible due to the need_utf8() test */
1294 Perl_croak(aTHX_ "U0 mode on a byte string");
1295 }
35bcd338
JH
1296 break;
1297 }
08ca2aa3 1298 if (len > strend - s) len = strend - s;
fc241834 1299 if (!checksum) {
08ca2aa3
TH
1300 if (len && unpack_only_one) len = 1;
1301 EXTEND(SP, len);
1302 EXTEND_MORTAL(len);
fc241834 1303 }
08ca2aa3
TH
1304 while (len-- > 0 && s < strend) {
1305 STRLEN retlen;
1306 UV auv;
1307 if (utf8) {
1308 U8 result[UTF8_MAXLEN];
f7fe979e 1309 const char *ptr = s;
08ca2aa3 1310 STRLEN len;
08ca2aa3
TH
1311 /* Bug: warns about bad utf8 even if we are short on bytes
1312 and will break out of the loop */
230e1fce
NC
1313 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1314 'U'))
08ca2aa3
TH
1315 break;
1316 len = UTF8SKIP(result);
fc241834 1317 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1318 (char *) &result[1], len-1, 'U')) break;
c80e42f3 1319 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1320 s = ptr;
1321 } else {
c80e42f3 1322 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1325 s += retlen;
1326 }
1327 if (!checksum)
6e449a3a 1328 mPUSHu(auv);
73cb7263 1329 else if (checksum > bits_in_uv)
08ca2aa3 1330 cdouble += (NV) auv;
73cb7263 1331 else
08ca2aa3 1332 cuv += auv;
a6ec74c1
JH
1333 }
1334 break;
49704364
WL
1335 case 's' | TYPE_IS_SHRIEKING:
1336#if SHORTSIZE != SIZE16
73cb7263 1337 while (len-- > 0) {
08ca2aa3 1338 short ashort;
aaec8192 1339 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1340 if (!checksum)
6e449a3a 1341 mPUSHi(ashort);
73cb7263
NC
1342 else if (checksum > bits_in_uv)
1343 cdouble += (NV)ashort;
1344 else
1345 cuv += ashort;
49704364
WL
1346 }
1347 break;
1348#else
924ba076 1349 /* FALLTHROUGH */
a6ec74c1 1350#endif
49704364 1351 case 's':
73cb7263 1352 while (len-- > 0) {
08ca2aa3
TH
1353 I16 ai16;
1354
1355#if U16SIZE > SIZE16
1356 ai16 = 0;
1357#endif
aaec8192 1358 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1359#if U16SIZE > SIZE16
73cb7263
NC
1360 if (ai16 > 32767)
1361 ai16 -= 65536;
a6ec74c1 1362#endif
08ca2aa3 1363 if (!checksum)
6e449a3a 1364 mPUSHi(ai16);
73cb7263
NC
1365 else if (checksum > bits_in_uv)
1366 cdouble += (NV)ai16;
1367 else
1368 cuv += ai16;
a6ec74c1
JH
1369 }
1370 break;
49704364
WL
1371 case 'S' | TYPE_IS_SHRIEKING:
1372#if SHORTSIZE != SIZE16
73cb7263 1373 while (len-- > 0) {
08ca2aa3 1374 unsigned short aushort;
aaec8192
NC
1375 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1376 needs_swap);
08ca2aa3 1377 if (!checksum)
6e449a3a 1378 mPUSHu(aushort);
73cb7263
NC
1379 else if (checksum > bits_in_uv)
1380 cdouble += (NV)aushort;
1381 else
1382 cuv += aushort;
49704364
WL
1383 }
1384 break;
1385#else
924ba076 1386 /* FALLTHROUGH */
49704364 1387#endif
a6ec74c1
JH
1388 case 'v':
1389 case 'n':
1390 case 'S':
73cb7263 1391 while (len-- > 0) {
08ca2aa3
TH
1392 U16 au16;
1393#if U16SIZE > SIZE16
1394 au16 = 0;
1395#endif
aaec8192 1396 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1397 if (datumtype == 'n')
1398 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1399 if (datumtype == 'v')
1400 au16 = vtohs(au16);
08ca2aa3 1401 if (!checksum)
6e449a3a 1402 mPUSHu(au16);
73cb7263 1403 else if (checksum > bits_in_uv)
f337b084 1404 cdouble += (NV) au16;
73cb7263
NC
1405 else
1406 cuv += au16;
a6ec74c1
JH
1407 }
1408 break;
068bd2e7
MHM
1409 case 'v' | TYPE_IS_SHRIEKING:
1410 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1411 while (len-- > 0) {
08ca2aa3
TH
1412 I16 ai16;
1413# if U16SIZE > SIZE16
1414 ai16 = 0;
1415# endif
aaec8192 1416 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1417 /* There should never be any byte-swapping here. */
1418 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1419 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1420 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1421 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1422 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1423 if (!checksum)
6e449a3a 1424 mPUSHi(ai16);
73cb7263 1425 else if (checksum > bits_in_uv)
08ca2aa3 1426 cdouble += (NV) ai16;
73cb7263
NC
1427 else
1428 cuv += ai16;
068bd2e7
MHM
1429 }
1430 break;
a6ec74c1 1431 case 'i':
49704364 1432 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1433 while (len-- > 0) {
08ca2aa3 1434 int aint;
aaec8192 1435 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1436 if (!checksum)
6e449a3a 1437 mPUSHi(aint);
73cb7263
NC
1438 else if (checksum > bits_in_uv)
1439 cdouble += (NV)aint;
1440 else
1441 cuv += aint;
a6ec74c1
JH
1442 }
1443 break;
1444 case 'I':
49704364 1445 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1446 while (len-- > 0) {
08ca2aa3 1447 unsigned int auint;
aaec8192 1448 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1449 if (!checksum)
6e449a3a 1450 mPUSHu(auint);
73cb7263
NC
1451 else if (checksum > bits_in_uv)
1452 cdouble += (NV)auint;
1453 else
1454 cuv += auint;
a6ec74c1
JH
1455 }
1456 break;
92d41999 1457 case 'j':
73cb7263 1458 while (len-- > 0) {
08ca2aa3 1459 IV aiv;
aaec8192 1460 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1461 if (!checksum)
6e449a3a 1462 mPUSHi(aiv);
73cb7263
NC
1463 else if (checksum > bits_in_uv)
1464 cdouble += (NV)aiv;
1465 else
1466 cuv += aiv;
92d41999
JH
1467 }
1468 break;
1469 case 'J':
73cb7263 1470 while (len-- > 0) {
08ca2aa3 1471 UV auv;
aaec8192 1472 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1473 if (!checksum)
6e449a3a 1474 mPUSHu(auv);
73cb7263
NC
1475 else if (checksum > bits_in_uv)
1476 cdouble += (NV)auv;
1477 else
1478 cuv += auv;
92d41999
JH
1479 }
1480 break;
49704364
WL
1481 case 'l' | TYPE_IS_SHRIEKING:
1482#if LONGSIZE != SIZE32
73cb7263 1483 while (len-- > 0) {
08ca2aa3 1484 long along;
aaec8192 1485 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1486 if (!checksum)
6e449a3a 1487 mPUSHi(along);
73cb7263
NC
1488 else if (checksum > bits_in_uv)
1489 cdouble += (NV)along;
1490 else
1491 cuv += along;
49704364
WL
1492 }
1493 break;
1494#else
924ba076 1495 /* FALLTHROUGH */
a6ec74c1 1496#endif
49704364 1497 case 'l':
73cb7263 1498 while (len-- > 0) {
08ca2aa3
TH
1499 I32 ai32;
1500#if U32SIZE > SIZE32
1501 ai32 = 0;
1502#endif
aaec8192 1503 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1504#if U32SIZE > SIZE32
08ca2aa3 1505 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1506#endif
08ca2aa3 1507 if (!checksum)
6e449a3a 1508 mPUSHi(ai32);
73cb7263
NC
1509 else if (checksum > bits_in_uv)
1510 cdouble += (NV)ai32;
1511 else
1512 cuv += ai32;
a6ec74c1
JH
1513 }
1514 break;
49704364
WL
1515 case 'L' | TYPE_IS_SHRIEKING:
1516#if LONGSIZE != SIZE32
73cb7263 1517 while (len-- > 0) {
08ca2aa3 1518 unsigned long aulong;
aaec8192 1519 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1520 if (!checksum)
6e449a3a 1521 mPUSHu(aulong);
73cb7263
NC
1522 else if (checksum > bits_in_uv)
1523 cdouble += (NV)aulong;
1524 else
1525 cuv += aulong;
49704364
WL
1526 }
1527 break;
1528#else
924ba076 1529 /* FALLTHROUGH */
49704364 1530#endif
a6ec74c1
JH
1531 case 'V':
1532 case 'N':
1533 case 'L':
73cb7263 1534 while (len-- > 0) {
08ca2aa3
TH
1535 U32 au32;
1536#if U32SIZE > SIZE32
1537 au32 = 0;
1538#endif
aaec8192 1539 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1540 if (datumtype == 'N')
1541 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1542 if (datumtype == 'V')
1543 au32 = vtohl(au32);
08ca2aa3 1544 if (!checksum)
6e449a3a 1545 mPUSHu(au32);
fc241834
RGS
1546 else if (checksum > bits_in_uv)
1547 cdouble += (NV)au32;
1548 else
1549 cuv += au32;
a6ec74c1
JH
1550 }
1551 break;
068bd2e7
MHM
1552 case 'V' | TYPE_IS_SHRIEKING:
1553 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1554 while (len-- > 0) {
08ca2aa3 1555 I32 ai32;
f8e5a5db 1556#if U32SIZE > SIZE32
08ca2aa3 1557 ai32 = 0;
f8e5a5db 1558#endif
aaec8192 1559 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1560 /* There should never be any byte swapping here. */
1561 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1562 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1563 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1564 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1565 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1566 if (!checksum)
6e449a3a 1567 mPUSHi(ai32);
73cb7263
NC
1568 else if (checksum > bits_in_uv)
1569 cdouble += (NV)ai32;
1570 else
1571 cuv += ai32;
068bd2e7
MHM
1572 }
1573 break;
a6ec74c1 1574 case 'p':
a6ec74c1 1575 while (len-- > 0) {
f7fe979e 1576 const char *aptr;
aaec8192 1577 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1578 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1579 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1580 }
1581 break;
1582 case 'w':
a6ec74c1
JH
1583 {
1584 UV auv = 0;
1585 U32 bytes = 0;
fc241834 1586
08ca2aa3
TH
1587 while (len > 0 && s < strend) {
1588 U8 ch;
f337b084 1589 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1590 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1591 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1592 if (ch < 0x80) {
a6ec74c1 1593 bytes = 0;
6e449a3a 1594 mPUSHu(auv);
a6ec74c1
JH
1595 len--;
1596 auv = 0;
08ca2aa3 1597 continue;
a6ec74c1 1598 }
08ca2aa3 1599 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1600 const char *t;
a6ec74c1 1601
f5992bc4 1602 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1603 while (s < strend) {
f337b084 1604 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1605 sv = mul128(sv, (U8)(ch & 0x7f));
1606 if (!(ch & 0x80)) {
a6ec74c1
JH
1607 bytes = 0;
1608 break;
1609 }
1610 }
10516c54 1611 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1612 while (*t == '0')
1613 t++;
1614 sv_chop(sv, t);
6e449a3a 1615 mPUSHs(sv);
a6ec74c1
JH
1616 len--;
1617 auv = 0;
1618 }
1619 }
1620 if ((s >= strend) && bytes)
49704364 1621 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1622 }
1623 break;
1624 case 'P':
49704364
WL
1625 if (symptr->howlen == e_star)
1626 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1627 EXTEND(SP, 1);
2d3e0934 1628 if (s + sizeof(char*) <= strend) {
08ca2aa3 1629 char *aptr;
aaec8192 1630 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1631 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1632 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1633 }
a6ec74c1 1634 break;
c174bf3b 1635#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1636 case 'q':
73cb7263 1637 while (len-- > 0) {
08ca2aa3 1638 Quad_t aquad;
aaec8192 1639 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1640 if (!checksum)
c174bf3b 1641 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1642 else if (checksum > bits_in_uv)
1643 cdouble += (NV)aquad;
1644 else
1645 cuv += aquad;
1646 }
a6ec74c1
JH
1647 break;
1648 case 'Q':
73cb7263 1649 while (len-- > 0) {
08ca2aa3 1650 Uquad_t auquad;
aaec8192 1651 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1652 if (!checksum)
c174bf3b 1653 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1654 else if (checksum > bits_in_uv)
1655 cdouble += (NV)auquad;
1656 else
1657 cuv += auquad;
a6ec74c1
JH
1658 }
1659 break;
1640b983 1660#endif
a6ec74c1
JH
1661 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1662 case 'f':
73cb7263 1663 while (len-- > 0) {
08ca2aa3 1664 float afloat;
aaec8192 1665 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1666 if (!checksum)
6e449a3a 1667 mPUSHn(afloat);
08ca2aa3 1668 else
73cb7263 1669 cdouble += afloat;
fc241834 1670 }
a6ec74c1
JH
1671 break;
1672 case 'd':
73cb7263 1673 while (len-- > 0) {
08ca2aa3 1674 double adouble;
aaec8192 1675 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1676 if (!checksum)
6e449a3a 1677 mPUSHn(adouble);
08ca2aa3 1678 else
73cb7263 1679 cdouble += adouble;
fc241834 1680 }
a6ec74c1 1681 break;
92d41999 1682 case 'F':
73cb7263 1683 while (len-- > 0) {
275663fa 1684 NV_bytes anv;
aaec8192
NC
1685 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1686 datumtype, needs_swap);
08ca2aa3 1687 if (!checksum)
275663fa 1688 mPUSHn(anv.nv);
08ca2aa3 1689 else
275663fa 1690 cdouble += anv.nv;
fc241834 1691 }
92d41999
JH
1692 break;
1693#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1694 case 'D':
73cb7263 1695 while (len-- > 0) {
275663fa 1696 ld_bytes aldouble;
aaec8192
NC
1697 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1698 sizeof(aldouble.bytes), datumtype, needs_swap);
08ca2aa3 1699 if (!checksum)
275663fa 1700 mPUSHn(aldouble.ld);
08ca2aa3 1701 else
275663fa 1702 cdouble += aldouble.ld;
92d41999
JH
1703 }
1704 break;
1705#endif
a6ec74c1 1706 case 'u':
858fe5e1 1707 if (!checksum) {
f7fe979e 1708 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1709 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1710 if (l) SvPOK_on(sv);
1711 }
1712 if (utf8) {
1713 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1714 I32 a, b, c, d;
db187877 1715 char hunk[3];
08ca2aa3 1716
08ca2aa3
TH
1717 while (len > 0) {
1718 next_uni_uu(aTHX_ &s, strend, &a);
1719 next_uni_uu(aTHX_ &s, strend, &b);
1720 next_uni_uu(aTHX_ &s, strend, &c);
1721 next_uni_uu(aTHX_ &s, strend, &d);
1722 hunk[0] = (char)((a << 2) | (b >> 4));
1723 hunk[1] = (char)((b << 4) | (c >> 2));
1724 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1725 if (!checksum)
1726 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1727 len -= 3;
1728 }
1729 if (s < strend) {
f7fe979e
AL
1730 if (*s == '\n') {
1731 s++;
1732 }
08ca2aa3
TH
1733 else {
1734 /* possible checksum byte */
f7fe979e
AL
1735 const char *skip = s+UTF8SKIP(s);
1736 if (skip < strend && *skip == '\n')
1737 s = skip+1;
08ca2aa3
TH
1738 }
1739 }
1740 }
1741 } else {
fc241834
RGS
1742 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1743 I32 a, b, c, d;
db187877 1744 char hunk[3];
a6ec74c1 1745
fc241834
RGS
1746 len = PL_uudmap[*(U8*)s++] & 077;
1747 while (len > 0) {
1748 if (s < strend && ISUUCHAR(*s))
1749 a = PL_uudmap[*(U8*)s++] & 077;
1750 else
1751 a = 0;
1752 if (s < strend && ISUUCHAR(*s))
1753 b = PL_uudmap[*(U8*)s++] & 077;
1754 else
1755 b = 0;
1756 if (s < strend && ISUUCHAR(*s))
1757 c = PL_uudmap[*(U8*)s++] & 077;
1758 else
1759 c = 0;
1760 if (s < strend && ISUUCHAR(*s))
1761 d = PL_uudmap[*(U8*)s++] & 077;
1762 else
1763 d = 0;
1764 hunk[0] = (char)((a << 2) | (b >> 4));
1765 hunk[1] = (char)((b << 4) | (c >> 2));
1766 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1767 if (!checksum)
1768 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1769 len -= 3;
1770 }
1771 if (*s == '\n')
1772 s++;
1773 else /* possible checksum byte */
1774 if (s + 1 < strend && s[1] == '\n')
1775 s += 2;
a6ec74c1 1776 }
08ca2aa3 1777 }
858fe5e1
TC
1778 if (!checksum)
1779 XPUSHs(sv);
a6ec74c1
JH
1780 break;
1781 }
49704364 1782
a6ec74c1 1783 if (checksum) {
1109a392 1784 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1785 (checksum > bits_in_uv &&
08ca2aa3
TH
1786 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1787 NV trouble, anv;
a6ec74c1 1788
08ca2aa3 1789 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1790 while (checksum >= 16) {
1791 checksum -= 16;
08ca2aa3 1792 anv *= 65536.0;
a6ec74c1 1793 }
a6ec74c1 1794 while (cdouble < 0.0)
08ca2aa3
TH
1795 cdouble += anv;
1796 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1797 sv = newSVnv(cdouble);
a6ec74c1
JH
1798 }
1799 else {
fa8ec7c1
NC
1800 if (checksum < bits_in_uv) {
1801 UV mask = ((UV)1 << checksum) - 1;
92d41999 1802 cuv &= mask;
a6ec74c1 1803 }
c4c5f44a 1804 sv = newSVuv(cuv);
a6ec74c1 1805 }
6e449a3a 1806 mXPUSHs(sv);
a6ec74c1
JH
1807 checksum = 0;
1808 }
fc241834 1809
49704364
WL
1810 if (symptr->flags & FLAG_SLASH){
1811 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1812 break;
49704364
WL
1813 if( next_symbol(symptr) ){
1814 if( symptr->howlen == e_number )
1815 Perl_croak(aTHX_ "Count after length/code in unpack" );
1816 if( beyond ){
1817 /* ...end of char buffer then no decent length available */
1818 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1819 } else {
1820 /* take top of stack (hope it's numeric) */
1821 len = POPi;
1822 if( len < 0 )
1823 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1824 }
1825 } else {
1826 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1827 }
1828 datumtype = symptr->code;
21c16052 1829 explicit_length = FALSE;
49704364
WL
1830 goto redo_switch;
1831 }
a6ec74c1 1832 }
49704364 1833
18529408
IZ
1834 if (new_s)
1835 *new_s = s;
1836 PUTBACK;
1837 return SP - PL_stack_base - start_sp_offset;
1838}
1839
1840PP(pp_unpack)
1841{
97aff369 1842 dVAR;
18529408 1843 dSP;
bab9c0ac 1844 dPOPPOPssrl;
18529408
IZ
1845 I32 gimme = GIMME_V;
1846 STRLEN llen;
1847 STRLEN rlen;
5c144d81
NC
1848 const char *pat = SvPV_const(left, llen);
1849 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1850 const char *strend = s + rlen;
1851 const char *patend = pat + llen;
08ca2aa3 1852 I32 cnt;
18529408
IZ
1853
1854 PUTBACK;
7accc089 1855 cnt = unpackstring(pat, patend, s, strend,
49704364 1856 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1857 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1858
18529408
IZ
1859 SPAGAIN;
1860 if ( !cnt && gimme == G_SCALAR )
1861 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1862 RETURN;
1863}
1864
f337b084 1865STATIC U8 *
f7fe979e 1866doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1867{
f337b084 1868 *h++ = PL_uuemap[len];
a6ec74c1 1869 while (len > 2) {
f337b084
TH
1870 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1871 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1872 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1873 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1874 s += 3;
1875 len -= 3;
1876 }
1877 if (len > 0) {
f7fe979e 1878 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1879 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1880 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1881 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1882 *h++ = PL_uuemap[0];
a6ec74c1 1883 }
f337b084
TH
1884 *h++ = '\n';
1885 return h;
a6ec74c1
JH
1886}
1887
1888STATIC SV *
f7fe979e 1889S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1890{
8b6e33c7
AL
1891 SV *result = newSVpvn(s, l);
1892 char *const result_c = SvPV_nolen(result); /* convenience */
1893 char *out = result_c;
1894 bool skip = 1;
1895 bool ignore = 0;
a6ec74c1 1896
7918f24d
NC
1897 PERL_ARGS_ASSERT_IS_AN_INT;
1898
a6ec74c1
JH
1899 while (*s) {
1900 switch (*s) {
1901 case ' ':
1902 break;
1903 case '+':
1904 if (!skip) {
1905 SvREFCNT_dec(result);
1906 return (NULL);
1907 }
1908 break;
1909 case '0':
1910 case '1':
1911 case '2':
1912 case '3':
1913 case '4':
1914 case '5':
1915 case '6':
1916 case '7':
1917 case '8':
1918 case '9':
1919 skip = 0;
1920 if (!ignore) {
1921 *(out++) = *s;
1922 }
1923 break;
1924 case '.':
1925 ignore = 1;
1926 break;
1927 default:
1928 SvREFCNT_dec(result);
1929 return (NULL);
1930 }
1931 s++;
1932 }
1933 *(out++) = '\0';
1934 SvCUR_set(result, out - result_c);
1935 return (result);
1936}
1937
1938/* pnum must be '\0' terminated */
1939STATIC int
1940S_div128(pTHX_ SV *pnum, bool *done)
1941{
8b6e33c7
AL
1942 STRLEN len;
1943 char * const s = SvPV(pnum, len);
1944 char *t = s;
1945 int m = 0;
1946
7918f24d
NC
1947 PERL_ARGS_ASSERT_DIV128;
1948
8b6e33c7
AL
1949 *done = 1;
1950 while (*t) {
1951 const int i = m * 10 + (*t - '0');
1952 const int r = (i >> 7); /* r < 10 */
1953 m = i & 0x7F;
1954 if (r) {
1955 *done = 0;
1956 }
1957 *(t++) = '0' + r;
a6ec74c1 1958 }
8b6e33c7
AL
1959 *(t++) = '\0';
1960 SvCUR_set(pnum, (STRLEN) (t - s));
1961 return (m);
a6ec74c1
JH
1962}
1963
18529408 1964/*
7accc089
JH
1965=for apidoc packlist
1966
1967The engine implementing pack() Perl function.
1968
bfce84ec
AL
1969=cut
1970*/
7accc089
JH
1971
1972void
5aaab254 1973Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1974{
97aff369 1975 dVAR;
aadb217d
JH
1976 tempsym_t sym;
1977
7918f24d
NC
1978 PERL_ARGS_ASSERT_PACKLIST;
1979
f7fe979e 1980 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1981
f337b084
TH
1982 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1983 Also make sure any UTF8 flag is loaded */
56eb0262 1984 SvPV_force_nolen(cat);
bfce84ec
AL
1985 if (DO_UTF8(cat))
1986 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1987
49704364
WL
1988 (void)pack_rec( cat, &sym, beglist, endlist );
1989}
1990
f337b084
TH
1991/* like sv_utf8_upgrade, but also repoint the group start markers */
1992STATIC void
1993marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1994 STRLEN len;
1995 tempsym_t *group;
f7fe979e
AL
1996 const char *from_ptr, *from_start, *from_end, **marks, **m;
1997 char *to_start, *to_ptr;
f337b084
TH
1998
1999 if (SvUTF8(sv)) return;
2000
aa07b2f6 2001 from_start = SvPVX_const(sv);
f337b084
TH
2002 from_end = from_start + SvCUR(sv);
2003 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 2004 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2005 if (from_ptr == from_end) {
2006 /* Simple case: no character needs to be changed */
2007 SvUTF8_on(sv);
2008 return;
2009 }
2010
3473cf63 2011 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2012 Newx(to_start, len, char);
f337b084
TH
2013 Copy(from_start, to_start, from_ptr-from_start, char);
2014 to_ptr = to_start + (from_ptr-from_start);
2015
a02a5408 2016 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2017 for (group=sym_ptr; group; group = group->previous)
2018 marks[group->level] = from_start + group->strbeg;
2019 marks[sym_ptr->level+1] = from_end+1;
2020 for (m = marks; *m < from_ptr; m++)
2021 *m = to_start + (*m-from_start);
2022
2023 for (;from_ptr < from_end; from_ptr++) {
2024 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2025 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2026 }
2027 *to_ptr = 0;
2028
2029 while (*m == from_ptr) *m++ = to_ptr;
2030 if (m != marks + sym_ptr->level+1) {
2031 Safefree(marks);
2032 Safefree(to_start);
5637ef5b
NC
2033 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2034 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2035 }
2036 for (group=sym_ptr; group; group = group->previous)
2037 group->strbeg = marks[group->level] - to_start;
2038 Safefree(marks);
2039
2040 if (SvOOK(sv)) {
2041 if (SvIVX(sv)) {
b162af07 2042 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2043 from_start -= SvIVX(sv);
2044 SvIV_set(sv, 0);
2045 }
2046 SvFLAGS(sv) &= ~SVf_OOK;
2047 }
2048 if (SvLEN(sv) != 0)
2049 Safefree(from_start);
f880fe2f 2050 SvPV_set(sv, to_start);
b162af07
SP
2051 SvCUR_set(sv, to_ptr - to_start);
2052 SvLEN_set(sv, len);
f337b084
TH
2053 SvUTF8_on(sv);
2054}
2055
2056/* Exponential string grower. Makes string extension effectively O(n)
2057 needed says how many extra bytes we need (not counting the final '\0')
2058 Only grows the string if there is an actual lack of space
2059*/
2060STATIC char *
0bd48802 2061S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2062 const STRLEN cur = SvCUR(sv);
2063 const STRLEN len = SvLEN(sv);
f337b084 2064 STRLEN extend;
7918f24d
NC
2065
2066 PERL_ARGS_ASSERT_SV_EXP_GROW;
2067
f337b084
TH
2068 if (len - cur > needed) return SvPVX(sv);
2069 extend = needed > len ? needed : len;
2070 return SvGROW(sv, len+extend+1);
2071}
49704364
WL
2072
2073STATIC
2074SV **
f337b084 2075S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2076{
97aff369 2077 dVAR;
49704364 2078 tempsym_t lookahead;
f337b084
TH
2079 I32 items = endlist - beglist;
2080 bool found = next_symbol(symptr);
2081 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2082 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2083
7918f24d
NC
2084 PERL_ARGS_ASSERT_PACK_REC;
2085
f337b084
TH
2086 if (symptr->level == 0 && found && symptr->code == 'U') {
2087 marked_upgrade(aTHX_ cat, symptr);
2088 symptr->flags |= FLAG_DO_UTF8;
2089 utf8 = 0;
49704364 2090 }
f337b084 2091 symptr->strbeg = SvCUR(cat);
49704364
WL
2092
2093 while (found) {
f337b084
TH
2094 SV *fromstr;
2095 STRLEN fromlen;
2096 I32 len;
a0714e2c 2097 SV *lengthcode = NULL;
49704364 2098 I32 datumtype = symptr->code;
f337b084
TH
2099 howlen_t howlen = symptr->howlen;
2100 char *start = SvPVX(cat);
2101 char *cur = start + SvCUR(cat);
a1219b5e 2102 bool needs_swap;
49704364 2103
f337b084
TH
2104#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2105
2106 switch (howlen) {
fc241834 2107 case e_star:
f337b084
TH
2108 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2109 0 : items;
2110 break;
2111 default:
2112 /* e_no_len and e_number */
2113 len = symptr->length;
49704364
WL
2114 break;
2115 }
2116
f337b084 2117 if (len) {
a7a3cfaa 2118 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2119
a7a3cfaa
TH
2120 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2121 /* We can process this letter. */
2122 STRLEN size = props & PACK_SIZE_MASK;
2123 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2124 }
f337b084
TH
2125 }
2126
49704364
WL
2127 /* Look ahead for next symbol. Do we have code/code? */
2128 lookahead = *symptr;
2129 found = next_symbol(&lookahead);
246f24af
TH
2130 if (symptr->flags & FLAG_SLASH) {
2131 IV count;
f337b084 2132 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2133 if (strchr("aAZ", lookahead.code)) {
2134 if (lookahead.howlen == e_number) count = lookahead.length;
2135 else {
ce399ba6 2136 if (items > 0) {
48a5da33 2137 count = sv_len_utf8(*beglist);
ce399ba6 2138 }
246f24af
TH
2139 else count = 0;
2140 if (lookahead.code == 'Z') count++;
2141 }
2142 } else {
2143 if (lookahead.howlen == e_number && lookahead.length < items)
2144 count = lookahead.length;
2145 else count = items;
2146 }
2147 lookahead.howlen = e_number;
2148 lookahead.length = count;
2149 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2150 }
49704364 2151
a1219b5e
NC
2152 needs_swap = NEEDS_SWAP(datumtype);
2153
fc241834
RGS
2154 /* Code inside the switch must take care to properly update
2155 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2156 doesn't simply leave using break */
1109a392 2157 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2158 default:
f337b084
TH
2159 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2160 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2161 case '%':
49704364 2162 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2163 {
2164 char *from;
28be1210 2165 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2166 case '.':
2167 if (howlen == e_star) from = start;
2168 else if (len == 0) from = cur;
2169 else {
2170 tempsym_t *group = symptr;
2171
2172 while (--len && group) group = group->previous;
2173 from = group ? start + group->strbeg : start;
2174 }
2175 fromstr = NEXTFROM;
2176 len = SvIV(fromstr);
2177 goto resize;
28be1210 2178 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2179 case '@':
28be1210
TH
2180 from = start + symptr->strbeg;
2181 resize:
28be1210 2182 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2183 if (len >= 0) {
2184 while (len && from < cur) {
2185 from += UTF8SKIP(from);
2186 len--;
2187 }
2188 if (from > cur)
2189 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2190 if (len) {
2191 /* Here we know from == cur */
2192 grow:
2193 GROWING(0, cat, start, cur, len);
2194 Zero(cur, len, char);
2195 cur += len;
2196 } else if (from < cur) {
2197 len = cur - from;
2198 goto shrink;
2199 } else goto no_change;
2200 } else {
2201 cur = from;
2202 len = -len;
2203 goto utf8_shrink;
f337b084 2204 }
28be1210
TH
2205 else {
2206 len -= cur - from;
f337b084 2207 if (len > 0) goto grow;
28be1210 2208 if (len == 0) goto no_change;
fc241834 2209 len = -len;
28be1210 2210 goto shrink;
f337b084 2211 }
a6ec74c1 2212 break;
28be1210 2213 }
fc241834 2214 case '(': {
49704364 2215 tempsym_t savsym = *symptr;
66c611c5
MHM
2216 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2217 symptr->flags |= group_modifiers;
49704364
WL
2218 symptr->patend = savsym.grpend;
2219 symptr->level++;
f337b084 2220 symptr->previous = &lookahead;
18529408 2221 while (len--) {
f337b084
TH
2222 U32 was_utf8;
2223 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2224 else symptr->flags &= ~FLAG_PARSE_UTF8;
2225 was_utf8 = SvUTF8(cat);
49704364 2226 symptr->patptr = savsym.grpbeg;
f337b084
TH
2227 beglist = pack_rec(cat, symptr, beglist, endlist);
2228 if (SvUTF8(cat) != was_utf8)
2229 /* This had better be an upgrade while in utf8==0 mode */
2230 utf8 = 1;
2231
49704364 2232 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2233 break; /* No way to continue */
2234 }
ee790063 2235 items = endlist - beglist;
f337b084
TH
2236 lookahead.flags = symptr->flags & ~group_modifiers;
2237 goto no_change;
18529408 2238 }
62f95557
IZ
2239 case 'X' | TYPE_IS_SHRIEKING:
2240 if (!len) /* Avoid division by 0 */
2241 len = 1;
f337b084
TH
2242 if (utf8) {
2243 char *hop, *last;
2244 I32 l = len;
2245 hop = last = start;
2246 while (hop < cur) {
2247 hop += UTF8SKIP(hop);
2248 if (--l == 0) {
2249 last = hop;
2250 l = len;
2251 }
2252 }
2253 if (last > cur)
2254 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2255 cur = last;
2256 break;
2257 }
2258 len = (cur-start) % len;
924ba076 2259 /* FALLTHROUGH */
a6ec74c1 2260 case 'X':
f337b084
TH
2261 if (utf8) {
2262 if (len < 1) goto no_change;
28be1210 2263 utf8_shrink:
f337b084
TH
2264 while (len > 0) {
2265 if (cur <= start)
28be1210
TH
2266 Perl_croak(aTHX_ "'%c' outside of string in pack",
2267 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2268 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2269 if (cur <= start)
28be1210
TH
2270 Perl_croak(aTHX_ "'%c' outside of string in pack",
2271 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2272 }
2273 len--;
2274 }
2275 } else {
fc241834 2276 shrink:
f337b084 2277 if (cur - start < len)
28be1210
TH
2278 Perl_croak(aTHX_ "'%c' outside of string in pack",
2279 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2280 cur -= len;
2281 }
2282 if (cur < start+symptr->strbeg) {
2283 /* Make sure group starts don't point into the void */
2284 tempsym_t *group;
9e27e96a 2285 const STRLEN length = cur-start;
f337b084
TH
2286 for (group = symptr;
2287 group && length < group->strbeg;
2288 group = group->previous) group->strbeg = length;
2289 lookahead.strbeg = length;
2290 }
a6ec74c1 2291 break;
fc241834
RGS
2292 case 'x' | TYPE_IS_SHRIEKING: {
2293 I32 ai32;
62f95557
IZ
2294 if (!len) /* Avoid division by 0 */
2295 len = 1;
230e1fce 2296 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2297 else ai32 = (cur - start) % len;
2298 if (ai32 == 0) goto no_change;
2299 len -= ai32;
2300 }
924ba076 2301 /* FALLTHROUGH */
a6ec74c1 2302 case 'x':
f337b084 2303 goto grow;
a6ec74c1
JH
2304 case 'A':
2305 case 'Z':
f337b084 2306 case 'a': {
f7fe979e 2307 const char *aptr;
f337b084 2308
a6ec74c1 2309 fromstr = NEXTFROM;
e62f0680 2310 aptr = SvPV_const(fromstr, fromlen);
f337b084 2311 if (DO_UTF8(fromstr)) {
f7fe979e 2312 const char *end, *s;
f337b084
TH
2313
2314 if (!utf8 && !SvUTF8(cat)) {
2315 marked_upgrade(aTHX_ cat, symptr);
2316 lookahead.flags |= FLAG_DO_UTF8;
2317 lookahead.strbeg = symptr->strbeg;
2318 utf8 = 1;
2319 start = SvPVX(cat);
2320 cur = start + SvCUR(cat);
2321 }
fc241834 2322 if (howlen == e_star) {
f337b084
TH
2323 if (utf8) goto string_copy;
2324 len = fromlen+1;
2325 }
2326 s = aptr;
2327 end = aptr + fromlen;
2328 fromlen = datumtype == 'Z' ? len-1 : len;
2329 while ((I32) fromlen > 0 && s < end) {
2330 s += UTF8SKIP(s);
2331 fromlen--;
2332 }
2333 if (s > end)
2334 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2335 if (utf8) {
fc241834 2336 len = fromlen;
f337b084
TH
2337 if (datumtype == 'Z') len++;
2338 fromlen = s-aptr;
2339 len += fromlen;
fc241834 2340
f337b084 2341 goto string_copy;
fc241834 2342 }
f337b084
TH
2343 fromlen = len - fromlen;
2344 if (datumtype == 'Z') fromlen--;
2345 if (howlen == e_star) {
2346 len = fromlen;
2347 if (datumtype == 'Z') len++;
fc241834 2348 }
f337b084 2349 GROWING(0, cat, start, cur, len);
fc241834 2350 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2351 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2352 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2353 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2354 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2355 cur += fromlen;
a6ec74c1 2356 len -= fromlen;
f337b084
TH
2357 } else if (utf8) {
2358 if (howlen == e_star) {
2359 len = fromlen;
2360 if (datumtype == 'Z') len++;
a6ec74c1 2361 }
f337b084
TH
2362 if (len <= (I32) fromlen) {
2363 fromlen = len;
2364 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2365 }
fc241834 2366 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2367 upgrade, so:
2368 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2369 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2370 len -= fromlen;
2371 while (fromlen > 0) {
230e1fce 2372 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2373 aptr++;
2374 fromlen--;
fc241834 2375 }
f337b084
TH
2376 } else {
2377 string_copy:
2378 if (howlen == e_star) {
2379 len = fromlen;
2380 if (datumtype == 'Z') len++;
2381 }
2382 if (len <= (I32) fromlen) {
2383 fromlen = len;
2384 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2385 }
f337b084
TH
2386 GROWING(0, cat, start, cur, len);
2387 Copy(aptr, cur, fromlen, char);
2388 cur += fromlen;
2389 len -= fromlen;
a6ec74c1 2390 }
f337b084
TH
2391 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2392 cur += len;
3c4fb04a 2393 SvTAINT(cat);
a6ec74c1 2394 break;
f337b084 2395 }
a6ec74c1 2396 case 'B':
f337b084 2397 case 'b': {
b83604b4 2398 const char *str, *end;
f337b084
TH
2399 I32 l, field_len;
2400 U8 bits;
2401 bool utf8_source;
2402 U32 utf8_flags;
a6ec74c1 2403
fc241834 2404 fromstr = NEXTFROM;
b83604b4 2405 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2406 end = str + fromlen;
2407 if (DO_UTF8(fromstr)) {
2408 utf8_source = TRUE;
041457d9 2409 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2410 } else {
2411 utf8_source = FALSE;
2412 utf8_flags = 0; /* Unused, but keep compilers happy */
2413 }
2414 if (howlen == e_star) len = fromlen;
2415 field_len = (len+7)/8;
2416 GROWING(utf8, cat, start, cur, field_len);
2417 if (len > (I32)fromlen) len = fromlen;
2418 bits = 0;
2419 l = 0;
2420 if (datumtype == 'B')
2421 while (l++ < len) {
2422 if (utf8_source) {
95b63a38 2423 UV val = 0;
f337b084
TH
2424 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2425 bits |= val & 1;
2426 } else bits |= *str++ & 1;
2427 if (l & 7) bits <<= 1;
fc241834 2428 else {
f337b084
TH
2429 PUSH_BYTE(utf8, cur, bits);
2430 bits = 0;
a6ec74c1
JH
2431 }
2432 }
f337b084
TH
2433 else
2434 /* datumtype == 'b' */
2435 while (l++ < len) {
2436 if (utf8_source) {
95b63a38 2437 UV val = 0;
f337b084
TH
2438 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2439 if (val & 1) bits |= 0x80;
2440 } else if (*str++ & 1)
2441 bits |= 0x80;
2442 if (l & 7) bits >>= 1;
fc241834 2443 else {
f337b084
TH
2444 PUSH_BYTE(utf8, cur, bits);
2445 bits = 0;
a6ec74c1
JH
2446 }
2447 }
f337b084
TH
2448 l--;
2449 if (l & 7) {
fc241834 2450 if (datumtype == 'B')
f337b084 2451 bits <<= 7 - (l & 7);
fc241834 2452 else
f337b084
TH
2453 bits >>= 7 - (l & 7);
2454 PUSH_BYTE(utf8, cur, bits);
2455 l += 7;
a6ec74c1 2456 }
f337b084
TH
2457 /* Determine how many chars are left in the requested field */
2458 l /= 8;
2459 if (howlen == e_star) field_len = 0;
2460 else field_len -= l;
2461 Zero(cur, field_len, char);
2462 cur += field_len;
a6ec74c1 2463 break;
f337b084 2464 }
a6ec74c1 2465 case 'H':
f337b084 2466 case 'h': {
10516c54 2467 const char *str, *end;
f337b084
TH
2468 I32 l, field_len;
2469 U8 bits;
2470 bool utf8_source;
2471 U32 utf8_flags;
a6ec74c1 2472
fc241834 2473 fromstr = NEXTFROM;
10516c54 2474 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2475 end = str + fromlen;
2476 if (DO_UTF8(fromstr)) {
2477 utf8_source = TRUE;
041457d9 2478 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2479 } else {
2480 utf8_source = FALSE;
2481 utf8_flags = 0; /* Unused, but keep compilers happy */
2482 }
2483 if (howlen == e_star) len = fromlen;
2484 field_len = (len+1)/2;
2485 GROWING(utf8, cat, start, cur, field_len);
2486 if (!utf8 && len > (I32)fromlen) len = fromlen;
2487 bits = 0;
2488 l = 0;
2489 if (datumtype == 'H')
2490 while (l++ < len) {
2491 if (utf8_source) {
95b63a38 2492 UV val = 0;
f337b084
TH
2493 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2494 if (val < 256 && isALPHA(val))
2495 bits |= (val + 9) & 0xf;
a6ec74c1 2496 else
f337b084
TH
2497 bits |= val & 0xf;
2498 } else if (isALPHA(*str))
2499 bits |= (*str++ + 9) & 0xf;
2500 else
2501 bits |= *str++ & 0xf;
2502 if (l & 1) bits <<= 4;
fc241834 2503 else {
f337b084
TH
2504 PUSH_BYTE(utf8, cur, bits);
2505 bits = 0;
a6ec74c1
JH
2506 }
2507 }
f337b084
TH
2508 else
2509 while (l++ < len) {
2510 if (utf8_source) {
95b63a38 2511 UV val = 0;
f337b084
TH
2512 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2513 if (val < 256 && isALPHA(val))
2514 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2515 else
f337b084
TH
2516 bits |= (val & 0xf) << 4;
2517 } else if (isALPHA(*str))
2518 bits |= ((*str++ + 9) & 0xf) << 4;
2519 else
2520 bits |= (*str++ & 0xf) << 4;
2521 if (l & 1) bits >>= 4;
fc241834 2522 else {
f337b084
TH
2523 PUSH_BYTE(utf8, cur, bits);
2524 bits = 0;
a6ec74c1 2525 }
fc241834 2526 }
f337b084
TH
2527 l--;
2528 if (l & 1) {
2529 PUSH_BYTE(utf8, cur, bits);
2530 l++;
2531 }
2532 /* Determine how many chars are left in the requested field */
2533 l /= 2;
2534 if (howlen == e_star) field_len = 0;
2535 else field_len -= l;
2536 Zero(cur, field_len, char);
2537 cur += field_len;
2538 break;
fc241834
RGS
2539 }
2540 case 'c':
f337b084
TH
2541 while (len-- > 0) {
2542 IV aiv;
2543 fromstr = NEXTFROM;
2544 aiv = SvIV(fromstr);
a2a5de95
NC
2545 if ((-128 > aiv || aiv > 127))
2546 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2547 "Character in 'c' format wrapped in pack");
585ec06d 2548 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2549 }
2550 break;
2551 case 'C':
f337b084
TH
2552 if (len == 0) {
2553 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2554 break;
2555 }
a6ec74c1 2556 while (len-- > 0) {
f337b084 2557 IV aiv;
a6ec74c1 2558 fromstr = NEXTFROM;
f337b084 2559 aiv = SvIV(fromstr);
a2a5de95
NC
2560 if ((0 > aiv || aiv > 0xff))
2561 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2562 "Character in 'C' format wrapped in pack");
1651fc44 2563 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2564 }
fc241834
RGS
2565 break;
2566 case 'W': {
2567 char *end;
670f1322 2568 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2569
2570 end = start+SvLEN(cat)-1;
2571 if (utf8) end -= UTF8_MAXLEN-1;
2572 while (len-- > 0) {
2573 UV auv;
2574 fromstr = NEXTFROM;
2575 auv = SvUV(fromstr);
2576 if (in_bytes) auv = auv % 0x100;
2577 if (utf8) {
2578 W_utf8:
2579 if (cur > end) {
2580 *cur = '\0';
b162af07 2581 SvCUR_set(cat, cur - start);
fc241834
RGS
2582
2583 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2584 end = start+SvLEN(cat)-UTF8_MAXLEN;
2585 }
c80e42f3
KW
2586 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2587 auv,
041457d9 2588 warn_utf8 ?
230e1fce 2589 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2590 } else {
2591 if (auv >= 0x100) {
2592 if (!SvUTF8(cat)) {
2593 *cur = '\0';
b162af07 2594 SvCUR_set(cat, cur - start);
fc241834
RGS
2595 marked_upgrade(aTHX_ cat, symptr);
2596 lookahead.flags |= FLAG_DO_UTF8;
2597 lookahead.strbeg = symptr->strbeg;
2598 utf8 = 1;
2599 start = SvPVX(cat);
2600 cur = start + SvCUR(cat);
2601 end = start+SvLEN(cat)-UTF8_MAXLEN;
2602 goto W_utf8;
2603 }
a2a5de95
NC
2604 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2605 "Character in 'W' format wrapped in pack");
fc241834
RGS
2606 auv &= 0xff;
2607 }
2608 if (cur >= end) {
2609 *cur = '\0';
b162af07 2610 SvCUR_set(cat, cur - start);
fc241834
RGS
2611 GROWING(0, cat, start, cur, len+1);
2612 end = start+SvLEN(cat)-1;
2613 }
fe2774ed 2614 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2615 }
2616 }
2617 break;
fc241834
RGS
2618 }
2619 case 'U': {
2620 char *end;
2621
2622 if (len == 0) {
2623 if (!(symptr->flags & FLAG_DO_UTF8)) {
2624 marked_upgrade(aTHX_ cat, symptr);
2625 lookahead.flags |= FLAG_DO_UTF8;
2626 lookahead.strbeg = symptr->strbeg;
2627 }
2628 utf8 = 0;
2629 goto no_change;
2630 }
2631
2632 end = start+SvLEN(cat);
2633 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2634 while (len-- > 0) {
fc241834 2635 UV auv;
a6ec74c1 2636 fromstr = NEXTFROM;
fc241834
RGS
2637 auv = SvUV(fromstr);
2638 if (utf8) {
230e1fce 2639 U8 buffer[UTF8_MAXLEN], *endb;
c80e42f3 2640 endb = uvchr_to_utf8_flags(buffer, auv,
041457d9 2641 warn_utf8 ?
fc241834
RGS
2642 0 : UNICODE_ALLOW_ANY);
2643 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2644 *cur = '\0';
b162af07 2645 SvCUR_set(cat, cur - start);
fc241834
RGS
2646 GROWING(0, cat, start, cur,
2647 len+(endb-buffer)*UTF8_EXPAND);
2648 end = start+SvLEN(cat);
2649 }
3a88beaa 2650 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
fc241834
RGS
2651 } else {
2652 if (cur >= end) {
2653 *cur = '\0';
b162af07 2654 SvCUR_set(cat, cur - start);
fc241834
RGS
2655 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2656 end = start+SvLEN(cat)-UTF8_MAXLEN;
2657 }
c80e42f3 2658 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
041457d9 2659 warn_utf8 ?
230e1fce 2660 0 : UNICODE_ALLOW_ANY);
fc241834 2661 }
a6ec74c1 2662 }
a6ec74c1 2663 break;
fc241834 2664 }
a6ec74c1
JH
2665 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2666 case 'f':
a6ec74c1 2667 while (len-- > 0) {
f337b084
TH
2668 float afloat;
2669 NV anv;
a6ec74c1 2670 fromstr = NEXTFROM;
f337b084 2671 anv = SvNV(fromstr);
85bba25f 2672# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2673 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2674 * on Alpha; fake it if we don't have them.
2675 */
f337b084 2676 if (anv > FLT_MAX)
fc241834 2677 afloat = FLT_MAX;
f337b084 2678 else if (anv < -FLT_MAX)
fc241834 2679 afloat = -FLT_MAX;
f337b084 2680 else afloat = (float)anv;
baf3cf9c 2681# else
f337b084 2682 afloat = (float)anv;
baf3cf9c 2683# endif
3a88beaa 2684 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2685 }
2686 break;
2687 case 'd':
a6ec74c1 2688 while (len-- > 0) {
f337b084
TH
2689 double adouble;
2690 NV anv;
a6ec74c1 2691 fromstr = NEXTFROM;
f337b084 2692 anv = SvNV(fromstr);
85bba25f 2693# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2694 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2695 * on Alpha; fake it if we don't have them.
2696 */
f337b084 2697 if (anv > DBL_MAX)
fc241834 2698 adouble = DBL_MAX;
f337b084 2699 else if (anv < -DBL_MAX)
fc241834 2700 adouble = -DBL_MAX;
f337b084 2701 else adouble = (double)anv;
baf3cf9c 2702# else
f337b084 2703 adouble = (double)anv;
baf3cf9c 2704# endif
3a88beaa 2705 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2706 }
2707 break;
fc241834 2708 case 'F': {
275663fa 2709 NV_bytes anv;
1109a392 2710 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2711 while (len-- > 0) {
2712 fromstr = NEXTFROM;
cd07c537
DM
2713#ifdef __GNUC__
2714 /* to work round a gcc/x86 bug; don't use SvNV */
2715 anv.nv = sv_2nv(fromstr);
2716#else
275663fa 2717 anv.nv = SvNV(fromstr);
cd07c537 2718#endif
3a88beaa 2719 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2720 }
2721 break;
fc241834 2722 }
92d41999 2723#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2724 case 'D': {
275663fa 2725 ld_bytes aldouble;
1109a392
MHM
2726 /* long doubles can have unused bits, which may be nonzero */
2727 Zero(&aldouble, 1, long double);
92d41999
JH
2728 while (len-- > 0) {
2729 fromstr = NEXTFROM;
cd07c537
DM
2730# ifdef __GNUC__
2731 /* to work round a gcc/x86 bug; don't use SvNV */
2732 aldouble.ld = (long double)sv_2nv(fromstr);
2733# else
275663fa 2734 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2735# endif
3a88beaa
NC
2736 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2737 needs_swap);
92d41999
JH
2738 }
2739 break;
fc241834 2740 }
92d41999 2741#endif
068bd2e7 2742 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2743 case 'n':
2744 while (len-- > 0) {
f337b084 2745 I16 ai16;
a6ec74c1 2746 fromstr = NEXTFROM;
ef108786 2747 ai16 = (I16)SvIV(fromstr);
ef108786 2748 ai16 = PerlSock_htons(ai16);
3a88beaa 2749 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2750 }
2751 break;
068bd2e7 2752 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2753 case 'v':
2754 while (len-- > 0) {
f337b084 2755 I16 ai16;
a6ec74c1 2756 fromstr = NEXTFROM;
ef108786 2757 ai16 = (I16)SvIV(fromstr);
ef108786 2758 ai16 = htovs(ai16);
3a88beaa 2759 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2760 }
2761 break;
49704364 2762 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2763#if SHORTSIZE != SIZE16
fc241834 2764 while (len-- > 0) {
f337b084 2765 unsigned short aushort;
fc241834
RGS
2766 fromstr = NEXTFROM;
2767 aushort = SvUV(fromstr);
3a88beaa 2768 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2769 }
49704364
WL
2770 break;
2771#else
924ba076 2772 /* FALLTHROUGH */
a6ec74c1 2773#endif
49704364 2774 case 'S':
fc241834 2775 while (len-- > 0) {
f337b084 2776 U16 au16;
fc241834
RGS
2777 fromstr = NEXTFROM;
2778 au16 = (U16)SvUV(fromstr);
3a88beaa 2779 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2780 }
2781 break;
49704364 2782 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2783#if SHORTSIZE != SIZE16
fc241834 2784 while (len-- > 0) {
f337b084 2785 short ashort;
fc241834
RGS
2786 fromstr = NEXTFROM;
2787 ashort = SvIV(fromstr);
3a88beaa 2788 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2789 }
49704364
WL
2790 break;
2791#else
924ba076 2792 /* FALLTHROUGH */
a6ec74c1 2793#endif
49704364
WL
2794 case 's':
2795 while (len-- > 0) {
f337b084 2796 I16 ai16;
49704364 2797 fromstr = NEXTFROM;
ef108786 2798 ai16 = (I16)SvIV(fromstr);
3a88beaa 2799 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2800 }
2801 break;
2802 case 'I':
49704364 2803 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2804 while (len-- > 0) {
f337b084 2805 unsigned int auint;
a6ec74c1
JH
2806 fromstr = NEXTFROM;
2807 auint = SvUV(fromstr);
3a88beaa 2808 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2809 }
2810 break;
92d41999
JH
2811 case 'j':
2812 while (len-- > 0) {
f337b084 2813 IV aiv;
92d41999
JH
2814 fromstr = NEXTFROM;
2815 aiv = SvIV(fromstr);
3a88beaa 2816 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2817 }
2818 break;
2819 case 'J':
2820 while (len-- > 0) {
f337b084 2821 UV auv;
92d41999
JH
2822 fromstr = NEXTFROM;
2823 auv = SvUV(fromstr);
3a88beaa 2824 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2825 }
2826 break;
a6ec74c1
JH
2827 case 'w':
2828 while (len-- > 0) {
f337b084 2829 NV anv;
a6ec74c1 2830 fromstr = NEXTFROM;
15e9f109 2831 anv = SvNV(fromstr);
a6ec74c1 2832
f337b084
TH
2833 if (anv < 0) {
2834 *cur = '\0';
b162af07 2835 SvCUR_set(cat, cur - start);
49704364 2836 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2837 }
a6ec74c1 2838
196b62db
NC
2839 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2840 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2841 any negative IVs will have already been got by the croak()
2842 above. IOK is untrue for fractions, so we test them
2843 against UV_MAX_P1. */
f337b084
TH
2844 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2845 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2846 char *in = buf + sizeof(buf);
196b62db 2847 UV auv = SvUV(fromstr);
a6ec74c1
JH
2848
2849 do {
eb160463 2850 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2851 auv >>= 7;
2852 } while (auv);
2853 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2854 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2855 in, (buf + sizeof(buf)) - in);
2856 } else if (SvPOKp(fromstr))
2857 goto w_string;
a6ec74c1 2858 else if (SvNOKp(fromstr)) {
0258719b 2859 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2860 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2861 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2862 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2863 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2864 Some C compilers are strict about integral constant
2865 expressions so we conservatively divide by a slightly
2866 smaller integer instead of multiplying by the exact
2867 floating-point value.
0258719b
NC
2868 */
2869#ifdef NV_MAX_10_EXP
f337b084 2870 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2871 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2872#else
f337b084 2873 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2874 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2875#endif
a6ec74c1
JH
2876 char *in = buf + sizeof(buf);
2877
8b6e33c7 2878 anv = Perl_floor(anv);
a6ec74c1 2879 do {
8b6e33c7 2880 const NV next = Perl_floor(anv / 128);
a6ec74c1 2881 if (in <= buf) /* this cannot happen ;-) */
49704364 2882 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2883 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2884 anv = next;
2885 } while (anv > 0);
a6ec74c1 2886 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2887 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2888 in, (buf + sizeof(buf)) - in);
2889 } else {
8b6e33c7
AL
2890 const char *from;
2891 char *result, *in;
735b914b
JH
2892 SV *norm;
2893 STRLEN len;
2894 bool done;
2895
f337b084 2896 w_string:
735b914b 2897 /* Copy string and check for compliance */
349d4f2f 2898 from = SvPV_const(fromstr, len);
735b914b 2899 if ((norm = is_an_int(from, len)) == NULL)
49704364 2900 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2901
a02a5408 2902 Newx(result, len, char);
735b914b
JH
2903 in = result + len;
2904 done = FALSE;
f337b084 2905 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2906 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2907 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2908 in, (result + len) - in);
735b914b
JH
2909 Safefree(result);
2910 SvREFCNT_dec(norm); /* free norm */
fc241834 2911 }
a6ec74c1
JH
2912 }
2913 break;
2914 case 'i':
49704364 2915 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2916 while (len-- > 0) {
f337b084 2917 int aint;
a6ec74c1
JH
2918 fromstr = NEXTFROM;
2919 aint = SvIV(fromstr);
3a88beaa 2920 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2921 }
2922 break;
068bd2e7 2923 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2924 case 'N':
2925 while (len-- > 0) {
f337b084 2926 U32 au32;
a6ec74c1 2927 fromstr = NEXTFROM;
ef108786 2928 au32 = SvUV(fromstr);
ef108786 2929 au32 = PerlSock_htonl(au32);
3a88beaa 2930 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2931 }
2932 break;
068bd2e7 2933 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2934 case 'V':
2935 while (len-- > 0) {
f337b084 2936 U32 au32;
a6ec74c1 2937 fromstr = NEXTFROM;
ef108786 2938 au32 = SvUV(fromstr);
ef108786 2939 au32 = htovl(au32);
3a88beaa 2940 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2941 }
2942 break;
49704364 2943 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2944#if LONGSIZE != SIZE32
fc241834 2945 while (len-- > 0) {
f337b084 2946 unsigned long aulong;
fc241834
RGS
2947 fromstr = NEXTFROM;
2948 aulong = SvUV(fromstr);
3a88beaa 2949 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2950 }
49704364
WL
2951 break;
2952#else
2953 /* Fall though! */
a6ec74c1 2954#endif
49704364 2955 case 'L':
fc241834 2956 while (len-- > 0) {
f337b084 2957 U32 au32;
fc241834
RGS
2958 fromstr = NEXTFROM;
2959 au32 = SvUV(fromstr);
3a88beaa 2960 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2961 }
2962 break;
49704364 2963 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2964#if LONGSIZE != SIZE32
fc241834 2965 while (len-- > 0) {
f337b084 2966 long along;
fc241834
RGS
2967 fromstr = NEXTFROM;
2968 along = SvIV(fromstr);
3a88beaa 2969 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 2970 }
49704364
WL
2971 break;
2972#else
2973 /* Fall though! */
a6ec74c1 2974#endif
49704364
WL
2975 case 'l':
2976 while (len-- > 0) {
f337b084 2977 I32 ai32;
49704364 2978 fromstr = NEXTFROM;
ef108786 2979 ai32 = SvIV(fromstr);
3a88beaa 2980 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
2981 }
2982 break;
c174bf3b 2983#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
2984 case 'Q':
2985 while (len-- > 0) {
f337b084 2986 Uquad_t auquad;
a6ec74c1 2987 fromstr = NEXTFROM;
f337b084 2988 auquad = (Uquad_t) SvUV(fromstr);
3a88beaa 2989 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
2990 }
2991 break;
2992 case 'q':
2993 while (len-- > 0) {
f337b084 2994 Quad_t aquad;
a6ec74c1
JH
2995 fromstr = NEXTFROM;
2996 aquad = (Quad_t)SvIV(fromstr);
3a88beaa 2997 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
2998 }
2999 break;
1640b983 3000#endif
a6ec74c1
JH
3001 case 'P':
3002 len = 1; /* assume SV is correct length */
f337b084 3003 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3004 /* FALLTHROUGH */
a6ec74c1
JH
3005 case 'p':
3006 while (len-- > 0) {
83003860 3007 const char *aptr;
f337b084 3008
a6ec74c1 3009 fromstr = NEXTFROM;
28a4f200
TH
3010 SvGETMAGIC(fromstr);
3011 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3012 else {
a6ec74c1
JH
3013 /* XXX better yet, could spirit away the string to
3014 * a safe spot and hang on to it until the result
3015 * of pack() (and all copies of the result) are
3016 * gone.
3017 */
041457d9 3018 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3019 !SvREADONLY(fromstr)))) {
3020 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3021 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3022 }
3023 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3024 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3025 else
2596d9fe 3026 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3027 }
3a88beaa 3028 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3029 }
3030 break;
fc241834 3031 case 'u': {
f7fe979e 3032 const char *aptr, *aend;
fc241834 3033 bool from_utf8;
f337b084 3034
a6ec74c1 3035 fromstr = NEXTFROM;
fc241834
RGS
3036 if (len <= 2) len = 45;
3037 else len = len / 3 * 3;
3038 if (len >= 64) {
a2a5de95
NC
3039 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3040 "Field too wide in 'u' format in pack");
fc241834
RGS
3041 len = 63;
3042 }
83003860 3043 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3044 from_utf8 = DO_UTF8(fromstr);
3045 if (from_utf8) {
3046 aend = aptr + fromlen;
3f63b0e5 3047 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3048 } else aend = NULL; /* Unused, but keep compilers happy */
3049 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3050 while (fromlen > 0) {
fc241834 3051 U8 *end;
a6ec74c1 3052 I32 todo;
fc241834 3053 U8 hunk[1+63/3*4+1];
a6ec74c1 3054
eb160463 3055 if ((I32)fromlen > len)
a6ec74c1
JH
3056 todo = len;
3057 else
3058 todo = fromlen;
fc241834
RGS
3059 if (from_utf8) {
3060 char buffer[64];
3061 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3062 'u' | TYPE_IS_PACK)) {
3063 *cur = '\0';
b162af07 3064 SvCUR_set(cat, cur - start);
5637ef5b
NC
3065 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3066 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3067 aptr, aend, buffer, (long) todo);
fc241834
RGS
3068 }
3069 end = doencodes(hunk, buffer, todo);
3070 } else {
3071 end = doencodes(hunk, aptr, todo);
3072 aptr += todo;
3073 }
3a88beaa 3074 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3075 fromlen -= todo;
3076 }
a6ec74c1
JH
3077 break;
3078 }
f337b084
TH
3079 }
3080 *cur = '\0';
b162af07 3081 SvCUR_set(cat, cur - start);
f337b084 3082 no_change:
49704364 3083 *symptr = lookahead;
a6ec74c1 3084 }
49704364 3085 return beglist;
18529408
IZ
3086}
3087#undef NEXTFROM
3088
3089
3090PP(pp_pack)
3091{
97aff369 3092 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3093 SV *cat = TARG;
18529408 3094 STRLEN fromlen;
349d4f2f 3095 SV *pat_sv = *++MARK;
eb578fdb
KW
3096 const char *pat = SvPV_const(pat_sv, fromlen);
3097 const char *patend = pat + fromlen;
18529408
IZ
3098
3099 MARK++;
76f68e9b 3100 sv_setpvs(cat, "");
f337b084 3101 SvUTF8_off(cat);
18529408 3102
7accc089 3103 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3104
a6ec74c1
JH
3105 SvSETMAGIC(cat);
3106 SP = ORIGMARK;
3107 PUSHs(cat);
3108 RETURN;
3109}
a6ec74c1 3110
73cb7263
NC
3111/*
3112 * Local variables:
3113 * c-indentation-style: bsd
3114 * c-basic-offset: 4
14d04a33 3115 * indent-tabs-mode: nil
73cb7263
NC
3116 * End:
3117 *
14d04a33 3118 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3119 */