This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Sort MANIFEST
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
f337b084 154 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
WL
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
219#if 'I' == 73 && 'J' == 74
220/* On an ASCII/ISO kind of system */
221#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222#else
223/*
224 Some other sort of character set - use memchr() so we don't match
225 the null byte.
226 */
227#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
228#endif
229
66c611c5 230/* type modifiers */
62f95557 231#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
232#define TYPE_IS_BIG_ENDIAN 0x200
233#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 234#define TYPE_IS_PACK 0x800
1109a392 235#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 236#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
237#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
238
7212898e
NC
239# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
241
242# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
243
78d46eaa 244#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 245#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
246#define PACK_SIZE_MASK 0x3F
247
298bc19c 248#include "packsizetables.c"
78d46eaa 249
20aa3a38
NC
250static void
251S_reverse_copy(const char *src, char *dest, STRLEN len)
252{
253 dest += len;
254 while (len--)
255 *--dest = *src++;
256}
257
08ca2aa3 258STATIC U8
f7fe979e 259uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 260{
08ca2aa3 261 STRLEN retlen;
0bcc34c2 262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 264 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 270 if (val >= 0x100) {
a2a5de95
NC
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
274 val &= 0xff;
275 }
276 *s += retlen;
fe2774ed 277 return (U8)val;
08ca2aa3
TH
278}
279
f337b084
TH
280#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
282 *(U8 *)(s)++)
283
284STATIC bool
f7fe979e 285uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
286{
287 UV val;
288 STRLEN retlen;
f7fe979e 289 const char *from = *s;
08ca2aa3 290 int bad = 0;
f7fe979e 291 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
293 const bool needs_swap = NEEDS_SWAP(datumtype);
294
228e69a7 295 if (UNLIKELY(needs_swap))
20aa3a38
NC
296 buf += buf_len;
297
08ca2aa3
TH
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
f337b084 300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
303 bad |= 1;
304 } else from += retlen;
305 if (val >= 0x100) {
306 bad |= 2;
307 val &= 0xff;
308 }
228e69a7 309 if (UNLIKELY(needs_swap))
20aa3a38
NC
310 *(U8 *)--buf = (U8)val;
311 else
312 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
313 }
314 /* We have enough characters for the buffer. Did we have problems ? */
315 if (bad) {
316 if (bad & 1) {
317 /* Rewalk the string fragment while warning */
f7fe979e 318 const char *ptr;
9e27e96a 319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
c80e42f3 322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
323 }
324 if (from > end) from = end;
325 }
a2a5de95
NC
326 if ((bad & 2))
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 328 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
332 }
333 *s = from;
334 return TRUE;
335}
336
337STATIC bool
f7fe979e 338next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 339{
97aff369 340 dVAR;
08ca2aa3 341 STRLEN retlen;
0bcc34c2 342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
343 if (val >= 0x100 || !ISUUCHAR(val) ||
344 retlen == (STRLEN) -1 || retlen == 0) {
345 *out = 0;
346 return FALSE;
347 }
348 *out = PL_uudmap[val] & 077;
f337b084 349 *s += retlen;
08ca2aa3
TH
350 return TRUE;
351}
78d46eaa 352
64844641 353STATIC char *
3a88beaa 354S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
7918f24d
NC
355 PERL_ARGS_ASSERT_BYTES_TO_UNI;
356
228e69a7 357 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
358 const U8 *p = start + len;
359 while (p-- > start) {
55d09dc8 360 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
361 }
362 } else {
363 const U8 * const end = start + len;
364 while (start < end) {
55d09dc8 365 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
366 start++;
367 }
f337b084 368 }
64844641 369 return dest;
f337b084
TH
370}
371
3a88beaa 372#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 373STMT_START { \
228e69a7 374 if (UNLIKELY(utf8)) \
3a88beaa 375 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
230e1fce 376 else { \
228e69a7 377 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
378 S_reverse_copy((char *)(buf), cur, len); \
379 else \
380 Copy(buf, cur, len, char); \
230e1fce
NC
381 (cur) += (len); \
382 } \
f337b084
TH
383} STMT_END
384
385#define GROWING(utf8, cat, start, cur, in_len) \
386STMT_START { \
387 STRLEN glen = (in_len); \
3473cf63 388 if (utf8) glen *= UTF8_EXPAND; \
f337b084 389 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 390 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
391 (cur) = (start) + SvCUR(cat); \
392 } \
393} STMT_END
394
395#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
396STMT_START { \
f7fe979e 397 const STRLEN glen = (in_len); \
f337b084 398 STRLEN gl = glen; \
3473cf63 399 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
400 if ((cur) + gl >= (start) + SvLEN(cat)) { \
401 *cur = '\0'; \
b162af07 402 SvCUR_set((cat), (cur) - (start)); \
0bd48802 403 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
404 (cur) = (start) + SvCUR(cat); \
405 } \
3a88beaa 406 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
407} STMT_END
408
409#define PUSH_BYTE(utf8, s, byte) \
410STMT_START { \
411 if (utf8) { \
f7fe979e 412 const U8 au8 = (byte); \
3a88beaa 413 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
f337b084
TH
414 } else *(U8 *)(s)++ = (byte); \
415} STMT_END
416
417/* Only to be used inside a loop (see the break) */
418#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
419STMT_START { \
420 STRLEN retlen; \
421 if (str >= end) break; \
422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
423 if (retlen == (STRLEN) -1 || retlen == 0) { \
424 *cur = '\0'; \
425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
426 } \
427 str += retlen; \
428} STMT_END
429
f7fe979e
AL
430static const char *_action( const tempsym_t* symptr )
431{
10edeb5d 432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
433}
434
206947d2 435/* Returns the sizeof() struct described by pat */
028d1f6d 436STATIC I32
f337b084 437S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 438{
f337b084 439 I32 total = 0;
206947d2 440
7918f24d
NC
441 PERL_ARGS_ASSERT_MEASURE_STRUCT;
442
49704364 443 while (next_symbol(symptr)) {
f337b084 444 I32 len;
f7fe979e 445 int size;
f337b084
TH
446
447 switch (symptr->howlen) {
fc241834 448 case e_star:
49704364 449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 450 _action( symptr ) );
49704364 451 break;
f337b084
TH
452 default:
453 /* e_no_len and e_number */
454 len = symptr->length;
455 break;
49704364
WL
456 }
457
a7a3cfaa 458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 459 if (!size) {
f7fe979e 460 int star;
80a13697
NC
461 /* endianness doesn't influence the size of a type */
462 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
463 default:
464 Perl_croak(aTHX_ "Invalid type '%c' in %s",
465 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 466 _action( symptr ) );
28be1210
TH
467 case '.' | TYPE_IS_SHRIEKING:
468 case '@' | TYPE_IS_SHRIEKING:
80a13697 469 case '@':
28be1210 470 case '.':
80a13697
NC
471 case '/':
472 case 'U': /* XXXX Is it correct? */
473 case 'w':
474 case 'u':
475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 476 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 477 _action( symptr ) );
80a13697
NC
478 case '%':
479 size = 0;
480 break;
481 case '(':
fc241834
RGS
482 {
483 tempsym_t savsym = *symptr;
484 symptr->patptr = savsym.grpbeg;
485 symptr->patend = savsym.grpend;
486 /* XXXX Theoretically, we need to measure many times at
487 different positions, since the subexpression may contain
488 alignment commands, but be not of aligned length.
489 Need to detect this and croak(). */
490 size = measure_struct(symptr);
491 *symptr = savsym;
492 break;
493 }
80a13697
NC
494 case 'X' | TYPE_IS_SHRIEKING:
495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
496 */
497 if (!len) /* Avoid division by 0 */
498 len = 1;
499 len = total % len; /* Assumed: the start is aligned. */
500 /* FALL THROUGH */
501 case 'X':
502 size = -1;
503 if (total < len)
f7fe979e 504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
505 break;
506 case 'x' | TYPE_IS_SHRIEKING:
507 if (!len) /* Avoid division by 0 */
508 len = 1;
509 star = total % len; /* Assumed: the start is aligned. */
510 if (star) /* Other portable ways? */
511 len = len - star;
512 else
513 len = 0;
514 /* FALL THROUGH */
515 case 'x':
516 case 'A':
517 case 'Z':
518 case 'a':
80a13697
NC
519 size = 1;
520 break;
521 case 'B':
522 case 'b':
523 len = (len + 7)/8;
524 size = 1;
525 break;
526 case 'H':
527 case 'h':
528 len = (len + 1)/2;
529 size = 1;
530 break;
78d46eaa 531
80a13697
NC
532 case 'P':
533 len = 1;
534 size = sizeof(char*);
78d46eaa
NC
535 break;
536 }
206947d2
IZ
537 }
538 total += len * size;
539 }
540 return total;
541}
542
49704364
WL
543
544/* locate matching closing parenthesis or bracket
545 * returns char pointer to char after match, or NULL
546 */
f7fe979e 547STATIC const char *
5aaab254 548S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 549{
7918f24d
NC
550 PERL_ARGS_ASSERT_GROUP_END;
551
49704364 552 while (patptr < patend) {
f7fe979e 553 const char c = *patptr++;
49704364
WL
554
555 if (isSPACE(c))
556 continue;
557 else if (c == ender)
558 return patptr-1;
559 else if (c == '#') {
560 while (patptr < patend && *patptr != '\n')
561 patptr++;
562 continue;
563 } else if (c == '(')
564 patptr = group_end(patptr, patend, ')') + 1;
565 else if (c == '[')
566 patptr = group_end(patptr, patend, ']') + 1;
18529408 567 }
49704364
WL
568 Perl_croak(aTHX_ "No group ending character '%c' found in template",
569 ender);
570 return 0;
18529408
IZ
571}
572
49704364
WL
573
574/* Convert unsigned decimal number to binary.
575 * Expects a pointer to the first digit and address of length variable
576 * Advances char pointer to 1st non-digit char and returns number
fc241834 577 */
f7fe979e 578STATIC const char *
5aaab254 579S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
580{
581 I32 len = *patptr++ - '0';
7918f24d
NC
582
583 PERL_ARGS_ASSERT_GET_NUM;
584
49704364
WL
585 while (isDIGIT(*patptr)) {
586 if (len >= 0x7FFFFFFF/10)
587 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
588 len = (len * 10) + (*patptr++ - '0');
589 }
590 *lenptr = len;
591 return patptr;
592}
593
594/* The marvellous template parsing routine: Using state stored in *symptr,
595 * locates next template code and count
596 */
597STATIC bool
f337b084 598S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 599{
f7fe979e 600 const char* patptr = symptr->patptr;
0bcc34c2 601 const char* const patend = symptr->patend;
49704364 602
7918f24d
NC
603 PERL_ARGS_ASSERT_NEXT_SYMBOL;
604
49704364
WL
605 symptr->flags &= ~FLAG_SLASH;
606
607 while (patptr < patend) {
608 if (isSPACE(*patptr))
609 patptr++;
610 else if (*patptr == '#') {
611 patptr++;
612 while (patptr < patend && *patptr != '\n')
613 patptr++;
614 if (patptr < patend)
615 patptr++;
616 } else {
fc241834 617 /* We should have found a template code */
49704364 618 I32 code = *patptr++ & 0xFF;
66c611c5 619 U32 inherited_modifiers = 0;
49704364
WL
620
621 if (code == ','){ /* grandfather in commas but with a warning */
622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
623 symptr->flags |= FLAG_COMMA;
624 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 625 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
626 }
627 continue;
628 }
fc241834 629
49704364 630 /* for '(', skip to ')' */
fc241834 631 if (code == '(') {
49704364
WL
632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
633 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 634 _action( symptr ) );
49704364
WL
635 symptr->grpbeg = patptr;
636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 639 _action( symptr ) );
49704364
WL
640 }
641
66c611c5
MHM
642 /* look for group modifiers to inherit */
643 if (TYPE_ENDIANNESS(symptr->flags)) {
644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
646 }
647
1109a392
MHM
648 /* look for modifiers */
649 while (patptr < patend) {
b7787f18
AL
650 const char *allowed;
651 I32 modifier;
1109a392
MHM
652 switch (*patptr) {
653 case '!':
654 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 655 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
656 break;
657 case '>':
658 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 659 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
660 break;
661 case '<':
662 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 663 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
664 break;
665 default:
b7787f18
AL
666 allowed = "";
667 modifier = 0;
1109a392
MHM
668 break;
669 }
66c611c5 670
1109a392
MHM
671 if (modifier == 0)
672 break;
66c611c5 673
1109a392
MHM
674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 676 allowed, _action( symptr ) );
66c611c5
MHM
677
678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
682 TYPE_ENDIANNESS_MASK)
683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 684 *patptr, _action( symptr ) );
66c611c5 685
a2a5de95
NC
686 if ((code & modifier)) {
687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
688 "Duplicate modifier '%c' after '%c' in %s",
689 *patptr, (int) TYPE_NO_MODIFIERS(code),
690 _action( symptr ) );
1109a392 691 }
66c611c5 692
1109a392
MHM
693 code |= modifier;
694 patptr++;
49704364
WL
695 }
696
66c611c5
MHM
697 /* inherit modifiers */
698 code |= inherited_modifiers;
699
fc241834 700 /* look for count and/or / */
49704364
WL
701 if (patptr < patend) {
702 if (isDIGIT(*patptr)) {
703 patptr = get_num( patptr, &symptr->length );
704 symptr->howlen = e_number;
705
706 } else if (*patptr == '*') {
707 patptr++;
708 symptr->howlen = e_star;
709
710 } else if (*patptr == '[') {
f7fe979e 711 const char* lenptr = ++patptr;
49704364
WL
712 symptr->howlen = e_number;
713 patptr = group_end( patptr, patend, ']' ) + 1;
714 /* what kind of [] is it? */
715 if (isDIGIT(*lenptr)) {
716 lenptr = get_num( lenptr, &symptr->length );
717 if( *lenptr != ']' )
718 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 719 _action( symptr ) );
49704364
WL
720 } else {
721 tempsym_t savsym = *symptr;
722 symptr->patend = patptr-1;
723 symptr->patptr = lenptr;
724 savsym.length = measure_struct(symptr);
725 *symptr = savsym;
726 }
727 } else {
728 symptr->howlen = e_no_len;
729 symptr->length = 1;
730 }
731
732 /* try to find / */
733 while (patptr < patend) {
734 if (isSPACE(*patptr))
735 patptr++;
736 else if (*patptr == '#') {
737 patptr++;
738 while (patptr < patend && *patptr != '\n')
739 patptr++;
740 if (patptr < patend)
741 patptr++;
742 } else {
66c611c5 743 if (*patptr == '/') {
49704364
WL
744 symptr->flags |= FLAG_SLASH;
745 patptr++;
66c611c5
MHM
746 if (patptr < patend &&
747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 749 _action( symptr ) );
49704364
WL
750 }
751 break;
752 }
18529408 753 }
49704364
WL
754 } else {
755 /* at end - no count, no / */
756 symptr->howlen = e_no_len;
757 symptr->length = 1;
758 }
759
760 symptr->code = code;
fc241834 761 symptr->patptr = patptr;
49704364 762 return TRUE;
18529408 763 }
49704364 764 }
fc241834 765 symptr->patptr = patptr;
49704364 766 return FALSE;
18529408
IZ
767}
768
18529408 769/*
fc241834 770 There is no way to cleanly handle the case where we should process the
08ca2aa3 771 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
772 (e.g. estimates like strend-s as an upper bound for the number of
773 characters left wouldn't work). So if we foresee the need of this
774 (pattern starts with U or contains U0), we want to work on the encoded
775 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
776 themselves if they need to do a lot of unpacks like this on it
777*/
fc241834 778STATIC bool
08ca2aa3
TH
779need_utf8(const char *pat, const char *patend)
780{
781 bool first = TRUE;
7918f24d
NC
782
783 PERL_ARGS_ASSERT_NEED_UTF8;
784
08ca2aa3
TH
785 while (pat < patend) {
786 if (pat[0] == '#') {
787 pat++;
f7fe979e 788 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
789 if (!pat) return FALSE;
790 } else if (pat[0] == 'U') {
791 if (first || pat[1] == '0') return TRUE;
792 } else first = FALSE;
793 pat++;
794 }
795 return FALSE;
796}
797
798STATIC char
799first_symbol(const char *pat, const char *patend) {
7918f24d
NC
800 PERL_ARGS_ASSERT_FIRST_SYMBOL;
801
08ca2aa3
TH
802 while (pat < patend) {
803 if (pat[0] != '#') return pat[0];
804 pat++;
f7fe979e 805 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
806 if (!pat) return 0;
807 pat++;
808 }
809 return 0;
810}
811
812/*
7accc089
JH
813=for apidoc unpackstring
814
21ebfc7a
DM
815The engine implementing the unpack() Perl function.
816
817Using the template pat..patend, this function unpacks the string
818s..strend into a number of mortal SVs, which it pushes onto the perl
819argument (@_) stack (so you will need to issue a C<PUTBACK> before and
820C<SPAGAIN> after the call to this function). It returns the number of
821pushed elements.
822
823The strend and patend pointers should point to the byte following the last
824character of each string.
825
826Although this function returns its values on the perl argument stack, it
827doesn't take any parameters from that stack (and thus in particular
828there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
829example).
7accc089
JH
830
831=cut */
832
833I32
f7fe979e 834Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 835{
f7fe979e 836 tempsym_t sym;
08ca2aa3 837
7918f24d
NC
838 PERL_ARGS_ASSERT_UNPACKSTRING;
839
f337b084 840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
841 else if (need_utf8(pat, patend)) {
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
230e1fce 845 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
846 SAVEFREEPV(s);
847 strend = s + len;
f337b084 848 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
849 }
850
f337b084
TH
851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
852 flags |= FLAG_PARSE_UTF8;
08ca2aa3 853
f7fe979e 854 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
855
856 return unpack_rec(&sym, s, s, strend, NULL );
857}
858
4136a0f7 859STATIC I32
f7fe979e 860S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 861{
27da23d5 862 dVAR; dSP;
3297d27d 863 SV *sv = NULL;
f7fe979e 864 const I32 start_sp_offset = SP - PL_stack_base;
49704364 865 howlen_t howlen;
a6ec74c1 866 I32 checksum = 0;
92d41999 867 UV cuv = 0;
a6ec74c1 868 NV cdouble = 0.0;
f337b084 869 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 870 bool beyond = FALSE;
21c16052 871 bool explicit_length;
9e27e96a 872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
874
875 PERL_ARGS_ASSERT_UNPACK_REC;
876
28be1210 877 symptr->strbeg = s - strbeg;
49704364 878
49704364 879 while (next_symbol(symptr)) {
a7a3cfaa 880 packprops_t props;
9e27e96a 881 I32 len;
f337b084 882 I32 datumtype = symptr->code;
a1219b5e 883 bool needs_swap;
206947d2 884 /* do first one only unless in list context
08ca2aa3 885 / is implemented by unpacking the count, then popping it from the
206947d2 886 stack, so must check that we're not in the middle of a / */
49704364 887 if ( unpack_only_one
206947d2 888 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 889 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 890 break;
49704364 891
f337b084 892 switch (howlen = symptr->howlen) {
fc241834
RGS
893 case e_star:
894 len = strend - strbeg; /* long enough */
49704364 895 break;
f337b084
TH
896 default:
897 /* e_no_len and e_number */
898 len = symptr->length;
899 break;
49704364 900 }
18529408 901
21c16052 902 explicit_length = TRUE;
a6ec74c1 903 redo_switch:
49704364 904 beyond = s >= strend;
a7a3cfaa
TH
905
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
907 if (props) {
908 /* props nonzero means we can process this letter. */
9e27e96a
AL
909 const long size = props & PACK_SIZE_MASK;
910 const long howmany = (strend - s) / size;
a7a3cfaa
TH
911 if (len > howmany)
912 len = howmany;
913
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
916 EXTEND(SP, len);
917 EXTEND_MORTAL(len);
78d46eaa
NC
918 }
919 }
a7a3cfaa 920
a1219b5e
NC
921 needs_swap = NEEDS_SWAP(datumtype);
922
1109a392 923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 924 default:
1109a392 925 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 926
a6ec74c1 927 case '%':
49704364 928 if (howlen == e_no_len)
18529408 929 len = 16; /* len is not specified */
a6ec74c1 930 checksum = len;
92d41999 931 cuv = 0;
a6ec74c1 932 cdouble = 0;
18529408 933 continue;
a6ec74c1 934 break;
18529408
IZ
935 case '(':
936 {
49704364 937 tempsym_t savsym = *symptr;
9e27e96a 938 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 939 symptr->flags |= group_modifiers;
49704364 940 symptr->patend = savsym.grpend;
28be1210 941 symptr->previous = &savsym;
49704364 942 symptr->level++;
18529408 943 PUTBACK;
c6f750d1 944 if (len && unpack_only_one) len = 1;
18529408 945 while (len--) {
49704364 946 symptr->patptr = savsym.grpbeg;
f337b084
TH
947 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
948 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
949 unpack_rec(symptr, s, strbeg, strend, &s);
950 if (s == strend && savsym.howlen == e_star)
49704364 951 break; /* No way to continue */
18529408
IZ
952 }
953 SPAGAIN;
28be1210 954 savsym.flags = symptr->flags & ~group_modifiers;
49704364 955 *symptr = savsym;
18529408
IZ
956 break;
957 }
28be1210 958 case '.' | TYPE_IS_SHRIEKING:
28be1210 959 case '.': {
9e27e96a 960 const char *from;
28be1210 961 SV *sv;
9e27e96a 962 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
963 if (howlen == e_star) from = strbeg;
964 else if (len <= 0) from = s;
965 else {
966 tempsym_t *group = symptr;
967
968 while (--len && group) group = group->previous;
969 from = group ? strbeg + group->strbeg : strbeg;
970 }
971 sv = from <= s ?
00646304
CB
972 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
973 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 974 mXPUSHs(sv);
28be1210
TH
975 break;
976 }
28be1210 977 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 978 case '@':
28be1210 979 s = strbeg + symptr->strbeg;
28be1210 980 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 981 {
08ca2aa3
TH
982 while (len > 0) {
983 if (s >= strend)
984 Perl_croak(aTHX_ "'@' outside of string in unpack");
985 s += UTF8SKIP(s);
986 len--;
987 }
988 if (s > strend)
989 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
990 } else {
28be1210 991 if (strend-s < len)
fc241834 992 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 993 s += len;
08ca2aa3 994 }
a6ec74c1 995 break;
62f95557
IZ
996 case 'X' | TYPE_IS_SHRIEKING:
997 if (!len) /* Avoid division by 0 */
998 len = 1;
08ca2aa3 999 if (utf8) {
f7fe979e 1000 const char *hop, *last;
f337b084
TH
1001 I32 l = len;
1002 hop = last = strbeg;
1003 while (hop < s) {
1004 hop += UTF8SKIP(hop);
1005 if (--l == 0) {
08ca2aa3 1006 last = hop;
f337b084
TH
1007 l = len;
1008 }
fc241834 1009 }
f337b084
TH
1010 if (last > s)
1011 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1012 s = last;
1013 break;
f337b084
TH
1014 }
1015 len = (s - strbeg) % len;
62f95557 1016 /* FALL THROUGH */
a6ec74c1 1017 case 'X':
08ca2aa3
TH
1018 if (utf8) {
1019 while (len > 0) {
1020 if (s <= strbeg)
1021 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1022 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1023 if (s <= strbeg)
1024 Perl_croak(aTHX_ "'X' outside of string in unpack");
1025 }
1026 len--;
1027 }
1028 } else {
fc241834
RGS
1029 if (len > s - strbeg)
1030 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1031 s -= len;
08ca2aa3 1032 }
a6ec74c1 1033 break;
9e27e96a
AL
1034 case 'x' | TYPE_IS_SHRIEKING: {
1035 I32 ai32;
62f95557
IZ
1036 if (!len) /* Avoid division by 0 */
1037 len = 1;
230e1fce
NC
1038 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1039 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1040 if (ai32 == 0) break;
1041 len -= ai32;
9e27e96a 1042 }
62f95557 1043 /* FALL THROUGH */
a6ec74c1 1044 case 'x':
08ca2aa3
TH
1045 if (utf8) {
1046 while (len>0) {
1047 if (s >= strend)
1048 Perl_croak(aTHX_ "'x' outside of string in unpack");
1049 s += UTF8SKIP(s);
1050 len--;
1051 }
1052 } else {
fc241834
RGS
1053 if (len > strend - s)
1054 Perl_croak(aTHX_ "'x' outside of string in unpack");
1055 s += len;
f337b084 1056 }
a6ec74c1
JH
1057 break;
1058 case '/':
49704364
WL
1059 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1060 break;
a6ec74c1
JH
1061 case 'A':
1062 case 'Z':
1063 case 'a':
08ca2aa3
TH
1064 if (checksum) {
1065 /* Preliminary length estimate is assumed done in 'W' */
1066 if (len > strend - s) len = strend - s;
1067 goto W_checksum;
1068 }
1069 if (utf8) {
1070 I32 l;
f7fe979e 1071 const char *hop;
08ca2aa3
TH
1072 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1073 if (hop >= strend) {
1074 if (hop > strend)
1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1076 break;
fc241834 1077 }
a6ec74c1 1078 }
08ca2aa3
TH
1079 if (hop > strend)
1080 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1081 len = hop - s;
1082 } else if (len > strend - s)
1083 len = strend - s;
1084
1085 if (datumtype == 'Z') {
1086 /* 'Z' strips stuff after first null */
f7fe979e 1087 const char *ptr, *end;
f337b084
TH
1088 end = s + len;
1089 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1090 sv = newSVpvn(s, ptr-s);
1091 if (howlen == e_star) /* exact for 'Z*' */
1092 len = ptr-s + (ptr != strend ? 1 : 0);
1093 } else if (datumtype == 'A') {
1094 /* 'A' strips both nulls and spaces */
f7fe979e 1095 const char *ptr;
18bdf90a
TH
1096 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1097 for (ptr = s+len-1; ptr >= s; ptr--)
1098 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1099 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1100 if (ptr >= s) ptr += UTF8SKIP(ptr);
1101 else ptr++;
28be1210 1102 if (ptr > s+len)
18bdf90a
TH
1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1104 } else {
1105 for (ptr = s+len-1; ptr >= s; ptr--)
1106 if (*ptr != 0 && !isSPACE(*ptr)) break;
1107 ptr++;
1108 }
08ca2aa3
TH
1109 sv = newSVpvn(s, ptr-s);
1110 } else sv = newSVpvn(s, len);
1111
1112 if (utf8) {
1113 SvUTF8_on(sv);
1114 /* Undo any upgrade done due to need_utf8() */
f337b084 1115 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1116 sv_utf8_downgrade(sv, 0);
a6ec74c1 1117 }
6e449a3a 1118 mXPUSHs(sv);
08ca2aa3 1119 s += len;
a6ec74c1
JH
1120 break;
1121 case 'B':
08ca2aa3
TH
1122 case 'b': {
1123 char *str;
49704364 1124 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1125 len = (strend - s) * 8;
1126 if (checksum) {
f337b084 1127 if (utf8)
08ca2aa3 1128 while (len >= 8 && s < strend) {
f337b084 1129 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1130 len -= 8;
1131 }
f337b084 1132 else
fc241834 1133 while (len >= 8) {
08ca2aa3 1134 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1135 len -= 8;
1136 }
08ca2aa3
TH
1137 if (len && s < strend) {
1138 U8 bits;
f337b084
TH
1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 if (datumtype == 'b')
a6ec74c1 1141 while (len-- > 0) {
92d41999 1142 if (bits & 1) cuv++;
a6ec74c1
JH
1143 bits >>= 1;
1144 }
f337b084 1145 else
a6ec74c1 1146 while (len-- > 0) {
08ca2aa3 1147 if (bits & 0x80) cuv++;
a6ec74c1
JH
1148 bits <<= 1;
1149 }
fc241834 1150 }
a6ec74c1
JH
1151 break;
1152 }
08ca2aa3 1153
561b68a9 1154 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1155 SvPOK_on(sv);
1156 str = SvPVX(sv);
1157 if (datumtype == 'b') {
f337b084 1158 U8 bits = 0;
f7fe979e 1159 const I32 ai32 = len;
08ca2aa3
TH
1160 for (len = 0; len < ai32; len++) {
1161 if (len & 7) bits >>= 1;
1162 else if (utf8) {
1163 if (s >= strend) break;
f337b084 1164 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1165 } else bits = *(U8 *) s++;
1166 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1167 }
08ca2aa3 1168 } else {
f337b084 1169 U8 bits = 0;
f7fe979e 1170 const I32 ai32 = len;
08ca2aa3
TH
1171 for (len = 0; len < ai32; len++) {
1172 if (len & 7) bits <<= 1;
1173 else if (utf8) {
1174 if (s >= strend) break;
f337b084 1175 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1176 } else bits = *(U8 *) s++;
1177 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1178 }
1179 }
1180 *str = '\0';
aa07b2f6 1181 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1182 XPUSHs(sv);
a6ec74c1 1183 break;
08ca2aa3 1184 }
a6ec74c1 1185 case 'H':
08ca2aa3 1186 case 'h': {
3297d27d 1187 char *str = NULL;
fc241834 1188 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1189 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1190 len = (strend - s) * 2;
858fe5e1
TC
1191 if (!checksum) {
1192 sv = sv_2mortal(newSV(len ? len : 1));
1193 SvPOK_on(sv);
1194 str = SvPVX(sv);
1195 }
a6ec74c1 1196 if (datumtype == 'h') {
f337b084 1197 U8 bits = 0;
9e27e96a 1198 I32 ai32 = len;
fc241834
RGS
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 1) bits >>= 4;
1201 else if (utf8) {
1202 if (s >= strend) break;
f337b084 1203 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1204 } else bits = * (U8 *) s++;
858fe5e1
TC
1205 if (!checksum)
1206 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1207 }
08ca2aa3 1208 } else {
f337b084 1209 U8 bits = 0;
f7fe979e 1210 const I32 ai32 = len;
08ca2aa3
TH
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits <<= 4;
1213 else if (utf8) {
1214 if (s >= strend) break;
f337b084 1215 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1216 } else bits = *(U8 *) s++;
858fe5e1
TC
1217 if (!checksum)
1218 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1219 }
1220 }
858fe5e1
TC
1221 if (!checksum) {
1222 *str = '\0';
1223 SvCUR_set(sv, str - SvPVX_const(sv));
1224 XPUSHs(sv);
1225 }
a6ec74c1 1226 break;
08ca2aa3 1227 }
1651fc44
ML
1228 case 'C':
1229 if (len == 0) {
1230 if (explicit_length)
1231 /* Switch to "character" mode */
1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1233 break;
1234 }
1235 /* FALL THROUGH */
a6ec74c1 1236 case 'c':
1651fc44
ML
1237 while (len-- > 0 && s < strend) {
1238 int aint;
1239 if (utf8)
1240 {
1241 STRLEN retlen;
1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1244 if (retlen == (STRLEN) -1 || retlen == 0)
1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1246 s += retlen;
1247 }
1248 else
1249 aint = *(U8 *)(s)++;
1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1251 aint -= 256;
08ca2aa3 1252 if (!checksum)
6e449a3a 1253 mPUSHi(aint);
73cb7263
NC
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)aint;
1256 else
1257 cuv += aint;
a6ec74c1
JH
1258 }
1259 break;
08ca2aa3
TH
1260 case 'W':
1261 W_checksum:
1651fc44 1262 if (utf8) {
08ca2aa3 1263 while (len-- > 0 && s < strend) {
08ca2aa3 1264 STRLEN retlen;
f7fe979e 1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1267 if (retlen == (STRLEN) -1 || retlen == 0)
1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1269 s += retlen;
1270 if (!checksum)
6e449a3a 1271 mPUSHu(val);
08ca2aa3
TH
1272 else if (checksum > bits_in_uv)
1273 cdouble += (NV) val;
d6d3e8bd 1274 else
08ca2aa3 1275 cuv += val;
fc241834 1276 }
08ca2aa3 1277 } else if (!checksum)
a6ec74c1 1278 while (len-- > 0) {
f7fe979e 1279 const U8 ch = *(U8 *) s++;
6e449a3a 1280 mPUSHu(ch);
a6ec74c1 1281 }
08ca2aa3
TH
1282 else if (checksum > bits_in_uv)
1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1284 else
1285 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1286 break;
1287 case 'U':
35bcd338 1288 if (len == 0) {
c5333953 1289 if (explicit_length && howlen != e_star) {
08ca2aa3 1290 /* Switch to "bytes in UTF-8" mode */
f337b084 1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1292 else
1293 /* Should be impossible due to the need_utf8() test */
1294 Perl_croak(aTHX_ "U0 mode on a byte string");
1295 }
35bcd338
JH
1296 break;
1297 }
08ca2aa3 1298 if (len > strend - s) len = strend - s;
fc241834 1299 if (!checksum) {
08ca2aa3
TH
1300 if (len && unpack_only_one) len = 1;
1301 EXTEND(SP, len);
1302 EXTEND_MORTAL(len);
fc241834 1303 }
08ca2aa3
TH
1304 while (len-- > 0 && s < strend) {
1305 STRLEN retlen;
1306 UV auv;
1307 if (utf8) {
1308 U8 result[UTF8_MAXLEN];
f7fe979e 1309 const char *ptr = s;
08ca2aa3 1310 STRLEN len;
08ca2aa3
TH
1311 /* Bug: warns about bad utf8 even if we are short on bytes
1312 and will break out of the loop */
230e1fce
NC
1313 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1314 'U'))
08ca2aa3
TH
1315 break;
1316 len = UTF8SKIP(result);
fc241834 1317 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1318 (char *) &result[1], len-1, 'U')) break;
c80e42f3 1319 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1320 s = ptr;
1321 } else {
c80e42f3 1322 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1325 s += retlen;
1326 }
1327 if (!checksum)
6e449a3a 1328 mPUSHu(auv);
73cb7263 1329 else if (checksum > bits_in_uv)
08ca2aa3 1330 cdouble += (NV) auv;
73cb7263 1331 else
08ca2aa3 1332 cuv += auv;
a6ec74c1
JH
1333 }
1334 break;
49704364
WL
1335 case 's' | TYPE_IS_SHRIEKING:
1336#if SHORTSIZE != SIZE16
73cb7263 1337 while (len-- > 0) {
08ca2aa3 1338 short ashort;
aaec8192 1339 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1340 if (!checksum)
6e449a3a 1341 mPUSHi(ashort);
73cb7263
NC
1342 else if (checksum > bits_in_uv)
1343 cdouble += (NV)ashort;
1344 else
1345 cuv += ashort;
49704364
WL
1346 }
1347 break;
1348#else
1349 /* Fallthrough! */
a6ec74c1 1350#endif
49704364 1351 case 's':
73cb7263 1352 while (len-- > 0) {
08ca2aa3
TH
1353 I16 ai16;
1354
1355#if U16SIZE > SIZE16
1356 ai16 = 0;
1357#endif
aaec8192 1358 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1359#if U16SIZE > SIZE16
73cb7263
NC
1360 if (ai16 > 32767)
1361 ai16 -= 65536;
a6ec74c1 1362#endif
08ca2aa3 1363 if (!checksum)
6e449a3a 1364 mPUSHi(ai16);
73cb7263
NC
1365 else if (checksum > bits_in_uv)
1366 cdouble += (NV)ai16;
1367 else
1368 cuv += ai16;
a6ec74c1
JH
1369 }
1370 break;
49704364
WL
1371 case 'S' | TYPE_IS_SHRIEKING:
1372#if SHORTSIZE != SIZE16
73cb7263 1373 while (len-- > 0) {
08ca2aa3 1374 unsigned short aushort;
aaec8192
NC
1375 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1376 needs_swap);
08ca2aa3 1377 if (!checksum)
6e449a3a 1378 mPUSHu(aushort);
73cb7263
NC
1379 else if (checksum > bits_in_uv)
1380 cdouble += (NV)aushort;
1381 else
1382 cuv += aushort;
49704364
WL
1383 }
1384 break;
1385#else
486ec47a 1386 /* Fallthrough! */
49704364 1387#endif
a6ec74c1
JH
1388 case 'v':
1389 case 'n':
1390 case 'S':
73cb7263 1391 while (len-- > 0) {
08ca2aa3
TH
1392 U16 au16;
1393#if U16SIZE > SIZE16
1394 au16 = 0;
1395#endif
aaec8192 1396 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1397 if (datumtype == 'n')
1398 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1399 if (datumtype == 'v')
1400 au16 = vtohs(au16);
08ca2aa3 1401 if (!checksum)
6e449a3a 1402 mPUSHu(au16);
73cb7263 1403 else if (checksum > bits_in_uv)
f337b084 1404 cdouble += (NV) au16;
73cb7263
NC
1405 else
1406 cuv += au16;
a6ec74c1
JH
1407 }
1408 break;
068bd2e7
MHM
1409 case 'v' | TYPE_IS_SHRIEKING:
1410 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1411 while (len-- > 0) {
08ca2aa3
TH
1412 I16 ai16;
1413# if U16SIZE > SIZE16
1414 ai16 = 0;
1415# endif
aaec8192 1416 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1417 /* There should never be any byte-swapping here. */
1418 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1419 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1420 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1421 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1422 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1423 if (!checksum)
6e449a3a 1424 mPUSHi(ai16);
73cb7263 1425 else if (checksum > bits_in_uv)
08ca2aa3 1426 cdouble += (NV) ai16;
73cb7263
NC
1427 else
1428 cuv += ai16;
068bd2e7
MHM
1429 }
1430 break;
a6ec74c1 1431 case 'i':
49704364 1432 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1433 while (len-- > 0) {
08ca2aa3 1434 int aint;
aaec8192 1435 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1436 if (!checksum)
6e449a3a 1437 mPUSHi(aint);
73cb7263
NC
1438 else if (checksum > bits_in_uv)
1439 cdouble += (NV)aint;
1440 else
1441 cuv += aint;
a6ec74c1
JH
1442 }
1443 break;
1444 case 'I':
49704364 1445 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1446 while (len-- > 0) {
08ca2aa3 1447 unsigned int auint;
aaec8192 1448 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1449 if (!checksum)
6e449a3a 1450 mPUSHu(auint);
73cb7263
NC
1451 else if (checksum > bits_in_uv)
1452 cdouble += (NV)auint;
1453 else
1454 cuv += auint;
a6ec74c1
JH
1455 }
1456 break;
92d41999 1457 case 'j':
73cb7263 1458 while (len-- > 0) {
08ca2aa3 1459 IV aiv;
aaec8192 1460 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1461 if (!checksum)
6e449a3a 1462 mPUSHi(aiv);
73cb7263
NC
1463 else if (checksum > bits_in_uv)
1464 cdouble += (NV)aiv;
1465 else
1466 cuv += aiv;
92d41999
JH
1467 }
1468 break;
1469 case 'J':
73cb7263 1470 while (len-- > 0) {
08ca2aa3 1471 UV auv;
aaec8192 1472 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1473 if (!checksum)
6e449a3a 1474 mPUSHu(auv);
73cb7263
NC
1475 else if (checksum > bits_in_uv)
1476 cdouble += (NV)auv;
1477 else
1478 cuv += auv;
92d41999
JH
1479 }
1480 break;
49704364
WL
1481 case 'l' | TYPE_IS_SHRIEKING:
1482#if LONGSIZE != SIZE32
73cb7263 1483 while (len-- > 0) {
08ca2aa3 1484 long along;
aaec8192 1485 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1486 if (!checksum)
6e449a3a 1487 mPUSHi(along);
73cb7263
NC
1488 else if (checksum > bits_in_uv)
1489 cdouble += (NV)along;
1490 else
1491 cuv += along;
49704364
WL
1492 }
1493 break;
1494#else
1495 /* Fallthrough! */
a6ec74c1 1496#endif
49704364 1497 case 'l':
73cb7263 1498 while (len-- > 0) {
08ca2aa3
TH
1499 I32 ai32;
1500#if U32SIZE > SIZE32
1501 ai32 = 0;
1502#endif
aaec8192 1503 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1504#if U32SIZE > SIZE32
08ca2aa3 1505 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1506#endif
08ca2aa3 1507 if (!checksum)
6e449a3a 1508 mPUSHi(ai32);
73cb7263
NC
1509 else if (checksum > bits_in_uv)
1510 cdouble += (NV)ai32;
1511 else
1512 cuv += ai32;
a6ec74c1
JH
1513 }
1514 break;
49704364
WL
1515 case 'L' | TYPE_IS_SHRIEKING:
1516#if LONGSIZE != SIZE32
73cb7263 1517 while (len-- > 0) {
08ca2aa3 1518 unsigned long aulong;
aaec8192 1519 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1520 if (!checksum)
6e449a3a 1521 mPUSHu(aulong);
73cb7263
NC
1522 else if (checksum > bits_in_uv)
1523 cdouble += (NV)aulong;
1524 else
1525 cuv += aulong;
49704364
WL
1526 }
1527 break;
1528#else
1529 /* Fall through! */
1530#endif
a6ec74c1
JH
1531 case 'V':
1532 case 'N':
1533 case 'L':
73cb7263 1534 while (len-- > 0) {
08ca2aa3
TH
1535 U32 au32;
1536#if U32SIZE > SIZE32
1537 au32 = 0;
1538#endif
aaec8192 1539 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1540 if (datumtype == 'N')
1541 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1542 if (datumtype == 'V')
1543 au32 = vtohl(au32);
08ca2aa3 1544 if (!checksum)
6e449a3a 1545 mPUSHu(au32);
fc241834
RGS
1546 else if (checksum > bits_in_uv)
1547 cdouble += (NV)au32;
1548 else
1549 cuv += au32;
a6ec74c1
JH
1550 }
1551 break;
068bd2e7
MHM
1552 case 'V' | TYPE_IS_SHRIEKING:
1553 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1554 while (len-- > 0) {
08ca2aa3 1555 I32 ai32;
f8e5a5db 1556#if U32SIZE > SIZE32
08ca2aa3 1557 ai32 = 0;
f8e5a5db 1558#endif
aaec8192 1559 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1560 /* There should never be any byte swapping here. */
1561 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1562 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1563 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1564 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1565 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1566 if (!checksum)
6e449a3a 1567 mPUSHi(ai32);
73cb7263
NC
1568 else if (checksum > bits_in_uv)
1569 cdouble += (NV)ai32;
1570 else
1571 cuv += ai32;
068bd2e7
MHM
1572 }
1573 break;
a6ec74c1 1574 case 'p':
a6ec74c1 1575 while (len-- > 0) {
f7fe979e 1576 const char *aptr;
aaec8192 1577 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1578 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1579 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1580 }
1581 break;
1582 case 'w':
a6ec74c1
JH
1583 {
1584 UV auv = 0;
1585 U32 bytes = 0;
fc241834 1586
08ca2aa3
TH
1587 while (len > 0 && s < strend) {
1588 U8 ch;
f337b084 1589 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1590 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1591 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1592 if (ch < 0x80) {
a6ec74c1 1593 bytes = 0;
6e449a3a 1594 mPUSHu(auv);
a6ec74c1
JH
1595 len--;
1596 auv = 0;
08ca2aa3 1597 continue;
a6ec74c1 1598 }
08ca2aa3 1599 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1600 const char *t;
a6ec74c1 1601
f5992bc4 1602 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1603 while (s < strend) {
f337b084 1604 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1605 sv = mul128(sv, (U8)(ch & 0x7f));
1606 if (!(ch & 0x80)) {
a6ec74c1
JH
1607 bytes = 0;
1608 break;
1609 }
1610 }
10516c54 1611 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1612 while (*t == '0')
1613 t++;
1614 sv_chop(sv, t);
6e449a3a 1615 mPUSHs(sv);
a6ec74c1
JH
1616 len--;
1617 auv = 0;
1618 }
1619 }
1620 if ((s >= strend) && bytes)
49704364 1621 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1622 }
1623 break;
1624 case 'P':
49704364
WL
1625 if (symptr->howlen == e_star)
1626 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1627 EXTEND(SP, 1);
2d3e0934 1628 if (s + sizeof(char*) <= strend) {
08ca2aa3 1629 char *aptr;
aaec8192 1630 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1631 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1632 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1633 }
a6ec74c1
JH
1634 break;
1635#ifdef HAS_QUAD
1636 case 'q':
73cb7263 1637 while (len-- > 0) {
08ca2aa3 1638 Quad_t aquad;
aaec8192 1639 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1640 if (!checksum)
6e449a3a
MHM
1641 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1642 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1643 else if (checksum > bits_in_uv)
1644 cdouble += (NV)aquad;
1645 else
1646 cuv += aquad;
1647 }
a6ec74c1
JH
1648 break;
1649 case 'Q':
73cb7263 1650 while (len-- > 0) {
08ca2aa3 1651 Uquad_t auquad;
aaec8192 1652 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1653 if (!checksum)
6e449a3a
MHM
1654 mPUSHs(auquad <= UV_MAX ?
1655 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1656 else if (checksum > bits_in_uv)
1657 cdouble += (NV)auquad;
1658 else
1659 cuv += auquad;
a6ec74c1
JH
1660 }
1661 break;
08ca2aa3 1662#endif /* HAS_QUAD */
a6ec74c1
JH
1663 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1664 case 'f':
73cb7263 1665 while (len-- > 0) {
08ca2aa3 1666 float afloat;
aaec8192 1667 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1668 if (!checksum)
6e449a3a 1669 mPUSHn(afloat);
08ca2aa3 1670 else
73cb7263 1671 cdouble += afloat;
fc241834 1672 }
a6ec74c1
JH
1673 break;
1674 case 'd':
73cb7263 1675 while (len-- > 0) {
08ca2aa3 1676 double adouble;
aaec8192 1677 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1678 if (!checksum)
6e449a3a 1679 mPUSHn(adouble);
08ca2aa3 1680 else
73cb7263 1681 cdouble += adouble;
fc241834 1682 }
a6ec74c1 1683 break;
92d41999 1684 case 'F':
73cb7263 1685 while (len-- > 0) {
275663fa 1686 NV_bytes anv;
aaec8192
NC
1687 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1688 datumtype, needs_swap);
08ca2aa3 1689 if (!checksum)
275663fa 1690 mPUSHn(anv.nv);
08ca2aa3 1691 else
275663fa 1692 cdouble += anv.nv;
fc241834 1693 }
92d41999
JH
1694 break;
1695#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1696 case 'D':
73cb7263 1697 while (len-- > 0) {
275663fa 1698 ld_bytes aldouble;
aaec8192
NC
1699 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1700 sizeof(aldouble.bytes), datumtype, needs_swap);
08ca2aa3 1701 if (!checksum)
275663fa 1702 mPUSHn(aldouble.ld);
08ca2aa3 1703 else
275663fa 1704 cdouble += aldouble.ld;
92d41999
JH
1705 }
1706 break;
1707#endif
a6ec74c1 1708 case 'u':
858fe5e1 1709 if (!checksum) {
f7fe979e 1710 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1711 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1712 if (l) SvPOK_on(sv);
1713 }
1714 if (utf8) {
1715 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1716 I32 a, b, c, d;
db187877 1717 char hunk[3];
08ca2aa3 1718
08ca2aa3
TH
1719 while (len > 0) {
1720 next_uni_uu(aTHX_ &s, strend, &a);
1721 next_uni_uu(aTHX_ &s, strend, &b);
1722 next_uni_uu(aTHX_ &s, strend, &c);
1723 next_uni_uu(aTHX_ &s, strend, &d);
1724 hunk[0] = (char)((a << 2) | (b >> 4));
1725 hunk[1] = (char)((b << 4) | (c >> 2));
1726 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1727 if (!checksum)
1728 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1729 len -= 3;
1730 }
1731 if (s < strend) {
f7fe979e
AL
1732 if (*s == '\n') {
1733 s++;
1734 }
08ca2aa3
TH
1735 else {
1736 /* possible checksum byte */
f7fe979e
AL
1737 const char *skip = s+UTF8SKIP(s);
1738 if (skip < strend && *skip == '\n')
1739 s = skip+1;
08ca2aa3
TH
1740 }
1741 }
1742 }
1743 } else {
fc241834
RGS
1744 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1745 I32 a, b, c, d;
db187877 1746 char hunk[3];
a6ec74c1 1747
fc241834
RGS
1748 len = PL_uudmap[*(U8*)s++] & 077;
1749 while (len > 0) {
1750 if (s < strend && ISUUCHAR(*s))
1751 a = PL_uudmap[*(U8*)s++] & 077;
1752 else
1753 a = 0;
1754 if (s < strend && ISUUCHAR(*s))
1755 b = PL_uudmap[*(U8*)s++] & 077;
1756 else
1757 b = 0;
1758 if (s < strend && ISUUCHAR(*s))
1759 c = PL_uudmap[*(U8*)s++] & 077;
1760 else
1761 c = 0;
1762 if (s < strend && ISUUCHAR(*s))
1763 d = PL_uudmap[*(U8*)s++] & 077;
1764 else
1765 d = 0;
1766 hunk[0] = (char)((a << 2) | (b >> 4));
1767 hunk[1] = (char)((b << 4) | (c >> 2));
1768 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1769 if (!checksum)
1770 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1771 len -= 3;
1772 }
1773 if (*s == '\n')
1774 s++;
1775 else /* possible checksum byte */
1776 if (s + 1 < strend && s[1] == '\n')
1777 s += 2;
a6ec74c1 1778 }
08ca2aa3 1779 }
858fe5e1
TC
1780 if (!checksum)
1781 XPUSHs(sv);
a6ec74c1
JH
1782 break;
1783 }
49704364 1784
a6ec74c1 1785 if (checksum) {
1109a392 1786 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1787 (checksum > bits_in_uv &&
08ca2aa3
TH
1788 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1789 NV trouble, anv;
a6ec74c1 1790
08ca2aa3 1791 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1792 while (checksum >= 16) {
1793 checksum -= 16;
08ca2aa3 1794 anv *= 65536.0;
a6ec74c1 1795 }
a6ec74c1 1796 while (cdouble < 0.0)
08ca2aa3
TH
1797 cdouble += anv;
1798 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1799 sv = newSVnv(cdouble);
a6ec74c1
JH
1800 }
1801 else {
fa8ec7c1
NC
1802 if (checksum < bits_in_uv) {
1803 UV mask = ((UV)1 << checksum) - 1;
92d41999 1804 cuv &= mask;
a6ec74c1 1805 }
c4c5f44a 1806 sv = newSVuv(cuv);
a6ec74c1 1807 }
6e449a3a 1808 mXPUSHs(sv);
a6ec74c1
JH
1809 checksum = 0;
1810 }
fc241834 1811
49704364
WL
1812 if (symptr->flags & FLAG_SLASH){
1813 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1814 break;
49704364
WL
1815 if( next_symbol(symptr) ){
1816 if( symptr->howlen == e_number )
1817 Perl_croak(aTHX_ "Count after length/code in unpack" );
1818 if( beyond ){
1819 /* ...end of char buffer then no decent length available */
1820 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1821 } else {
1822 /* take top of stack (hope it's numeric) */
1823 len = POPi;
1824 if( len < 0 )
1825 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1826 }
1827 } else {
1828 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1829 }
1830 datumtype = symptr->code;
21c16052 1831 explicit_length = FALSE;
49704364
WL
1832 goto redo_switch;
1833 }
a6ec74c1 1834 }
49704364 1835
18529408
IZ
1836 if (new_s)
1837 *new_s = s;
1838 PUTBACK;
1839 return SP - PL_stack_base - start_sp_offset;
1840}
1841
1842PP(pp_unpack)
1843{
97aff369 1844 dVAR;
18529408 1845 dSP;
bab9c0ac 1846 dPOPPOPssrl;
18529408
IZ
1847 I32 gimme = GIMME_V;
1848 STRLEN llen;
1849 STRLEN rlen;
5c144d81
NC
1850 const char *pat = SvPV_const(left, llen);
1851 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1852 const char *strend = s + rlen;
1853 const char *patend = pat + llen;
08ca2aa3 1854 I32 cnt;
18529408
IZ
1855
1856 PUTBACK;
7accc089 1857 cnt = unpackstring(pat, patend, s, strend,
49704364 1858 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1859 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1860
18529408
IZ
1861 SPAGAIN;
1862 if ( !cnt && gimme == G_SCALAR )
1863 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1864 RETURN;
1865}
1866
f337b084 1867STATIC U8 *
f7fe979e 1868doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1869{
f337b084 1870 *h++ = PL_uuemap[len];
a6ec74c1 1871 while (len > 2) {
f337b084
TH
1872 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1873 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1874 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1875 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1876 s += 3;
1877 len -= 3;
1878 }
1879 if (len > 0) {
f7fe979e 1880 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1881 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1882 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1883 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1884 *h++ = PL_uuemap[0];
a6ec74c1 1885 }
f337b084
TH
1886 *h++ = '\n';
1887 return h;
a6ec74c1
JH
1888}
1889
1890STATIC SV *
f7fe979e 1891S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1892{
8b6e33c7
AL
1893 SV *result = newSVpvn(s, l);
1894 char *const result_c = SvPV_nolen(result); /* convenience */
1895 char *out = result_c;
1896 bool skip = 1;
1897 bool ignore = 0;
a6ec74c1 1898
7918f24d
NC
1899 PERL_ARGS_ASSERT_IS_AN_INT;
1900
a6ec74c1
JH
1901 while (*s) {
1902 switch (*s) {
1903 case ' ':
1904 break;
1905 case '+':
1906 if (!skip) {
1907 SvREFCNT_dec(result);
1908 return (NULL);
1909 }
1910 break;
1911 case '0':
1912 case '1':
1913 case '2':
1914 case '3':
1915 case '4':
1916 case '5':
1917 case '6':
1918 case '7':
1919 case '8':
1920 case '9':
1921 skip = 0;
1922 if (!ignore) {
1923 *(out++) = *s;
1924 }
1925 break;
1926 case '.':
1927 ignore = 1;
1928 break;
1929 default:
1930 SvREFCNT_dec(result);
1931 return (NULL);
1932 }
1933 s++;
1934 }
1935 *(out++) = '\0';
1936 SvCUR_set(result, out - result_c);
1937 return (result);
1938}
1939
1940/* pnum must be '\0' terminated */
1941STATIC int
1942S_div128(pTHX_ SV *pnum, bool *done)
1943{
8b6e33c7
AL
1944 STRLEN len;
1945 char * const s = SvPV(pnum, len);
1946 char *t = s;
1947 int m = 0;
1948
7918f24d
NC
1949 PERL_ARGS_ASSERT_DIV128;
1950
8b6e33c7
AL
1951 *done = 1;
1952 while (*t) {
1953 const int i = m * 10 + (*t - '0');
1954 const int r = (i >> 7); /* r < 10 */
1955 m = i & 0x7F;
1956 if (r) {
1957 *done = 0;
1958 }
1959 *(t++) = '0' + r;
a6ec74c1 1960 }
8b6e33c7
AL
1961 *(t++) = '\0';
1962 SvCUR_set(pnum, (STRLEN) (t - s));
1963 return (m);
a6ec74c1
JH
1964}
1965
18529408 1966/*
7accc089
JH
1967=for apidoc packlist
1968
1969The engine implementing pack() Perl function.
1970
bfce84ec
AL
1971=cut
1972*/
7accc089
JH
1973
1974void
5aaab254 1975Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1976{
97aff369 1977 dVAR;
aadb217d
JH
1978 tempsym_t sym;
1979
7918f24d
NC
1980 PERL_ARGS_ASSERT_PACKLIST;
1981
f7fe979e 1982 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1983
f337b084
TH
1984 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1985 Also make sure any UTF8 flag is loaded */
56eb0262 1986 SvPV_force_nolen(cat);
bfce84ec
AL
1987 if (DO_UTF8(cat))
1988 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1989
49704364
WL
1990 (void)pack_rec( cat, &sym, beglist, endlist );
1991}
1992
f337b084
TH
1993/* like sv_utf8_upgrade, but also repoint the group start markers */
1994STATIC void
1995marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1996 STRLEN len;
1997 tempsym_t *group;
f7fe979e
AL
1998 const char *from_ptr, *from_start, *from_end, **marks, **m;
1999 char *to_start, *to_ptr;
f337b084
TH
2000
2001 if (SvUTF8(sv)) return;
2002
aa07b2f6 2003 from_start = SvPVX_const(sv);
f337b084
TH
2004 from_end = from_start + SvCUR(sv);
2005 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 2006 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2007 if (from_ptr == from_end) {
2008 /* Simple case: no character needs to be changed */
2009 SvUTF8_on(sv);
2010 return;
2011 }
2012
3473cf63 2013 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2014 Newx(to_start, len, char);
f337b084
TH
2015 Copy(from_start, to_start, from_ptr-from_start, char);
2016 to_ptr = to_start + (from_ptr-from_start);
2017
a02a5408 2018 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2019 for (group=sym_ptr; group; group = group->previous)
2020 marks[group->level] = from_start + group->strbeg;
2021 marks[sym_ptr->level+1] = from_end+1;
2022 for (m = marks; *m < from_ptr; m++)
2023 *m = to_start + (*m-from_start);
2024
2025 for (;from_ptr < from_end; from_ptr++) {
2026 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2027 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2028 }
2029 *to_ptr = 0;
2030
2031 while (*m == from_ptr) *m++ = to_ptr;
2032 if (m != marks + sym_ptr->level+1) {
2033 Safefree(marks);
2034 Safefree(to_start);
5637ef5b
NC
2035 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2036 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2037 }
2038 for (group=sym_ptr; group; group = group->previous)
2039 group->strbeg = marks[group->level] - to_start;
2040 Safefree(marks);
2041
2042 if (SvOOK(sv)) {
2043 if (SvIVX(sv)) {
b162af07 2044 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2045 from_start -= SvIVX(sv);
2046 SvIV_set(sv, 0);
2047 }
2048 SvFLAGS(sv) &= ~SVf_OOK;
2049 }
2050 if (SvLEN(sv) != 0)
2051 Safefree(from_start);
f880fe2f 2052 SvPV_set(sv, to_start);
b162af07
SP
2053 SvCUR_set(sv, to_ptr - to_start);
2054 SvLEN_set(sv, len);
f337b084
TH
2055 SvUTF8_on(sv);
2056}
2057
2058/* Exponential string grower. Makes string extension effectively O(n)
2059 needed says how many extra bytes we need (not counting the final '\0')
2060 Only grows the string if there is an actual lack of space
2061*/
2062STATIC char *
0bd48802 2063S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2064 const STRLEN cur = SvCUR(sv);
2065 const STRLEN len = SvLEN(sv);
f337b084 2066 STRLEN extend;
7918f24d
NC
2067
2068 PERL_ARGS_ASSERT_SV_EXP_GROW;
2069
f337b084
TH
2070 if (len - cur > needed) return SvPVX(sv);
2071 extend = needed > len ? needed : len;
2072 return SvGROW(sv, len+extend+1);
2073}
49704364
WL
2074
2075STATIC
2076SV **
f337b084 2077S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2078{
97aff369 2079 dVAR;
49704364 2080 tempsym_t lookahead;
f337b084
TH
2081 I32 items = endlist - beglist;
2082 bool found = next_symbol(symptr);
2083 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2084 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2085
7918f24d
NC
2086 PERL_ARGS_ASSERT_PACK_REC;
2087
f337b084
TH
2088 if (symptr->level == 0 && found && symptr->code == 'U') {
2089 marked_upgrade(aTHX_ cat, symptr);
2090 symptr->flags |= FLAG_DO_UTF8;
2091 utf8 = 0;
49704364 2092 }
f337b084 2093 symptr->strbeg = SvCUR(cat);
49704364
WL
2094
2095 while (found) {
f337b084
TH
2096 SV *fromstr;
2097 STRLEN fromlen;
2098 I32 len;
a0714e2c 2099 SV *lengthcode = NULL;
49704364 2100 I32 datumtype = symptr->code;
f337b084
TH
2101 howlen_t howlen = symptr->howlen;
2102 char *start = SvPVX(cat);
2103 char *cur = start + SvCUR(cat);
a1219b5e 2104 bool needs_swap;
49704364 2105
f337b084
TH
2106#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2107
2108 switch (howlen) {
fc241834 2109 case e_star:
f337b084
TH
2110 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2111 0 : items;
2112 break;
2113 default:
2114 /* e_no_len and e_number */
2115 len = symptr->length;
49704364
WL
2116 break;
2117 }
2118
f337b084 2119 if (len) {
a7a3cfaa 2120 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2121
a7a3cfaa
TH
2122 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2123 /* We can process this letter. */
2124 STRLEN size = props & PACK_SIZE_MASK;
2125 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2126 }
f337b084
TH
2127 }
2128
49704364
WL
2129 /* Look ahead for next symbol. Do we have code/code? */
2130 lookahead = *symptr;
2131 found = next_symbol(&lookahead);
246f24af
TH
2132 if (symptr->flags & FLAG_SLASH) {
2133 IV count;
f337b084 2134 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2135 if (strchr("aAZ", lookahead.code)) {
2136 if (lookahead.howlen == e_number) count = lookahead.length;
2137 else {
ce399ba6 2138 if (items > 0) {
48a5da33 2139 count = sv_len_utf8(*beglist);
ce399ba6 2140 }
246f24af
TH
2141 else count = 0;
2142 if (lookahead.code == 'Z') count++;
2143 }
2144 } else {
2145 if (lookahead.howlen == e_number && lookahead.length < items)
2146 count = lookahead.length;
2147 else count = items;
2148 }
2149 lookahead.howlen = e_number;
2150 lookahead.length = count;
2151 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2152 }
49704364 2153
a1219b5e
NC
2154 needs_swap = NEEDS_SWAP(datumtype);
2155
fc241834
RGS
2156 /* Code inside the switch must take care to properly update
2157 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2158 doesn't simply leave using break */
1109a392 2159 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2160 default:
f337b084
TH
2161 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2162 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2163 case '%':
49704364 2164 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2165 {
2166 char *from;
28be1210 2167 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2168 case '.':
2169 if (howlen == e_star) from = start;
2170 else if (len == 0) from = cur;
2171 else {
2172 tempsym_t *group = symptr;
2173
2174 while (--len && group) group = group->previous;
2175 from = group ? start + group->strbeg : start;
2176 }
2177 fromstr = NEXTFROM;
2178 len = SvIV(fromstr);
2179 goto resize;
28be1210 2180 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2181 case '@':
28be1210
TH
2182 from = start + symptr->strbeg;
2183 resize:
28be1210 2184 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2185 if (len >= 0) {
2186 while (len && from < cur) {
2187 from += UTF8SKIP(from);
2188 len--;
2189 }
2190 if (from > cur)
2191 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2192 if (len) {
2193 /* Here we know from == cur */
2194 grow:
2195 GROWING(0, cat, start, cur, len);
2196 Zero(cur, len, char);
2197 cur += len;
2198 } else if (from < cur) {
2199 len = cur - from;
2200 goto shrink;
2201 } else goto no_change;
2202 } else {
2203 cur = from;
2204 len = -len;
2205 goto utf8_shrink;
f337b084 2206 }
28be1210
TH
2207 else {
2208 len -= cur - from;
f337b084 2209 if (len > 0) goto grow;
28be1210 2210 if (len == 0) goto no_change;
fc241834 2211 len = -len;
28be1210 2212 goto shrink;
f337b084 2213 }
a6ec74c1 2214 break;
28be1210 2215 }
fc241834 2216 case '(': {
49704364 2217 tempsym_t savsym = *symptr;
66c611c5
MHM
2218 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2219 symptr->flags |= group_modifiers;
49704364
WL
2220 symptr->patend = savsym.grpend;
2221 symptr->level++;
f337b084 2222 symptr->previous = &lookahead;
18529408 2223 while (len--) {
f337b084
TH
2224 U32 was_utf8;
2225 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2226 else symptr->flags &= ~FLAG_PARSE_UTF8;
2227 was_utf8 = SvUTF8(cat);
49704364 2228 symptr->patptr = savsym.grpbeg;
f337b084
TH
2229 beglist = pack_rec(cat, symptr, beglist, endlist);
2230 if (SvUTF8(cat) != was_utf8)
2231 /* This had better be an upgrade while in utf8==0 mode */
2232 utf8 = 1;
2233
49704364 2234 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2235 break; /* No way to continue */
2236 }
ee790063 2237 items = endlist - beglist;
f337b084
TH
2238 lookahead.flags = symptr->flags & ~group_modifiers;
2239 goto no_change;
18529408 2240 }
62f95557
IZ
2241 case 'X' | TYPE_IS_SHRIEKING:
2242 if (!len) /* Avoid division by 0 */
2243 len = 1;
f337b084
TH
2244 if (utf8) {
2245 char *hop, *last;
2246 I32 l = len;
2247 hop = last = start;
2248 while (hop < cur) {
2249 hop += UTF8SKIP(hop);
2250 if (--l == 0) {
2251 last = hop;
2252 l = len;
2253 }
2254 }
2255 if (last > cur)
2256 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2257 cur = last;
2258 break;
2259 }
2260 len = (cur-start) % len;
62f95557 2261 /* FALL THROUGH */
a6ec74c1 2262 case 'X':
f337b084
TH
2263 if (utf8) {
2264 if (len < 1) goto no_change;
28be1210 2265 utf8_shrink:
f337b084
TH
2266 while (len > 0) {
2267 if (cur <= start)
28be1210
TH
2268 Perl_croak(aTHX_ "'%c' outside of string in pack",
2269 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2270 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2271 if (cur <= start)
28be1210
TH
2272 Perl_croak(aTHX_ "'%c' outside of string in pack",
2273 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2274 }
2275 len--;
2276 }
2277 } else {
fc241834 2278 shrink:
f337b084 2279 if (cur - start < len)
28be1210
TH
2280 Perl_croak(aTHX_ "'%c' outside of string in pack",
2281 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2282 cur -= len;
2283 }
2284 if (cur < start+symptr->strbeg) {
2285 /* Make sure group starts don't point into the void */
2286 tempsym_t *group;
9e27e96a 2287 const STRLEN length = cur-start;
f337b084
TH
2288 for (group = symptr;
2289 group && length < group->strbeg;
2290 group = group->previous) group->strbeg = length;
2291 lookahead.strbeg = length;
2292 }
a6ec74c1 2293 break;
fc241834
RGS
2294 case 'x' | TYPE_IS_SHRIEKING: {
2295 I32 ai32;
62f95557
IZ
2296 if (!len) /* Avoid division by 0 */
2297 len = 1;
230e1fce 2298 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2299 else ai32 = (cur - start) % len;
2300 if (ai32 == 0) goto no_change;
2301 len -= ai32;
2302 }
2303 /* FALL THROUGH */
a6ec74c1 2304 case 'x':
f337b084 2305 goto grow;
a6ec74c1
JH
2306 case 'A':
2307 case 'Z':
f337b084 2308 case 'a': {
f7fe979e 2309 const char *aptr;
f337b084 2310
a6ec74c1 2311 fromstr = NEXTFROM;
e62f0680 2312 aptr = SvPV_const(fromstr, fromlen);
f337b084 2313 if (DO_UTF8(fromstr)) {
f7fe979e 2314 const char *end, *s;
f337b084
TH
2315
2316 if (!utf8 && !SvUTF8(cat)) {
2317 marked_upgrade(aTHX_ cat, symptr);
2318 lookahead.flags |= FLAG_DO_UTF8;
2319 lookahead.strbeg = symptr->strbeg;
2320 utf8 = 1;
2321 start = SvPVX(cat);
2322 cur = start + SvCUR(cat);
2323 }
fc241834 2324 if (howlen == e_star) {
f337b084
TH
2325 if (utf8) goto string_copy;
2326 len = fromlen+1;
2327 }
2328 s = aptr;
2329 end = aptr + fromlen;
2330 fromlen = datumtype == 'Z' ? len-1 : len;
2331 while ((I32) fromlen > 0 && s < end) {
2332 s += UTF8SKIP(s);
2333 fromlen--;
2334 }
2335 if (s > end)
2336 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2337 if (utf8) {
fc241834 2338 len = fromlen;
f337b084
TH
2339 if (datumtype == 'Z') len++;
2340 fromlen = s-aptr;
2341 len += fromlen;
fc241834 2342
f337b084 2343 goto string_copy;
fc241834 2344 }
f337b084
TH
2345 fromlen = len - fromlen;
2346 if (datumtype == 'Z') fromlen--;
2347 if (howlen == e_star) {
2348 len = fromlen;
2349 if (datumtype == 'Z') len++;
fc241834 2350 }
f337b084 2351 GROWING(0, cat, start, cur, len);
fc241834 2352 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2353 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2354 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2355 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2356 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2357 cur += fromlen;
a6ec74c1 2358 len -= fromlen;
f337b084
TH
2359 } else if (utf8) {
2360 if (howlen == e_star) {
2361 len = fromlen;
2362 if (datumtype == 'Z') len++;
a6ec74c1 2363 }
f337b084
TH
2364 if (len <= (I32) fromlen) {
2365 fromlen = len;
2366 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2367 }
fc241834 2368 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2369 upgrade, so:
2370 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2371 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2372 len -= fromlen;
2373 while (fromlen > 0) {
230e1fce 2374 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2375 aptr++;
2376 fromlen--;
fc241834 2377 }
f337b084
TH
2378 } else {
2379 string_copy:
2380 if (howlen == e_star) {
2381 len = fromlen;
2382 if (datumtype == 'Z') len++;
2383 }
2384 if (len <= (I32) fromlen) {
2385 fromlen = len;
2386 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2387 }
f337b084
TH
2388 GROWING(0, cat, start, cur, len);
2389 Copy(aptr, cur, fromlen, char);
2390 cur += fromlen;
2391 len -= fromlen;
a6ec74c1 2392 }
f337b084
TH
2393 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2394 cur += len;
3c4fb04a 2395 SvTAINT(cat);
a6ec74c1 2396 break;
f337b084 2397 }
a6ec74c1 2398 case 'B':
f337b084 2399 case 'b': {
b83604b4 2400 const char *str, *end;
f337b084
TH
2401 I32 l, field_len;
2402 U8 bits;
2403 bool utf8_source;
2404 U32 utf8_flags;
a6ec74c1 2405
fc241834 2406 fromstr = NEXTFROM;
b83604b4 2407 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2408 end = str + fromlen;
2409 if (DO_UTF8(fromstr)) {
2410 utf8_source = TRUE;
041457d9 2411 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2412 } else {
2413 utf8_source = FALSE;
2414 utf8_flags = 0; /* Unused, but keep compilers happy */
2415 }
2416 if (howlen == e_star) len = fromlen;
2417 field_len = (len+7)/8;
2418 GROWING(utf8, cat, start, cur, field_len);
2419 if (len > (I32)fromlen) len = fromlen;
2420 bits = 0;
2421 l = 0;
2422 if (datumtype == 'B')
2423 while (l++ < len) {
2424 if (utf8_source) {
95b63a38 2425 UV val = 0;
f337b084
TH
2426 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2427 bits |= val & 1;
2428 } else bits |= *str++ & 1;
2429 if (l & 7) bits <<= 1;
fc241834 2430 else {
f337b084
TH
2431 PUSH_BYTE(utf8, cur, bits);
2432 bits = 0;
a6ec74c1
JH
2433 }
2434 }
f337b084
TH
2435 else
2436 /* datumtype == 'b' */
2437 while (l++ < len) {
2438 if (utf8_source) {
95b63a38 2439 UV val = 0;
f337b084
TH
2440 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2441 if (val & 1) bits |= 0x80;
2442 } else if (*str++ & 1)
2443 bits |= 0x80;
2444 if (l & 7) bits >>= 1;
fc241834 2445 else {
f337b084
TH
2446 PUSH_BYTE(utf8, cur, bits);
2447 bits = 0;
a6ec74c1
JH
2448 }
2449 }
f337b084
TH
2450 l--;
2451 if (l & 7) {
fc241834 2452 if (datumtype == 'B')
f337b084 2453 bits <<= 7 - (l & 7);
fc241834 2454 else
f337b084
TH
2455 bits >>= 7 - (l & 7);
2456 PUSH_BYTE(utf8, cur, bits);
2457 l += 7;
a6ec74c1 2458 }
f337b084
TH
2459 /* Determine how many chars are left in the requested field */
2460 l /= 8;
2461 if (howlen == e_star) field_len = 0;
2462 else field_len -= l;
2463 Zero(cur, field_len, char);
2464 cur += field_len;
a6ec74c1 2465 break;
f337b084 2466 }
a6ec74c1 2467 case 'H':
f337b084 2468 case 'h': {
10516c54 2469 const char *str, *end;
f337b084
TH
2470 I32 l, field_len;
2471 U8 bits;
2472 bool utf8_source;
2473 U32 utf8_flags;
a6ec74c1 2474
fc241834 2475 fromstr = NEXTFROM;
10516c54 2476 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2477 end = str + fromlen;
2478 if (DO_UTF8(fromstr)) {
2479 utf8_source = TRUE;
041457d9 2480 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2481 } else {
2482 utf8_source = FALSE;
2483 utf8_flags = 0; /* Unused, but keep compilers happy */
2484 }
2485 if (howlen == e_star) len = fromlen;
2486 field_len = (len+1)/2;
2487 GROWING(utf8, cat, start, cur, field_len);
2488 if (!utf8 && len > (I32)fromlen) len = fromlen;
2489 bits = 0;
2490 l = 0;
2491 if (datumtype == 'H')
2492 while (l++ < len) {
2493 if (utf8_source) {
95b63a38 2494 UV val = 0;
f337b084
TH
2495 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2496 if (val < 256 && isALPHA(val))
2497 bits |= (val + 9) & 0xf;
a6ec74c1 2498 else
f337b084
TH
2499 bits |= val & 0xf;
2500 } else if (isALPHA(*str))
2501 bits |= (*str++ + 9) & 0xf;
2502 else
2503 bits |= *str++ & 0xf;
2504 if (l & 1) bits <<= 4;
fc241834 2505 else {
f337b084
TH
2506 PUSH_BYTE(utf8, cur, bits);
2507 bits = 0;
a6ec74c1
JH
2508 }
2509 }
f337b084
TH
2510 else
2511 while (l++ < len) {
2512 if (utf8_source) {
95b63a38 2513 UV val = 0;
f337b084
TH
2514 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2515 if (val < 256 && isALPHA(val))
2516 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2517 else
f337b084
TH
2518 bits |= (val & 0xf) << 4;
2519 } else if (isALPHA(*str))
2520 bits |= ((*str++ + 9) & 0xf) << 4;
2521 else
2522 bits |= (*str++ & 0xf) << 4;
2523 if (l & 1) bits >>= 4;
fc241834 2524 else {
f337b084
TH
2525 PUSH_BYTE(utf8, cur, bits);
2526 bits = 0;
a6ec74c1 2527 }
fc241834 2528 }
f337b084
TH
2529 l--;
2530 if (l & 1) {
2531 PUSH_BYTE(utf8, cur, bits);
2532 l++;
2533 }
2534 /* Determine how many chars are left in the requested field */
2535 l /= 2;
2536 if (howlen == e_star) field_len = 0;
2537 else field_len -= l;
2538 Zero(cur, field_len, char);
2539 cur += field_len;
2540 break;
fc241834
RGS
2541 }
2542 case 'c':
f337b084
TH
2543 while (len-- > 0) {
2544 IV aiv;
2545 fromstr = NEXTFROM;
2546 aiv = SvIV(fromstr);
a2a5de95
NC
2547 if ((-128 > aiv || aiv > 127))
2548 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2549 "Character in 'c' format wrapped in pack");
585ec06d 2550 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2551 }
2552 break;
2553 case 'C':
f337b084
TH
2554 if (len == 0) {
2555 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2556 break;
2557 }
a6ec74c1 2558 while (len-- > 0) {
f337b084 2559 IV aiv;
a6ec74c1 2560 fromstr = NEXTFROM;
f337b084 2561 aiv = SvIV(fromstr);
a2a5de95
NC
2562 if ((0 > aiv || aiv > 0xff))
2563 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2564 "Character in 'C' format wrapped in pack");
1651fc44 2565 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2566 }
fc241834
RGS
2567 break;
2568 case 'W': {
2569 char *end;
670f1322 2570 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2571
2572 end = start+SvLEN(cat)-1;
2573 if (utf8) end -= UTF8_MAXLEN-1;
2574 while (len-- > 0) {
2575 UV auv;
2576 fromstr = NEXTFROM;
2577 auv = SvUV(fromstr);
2578 if (in_bytes) auv = auv % 0x100;
2579 if (utf8) {
2580 W_utf8:
2581 if (cur > end) {
2582 *cur = '\0';
b162af07 2583 SvCUR_set(cat, cur - start);
fc241834
RGS
2584
2585 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2586 end = start+SvLEN(cat)-UTF8_MAXLEN;
2587 }
c80e42f3
KW
2588 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2589 auv,
041457d9 2590 warn_utf8 ?
230e1fce 2591 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2592 } else {
2593 if (auv >= 0x100) {
2594 if (!SvUTF8(cat)) {
2595 *cur = '\0';
b162af07 2596 SvCUR_set(cat, cur - start);
fc241834
RGS
2597 marked_upgrade(aTHX_ cat, symptr);
2598 lookahead.flags |= FLAG_DO_UTF8;
2599 lookahead.strbeg = symptr->strbeg;
2600 utf8 = 1;
2601 start = SvPVX(cat);
2602 cur = start + SvCUR(cat);
2603 end = start+SvLEN(cat)-UTF8_MAXLEN;
2604 goto W_utf8;
2605 }
a2a5de95
NC
2606 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2607 "Character in 'W' format wrapped in pack");
fc241834
RGS
2608 auv &= 0xff;
2609 }
2610 if (cur >= end) {
2611 *cur = '\0';
b162af07 2612 SvCUR_set(cat, cur - start);
fc241834
RGS
2613 GROWING(0, cat, start, cur, len+1);
2614 end = start+SvLEN(cat)-1;
2615 }
fe2774ed 2616 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2617 }
2618 }
2619 break;
fc241834
RGS
2620 }
2621 case 'U': {
2622 char *end;
2623
2624 if (len == 0) {
2625 if (!(symptr->flags & FLAG_DO_UTF8)) {
2626 marked_upgrade(aTHX_ cat, symptr);
2627 lookahead.flags |= FLAG_DO_UTF8;
2628 lookahead.strbeg = symptr->strbeg;
2629 }
2630 utf8 = 0;
2631 goto no_change;
2632 }
2633
2634 end = start+SvLEN(cat);
2635 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2636 while (len-- > 0) {
fc241834 2637 UV auv;
a6ec74c1 2638 fromstr = NEXTFROM;
fc241834
RGS
2639 auv = SvUV(fromstr);
2640 if (utf8) {
230e1fce 2641 U8 buffer[UTF8_MAXLEN], *endb;
c80e42f3 2642 endb = uvchr_to_utf8_flags(buffer, auv,
041457d9 2643 warn_utf8 ?
fc241834
RGS
2644 0 : UNICODE_ALLOW_ANY);
2645 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2646 *cur = '\0';
b162af07 2647 SvCUR_set(cat, cur - start);
fc241834
RGS
2648 GROWING(0, cat, start, cur,
2649 len+(endb-buffer)*UTF8_EXPAND);
2650 end = start+SvLEN(cat);
2651 }
3a88beaa 2652 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
fc241834
RGS
2653 } else {
2654 if (cur >= end) {
2655 *cur = '\0';
b162af07 2656 SvCUR_set(cat, cur - start);
fc241834
RGS
2657 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2658 end = start+SvLEN(cat)-UTF8_MAXLEN;
2659 }
c80e42f3 2660 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
041457d9 2661 warn_utf8 ?
230e1fce 2662 0 : UNICODE_ALLOW_ANY);
fc241834 2663 }
a6ec74c1 2664 }
a6ec74c1 2665 break;
fc241834 2666 }
a6ec74c1
JH
2667 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2668 case 'f':
a6ec74c1 2669 while (len-- > 0) {
f337b084
TH
2670 float afloat;
2671 NV anv;
a6ec74c1 2672 fromstr = NEXTFROM;
f337b084 2673 anv = SvNV(fromstr);
85bba25f 2674# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2675 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2676 * on Alpha; fake it if we don't have them.
2677 */
f337b084 2678 if (anv > FLT_MAX)
fc241834 2679 afloat = FLT_MAX;
f337b084 2680 else if (anv < -FLT_MAX)
fc241834 2681 afloat = -FLT_MAX;
f337b084 2682 else afloat = (float)anv;
baf3cf9c 2683# else
f337b084 2684 afloat = (float)anv;
baf3cf9c 2685# endif
3a88beaa 2686 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2687 }
2688 break;
2689 case 'd':
a6ec74c1 2690 while (len-- > 0) {
f337b084
TH
2691 double adouble;
2692 NV anv;
a6ec74c1 2693 fromstr = NEXTFROM;
f337b084 2694 anv = SvNV(fromstr);
85bba25f 2695# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2696 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2697 * on Alpha; fake it if we don't have them.
2698 */
f337b084 2699 if (anv > DBL_MAX)
fc241834 2700 adouble = DBL_MAX;
f337b084 2701 else if (anv < -DBL_MAX)
fc241834 2702 adouble = -DBL_MAX;
f337b084 2703 else adouble = (double)anv;
baf3cf9c 2704# else
f337b084 2705 adouble = (double)anv;
baf3cf9c 2706# endif
3a88beaa 2707 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2708 }
2709 break;
fc241834 2710 case 'F': {
275663fa 2711 NV_bytes anv;
1109a392 2712 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2713 while (len-- > 0) {
2714 fromstr = NEXTFROM;
cd07c537
DM
2715#ifdef __GNUC__
2716 /* to work round a gcc/x86 bug; don't use SvNV */
2717 anv.nv = sv_2nv(fromstr);
2718#else
275663fa 2719 anv.nv = SvNV(fromstr);
cd07c537 2720#endif
3a88beaa 2721 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2722 }
2723 break;
fc241834 2724 }
92d41999 2725#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2726 case 'D': {
275663fa 2727 ld_bytes aldouble;
1109a392
MHM
2728 /* long doubles can have unused bits, which may be nonzero */
2729 Zero(&aldouble, 1, long double);
92d41999
JH
2730 while (len-- > 0) {
2731 fromstr = NEXTFROM;
cd07c537
DM
2732# ifdef __GNUC__
2733 /* to work round a gcc/x86 bug; don't use SvNV */
2734 aldouble.ld = (long double)sv_2nv(fromstr);
2735# else
275663fa 2736 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2737# endif
3a88beaa
NC
2738 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2739 needs_swap);
92d41999
JH
2740 }
2741 break;
fc241834 2742 }
92d41999 2743#endif
068bd2e7 2744 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2745 case 'n':
2746 while (len-- > 0) {
f337b084 2747 I16 ai16;
a6ec74c1 2748 fromstr = NEXTFROM;
ef108786 2749 ai16 = (I16)SvIV(fromstr);
ef108786 2750 ai16 = PerlSock_htons(ai16);
3a88beaa 2751 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2752 }
2753 break;
068bd2e7 2754 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2755 case 'v':
2756 while (len-- > 0) {
f337b084 2757 I16 ai16;
a6ec74c1 2758 fromstr = NEXTFROM;
ef108786 2759 ai16 = (I16)SvIV(fromstr);
ef108786 2760 ai16 = htovs(ai16);
3a88beaa 2761 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2762 }
2763 break;
49704364 2764 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2765#if SHORTSIZE != SIZE16
fc241834 2766 while (len-- > 0) {
f337b084 2767 unsigned short aushort;
fc241834
RGS
2768 fromstr = NEXTFROM;
2769 aushort = SvUV(fromstr);
3a88beaa 2770 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2771 }
49704364
WL
2772 break;
2773#else
2774 /* Fall through! */
a6ec74c1 2775#endif
49704364 2776 case 'S':
fc241834 2777 while (len-- > 0) {
f337b084 2778 U16 au16;
fc241834
RGS
2779 fromstr = NEXTFROM;
2780 au16 = (U16)SvUV(fromstr);
3a88beaa 2781 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2782 }
2783 break;
49704364 2784 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2785#if SHORTSIZE != SIZE16
fc241834 2786 while (len-- > 0) {
f337b084 2787 short ashort;
fc241834
RGS
2788 fromstr = NEXTFROM;
2789 ashort = SvIV(fromstr);
3a88beaa 2790 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2791 }
49704364
WL
2792 break;
2793#else
2794 /* Fall through! */
a6ec74c1 2795#endif
49704364
WL
2796 case 's':
2797 while (len-- > 0) {
f337b084 2798 I16 ai16;
49704364 2799 fromstr = NEXTFROM;
ef108786 2800 ai16 = (I16)SvIV(fromstr);
3a88beaa 2801 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2802 }
2803 break;
2804 case 'I':
49704364 2805 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2806 while (len-- > 0) {
f337b084 2807 unsigned int auint;
a6ec74c1
JH
2808 fromstr = NEXTFROM;
2809 auint = SvUV(fromstr);
3a88beaa 2810 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2811 }
2812 break;
92d41999
JH
2813 case 'j':
2814 while (len-- > 0) {
f337b084 2815 IV aiv;
92d41999
JH
2816 fromstr = NEXTFROM;
2817 aiv = SvIV(fromstr);
3a88beaa 2818 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2819 }
2820 break;
2821 case 'J':
2822 while (len-- > 0) {
f337b084 2823 UV auv;
92d41999
JH
2824 fromstr = NEXTFROM;
2825 auv = SvUV(fromstr);
3a88beaa 2826 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2827 }
2828 break;
a6ec74c1
JH
2829 case 'w':
2830 while (len-- > 0) {
f337b084 2831 NV anv;
a6ec74c1 2832 fromstr = NEXTFROM;
15e9f109 2833 anv = SvNV(fromstr);
a6ec74c1 2834
f337b084
TH
2835 if (anv < 0) {
2836 *cur = '\0';
b162af07 2837 SvCUR_set(cat, cur - start);
49704364 2838 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2839 }
a6ec74c1 2840
196b62db
NC
2841 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2842 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2843 any negative IVs will have already been got by the croak()
2844 above. IOK is untrue for fractions, so we test them
2845 against UV_MAX_P1. */
f337b084
TH
2846 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2847 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2848 char *in = buf + sizeof(buf);
196b62db 2849 UV auv = SvUV(fromstr);
a6ec74c1
JH
2850
2851 do {
eb160463 2852 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2853 auv >>= 7;
2854 } while (auv);
2855 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2856 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2857 in, (buf + sizeof(buf)) - in);
2858 } else if (SvPOKp(fromstr))
2859 goto w_string;
a6ec74c1 2860 else if (SvNOKp(fromstr)) {
0258719b 2861 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2862 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2863 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2864 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2865 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2866 Some C compilers are strict about integral constant
2867 expressions so we conservatively divide by a slightly
2868 smaller integer instead of multiplying by the exact
2869 floating-point value.
0258719b
NC
2870 */
2871#ifdef NV_MAX_10_EXP
f337b084 2872 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2873 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2874#else
f337b084 2875 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2876 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2877#endif
a6ec74c1
JH
2878 char *in = buf + sizeof(buf);
2879
8b6e33c7 2880 anv = Perl_floor(anv);
a6ec74c1 2881 do {
8b6e33c7 2882 const NV next = Perl_floor(anv / 128);
a6ec74c1 2883 if (in <= buf) /* this cannot happen ;-) */
49704364 2884 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2885 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2886 anv = next;
2887 } while (anv > 0);
a6ec74c1 2888 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2889 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2890 in, (buf + sizeof(buf)) - in);
2891 } else {
8b6e33c7
AL
2892 const char *from;
2893 char *result, *in;
735b914b
JH
2894 SV *norm;
2895 STRLEN len;
2896 bool done;
2897
f337b084 2898 w_string:
735b914b 2899 /* Copy string and check for compliance */
349d4f2f 2900 from = SvPV_const(fromstr, len);
735b914b 2901 if ((norm = is_an_int(from, len)) == NULL)
49704364 2902 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2903
a02a5408 2904 Newx(result, len, char);
735b914b
JH
2905 in = result + len;
2906 done = FALSE;
f337b084 2907 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2908 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2909 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2910 in, (result + len) - in);
735b914b
JH
2911 Safefree(result);
2912 SvREFCNT_dec(norm); /* free norm */
fc241834 2913 }
a6ec74c1
JH
2914 }
2915 break;
2916 case 'i':
49704364 2917 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2918 while (len-- > 0) {
f337b084 2919 int aint;
a6ec74c1
JH
2920 fromstr = NEXTFROM;
2921 aint = SvIV(fromstr);
3a88beaa 2922 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2923 }
2924 break;
068bd2e7 2925 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2926 case 'N':
2927 while (len-- > 0) {
f337b084 2928 U32 au32;
a6ec74c1 2929 fromstr = NEXTFROM;
ef108786 2930 au32 = SvUV(fromstr);
ef108786 2931 au32 = PerlSock_htonl(au32);
3a88beaa 2932 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2933 }
2934 break;
068bd2e7 2935 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2936 case 'V':
2937 while (len-- > 0) {
f337b084 2938 U32 au32;
a6ec74c1 2939 fromstr = NEXTFROM;
ef108786 2940 au32 = SvUV(fromstr);
ef108786 2941 au32 = htovl(au32);
3a88beaa 2942 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2943 }
2944 break;
49704364 2945 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2946#if LONGSIZE != SIZE32
fc241834 2947 while (len-- > 0) {
f337b084 2948 unsigned long aulong;
fc241834
RGS
2949 fromstr = NEXTFROM;
2950 aulong = SvUV(fromstr);
3a88beaa 2951 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2952 }
49704364
WL
2953 break;
2954#else
2955 /* Fall though! */
a6ec74c1 2956#endif
49704364 2957 case 'L':
fc241834 2958 while (len-- > 0) {
f337b084 2959 U32 au32;
fc241834
RGS
2960 fromstr = NEXTFROM;
2961 au32 = SvUV(fromstr);
3a88beaa 2962 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2963 }
2964 break;
49704364 2965 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2966#if LONGSIZE != SIZE32
fc241834 2967 while (len-- > 0) {
f337b084 2968 long along;
fc241834
RGS
2969 fromstr = NEXTFROM;
2970 along = SvIV(fromstr);
3a88beaa 2971 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 2972 }
49704364
WL
2973 break;
2974#else
2975 /* Fall though! */
a6ec74c1 2976#endif
49704364
WL
2977 case 'l':
2978 while (len-- > 0) {
f337b084 2979 I32 ai32;
49704364 2980 fromstr = NEXTFROM;
ef108786 2981 ai32 = SvIV(fromstr);
3a88beaa 2982 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
2983 }
2984 break;
2985#ifdef HAS_QUAD
2986 case 'Q':
2987 while (len-- > 0) {
f337b084 2988 Uquad_t auquad;
a6ec74c1 2989 fromstr = NEXTFROM;
f337b084 2990 auquad = (Uquad_t) SvUV(fromstr);
3a88beaa 2991 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
2992 }
2993 break;
2994 case 'q':
2995 while (len-- > 0) {
f337b084 2996 Quad_t aquad;
a6ec74c1
JH
2997 fromstr = NEXTFROM;
2998 aquad = (Quad_t)SvIV(fromstr);
3a88beaa 2999 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3000 }
3001 break;
f337b084 3002#endif /* HAS_QUAD */
a6ec74c1
JH
3003 case 'P':
3004 len = 1; /* assume SV is correct length */
f337b084 3005 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3006 /* Fall through! */
a6ec74c1
JH
3007 case 'p':
3008 while (len-- > 0) {
83003860 3009 const char *aptr;
f337b084 3010
a6ec74c1 3011 fromstr = NEXTFROM;
28a4f200
TH
3012 SvGETMAGIC(fromstr);
3013 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3014 else {
a6ec74c1
JH
3015 /* XXX better yet, could spirit away the string to
3016 * a safe spot and hang on to it until the result
3017 * of pack() (and all copies of the result) are
3018 * gone.
3019 */
041457d9 3020 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3021 !SvREADONLY(fromstr)))) {
3022 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3023 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3024 }
3025 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3026 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3027 else
2596d9fe 3028 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3029 }
3a88beaa 3030 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3031 }
3032 break;
fc241834 3033 case 'u': {
f7fe979e 3034 const char *aptr, *aend;
fc241834 3035 bool from_utf8;
f337b084 3036
a6ec74c1 3037 fromstr = NEXTFROM;
fc241834
RGS
3038 if (len <= 2) len = 45;
3039 else len = len / 3 * 3;
3040 if (len >= 64) {
a2a5de95
NC
3041 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3042 "Field too wide in 'u' format in pack");
fc241834
RGS
3043 len = 63;
3044 }
83003860 3045 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3046 from_utf8 = DO_UTF8(fromstr);
3047 if (from_utf8) {
3048 aend = aptr + fromlen;
3f63b0e5 3049 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3050 } else aend = NULL; /* Unused, but keep compilers happy */
3051 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3052 while (fromlen > 0) {
fc241834 3053 U8 *end;
a6ec74c1 3054 I32 todo;
fc241834 3055 U8 hunk[1+63/3*4+1];
a6ec74c1 3056
eb160463 3057 if ((I32)fromlen > len)
a6ec74c1
JH
3058 todo = len;
3059 else
3060 todo = fromlen;
fc241834
RGS
3061 if (from_utf8) {
3062 char buffer[64];
3063 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3064 'u' | TYPE_IS_PACK)) {
3065 *cur = '\0';
b162af07 3066 SvCUR_set(cat, cur - start);
5637ef5b
NC
3067 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3068 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3069 aptr, aend, buffer, (long) todo);
fc241834
RGS
3070 }
3071 end = doencodes(hunk, buffer, todo);
3072 } else {
3073 end = doencodes(hunk, aptr, todo);
3074 aptr += todo;
3075 }
3a88beaa 3076 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3077 fromlen -= todo;
3078 }
a6ec74c1
JH
3079 break;
3080 }
f337b084
TH
3081 }
3082 *cur = '\0';
b162af07 3083 SvCUR_set(cat, cur - start);
f337b084 3084 no_change:
49704364 3085 *symptr = lookahead;
a6ec74c1 3086 }
49704364 3087 return beglist;
18529408
IZ
3088}
3089#undef NEXTFROM
3090
3091
3092PP(pp_pack)
3093{
97aff369 3094 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3095 SV *cat = TARG;
18529408 3096 STRLEN fromlen;
349d4f2f 3097 SV *pat_sv = *++MARK;
eb578fdb
KW
3098 const char *pat = SvPV_const(pat_sv, fromlen);
3099 const char *patend = pat + fromlen;
18529408
IZ
3100
3101 MARK++;
76f68e9b 3102 sv_setpvs(cat, "");
f337b084 3103 SvUTF8_off(cat);
18529408 3104
7accc089 3105 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3106
a6ec74c1
JH
3107 SvSETMAGIC(cat);
3108 SP = ORIGMARK;
3109 PUSHs(cat);
3110 RETURN;
3111}
a6ec74c1 3112
73cb7263
NC
3113/*
3114 * Local variables:
3115 * c-indentation-style: bsd
3116 * c-basic-offset: 4
14d04a33 3117 * indent-tabs-mode: nil
73cb7263
NC
3118 * End:
3119 *
14d04a33 3120 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3121 */