This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re-implement SHIFT16 and SHIFT32 as wrappers to SHIFT_BYTES.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
f337b084
TH
132#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
133#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3 134
a1219b5e
NC
135#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
137#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
139#else
140# error "Unsupported byteorder"
141 /* Need to add code here to re-instate mixed endian support. */
142#endif
143
08ca2aa3 144/* Only to be used inside a loop (see the break) */
275663fa 145#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
146STMT_START { \
147 if (utf8) { \
f337b084 148 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 149 (char *) (buf), len, datumtype)) break; \
08ca2aa3 150 } else { \
275663fa
TC
151 Copy(s, (char *) (buf), len, char); \
152 s += len; \
08ca2aa3 153 } \
08ca2aa3
TH
154} STMT_END
155
7285e3f4
NC
156#define SHIFT16(utf8, s, strend, p, datumtype) \
157 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype)
158
159#define SHIFT32(utf8, s, strend, p, datumtype) \
160 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype)
161
275663fa
TC
162#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
163 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
164
f337b084 165#define PUSH_VAR(utf8, aptr, var) \
230e1fce 166 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 167
49704364
LW
168/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
169#define MAX_SUB_TEMPLATE_LEVEL 100
170
66c611c5 171/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
172#define FLAG_WAS_UTF8 0x40
173#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 174#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 175#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
176#define FLAG_SLASH 0x04
177#define FLAG_COMMA 0x02
178#define FLAG_PACK 0x01
179
a6ec74c1
JH
180STATIC SV *
181S_mul128(pTHX_ SV *sv, U8 m)
182{
183 STRLEN len;
184 char *s = SvPV(sv, len);
185 char *t;
a6ec74c1 186
7918f24d
NC
187 PERL_ARGS_ASSERT_MUL128;
188
a6ec74c1 189 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 190 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
191
192 sv_catsv(tmpNew, sv);
193 SvREFCNT_dec(sv); /* free old sv */
194 sv = tmpNew;
195 s = SvPV(sv, len);
196 }
197 t = s + len - 1;
198 while (!*t) /* trailing '\0'? */
199 t--;
200 while (t > s) {
f7fe979e 201 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
202 *(t--) = '0' + (char)(i % 10);
203 m = (char)(i / 10);
a6ec74c1
JH
204 }
205 return (sv);
206}
207
208/* Explosives and implosives. */
209
210#if 'I' == 73 && 'J' == 74
211/* On an ASCII/ISO kind of system */
212#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
213#else
214/*
215 Some other sort of character set - use memchr() so we don't match
216 the null byte.
217 */
218#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
219#endif
220
66c611c5 221/* type modifiers */
62f95557 222#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
223#define TYPE_IS_BIG_ENDIAN 0x200
224#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 225#define TYPE_IS_PACK 0x800
1109a392 226#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 227#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
228#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
229
7212898e
NC
230# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
231# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
232
233# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
234
2b4ad569 235# define DO_BO_UNPACK(var) \
3cc90e2e 236 STMT_START { \
a1219b5e 237 if (needs_swap) { \
e8fda8c4 238 my_swabn(&var, sizeof(var)); \
3cc90e2e
NC
239 } \
240 } STMT_END
241
2b4ad569 242# define DO_BO_PACK(var) \
3cc90e2e 243 STMT_START { \
a1219b5e 244 if (needs_swap) { \
e8fda8c4 245 my_swabn(&var, sizeof(var)); \
3cc90e2e
NC
246 } \
247 } STMT_END
248
78d46eaa 249#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 250#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
251#define PACK_SIZE_MASK 0x3F
252
298bc19c 253#include "packsizetables.c"
78d46eaa 254
08ca2aa3 255STATIC U8
f7fe979e 256uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 257{
08ca2aa3 258 STRLEN retlen;
0bcc34c2 259 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 260 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 261 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
262 warnings), but these two mean we make no progress in the string and
263 might enter an infinite loop */
264 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
265 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
266 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 267 if (val >= 0x100) {
a2a5de95
NC
268 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
269 "Character in '%c' format wrapped in unpack",
270 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
271 val &= 0xff;
272 }
273 *s += retlen;
fe2774ed 274 return (U8)val;
08ca2aa3
TH
275}
276
f337b084
TH
277#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
278 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
279 *(U8 *)(s)++)
280
281STATIC bool
f7fe979e 282uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
283{
284 UV val;
285 STRLEN retlen;
f7fe979e 286 const char *from = *s;
08ca2aa3 287 int bad = 0;
f7fe979e 288 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
289 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
f337b084 292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
293 if (retlen == (STRLEN) -1 || retlen == 0) {
294 from += UTF8SKIP(from);
295 bad |= 1;
296 } else from += retlen;
297 if (val >= 0x100) {
298 bad |= 2;
299 val &= 0xff;
300 }
fe2774ed 301 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
302 }
303 /* We have enough characters for the buffer. Did we have problems ? */
304 if (bad) {
305 if (bad & 1) {
306 /* Rewalk the string fragment while warning */
f7fe979e 307 const char *ptr;
9e27e96a 308 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
309 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
310 if (ptr >= end) break;
f337b084 311 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
312 }
313 if (from > end) from = end;
314 }
a2a5de95
NC
315 if ((bad & 2))
316 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 317 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
318 "Character(s) in '%c' format wrapped in %s",
319 (int) TYPE_NO_MODIFIERS(datumtype),
320 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
321 }
322 *s = from;
323 return TRUE;
324}
325
326STATIC bool
f7fe979e 327next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 328{
97aff369 329 dVAR;
08ca2aa3 330 STRLEN retlen;
0bcc34c2 331 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
332 if (val >= 0x100 || !ISUUCHAR(val) ||
333 retlen == (STRLEN) -1 || retlen == 0) {
334 *out = 0;
335 return FALSE;
336 }
337 *out = PL_uudmap[val] & 077;
f337b084 338 *s += retlen;
08ca2aa3
TH
339 return TRUE;
340}
78d46eaa 341
64844641 342STATIC char *
14333449 343S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 344 const U8 * const end = start + len;
64844641 345
7918f24d
NC
346 PERL_ARGS_ASSERT_BYTES_TO_UNI;
347
f337b084 348 while (start < end) {
48fa4626
ST
349 const UV uv = NATIVE_TO_ASCII(*start);
350 if (UNI_IS_INVARIANT(uv))
351 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
352 else {
353 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
354 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
355 }
356 start++;
357 }
64844641 358 return dest;
f337b084
TH
359}
360
230e1fce
NC
361#define PUSH_BYTES(utf8, cur, buf, len) \
362STMT_START { \
64844641
AL
363 if (utf8) \
364 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
365 else { \
366 Copy(buf, cur, len, char); \
367 (cur) += (len); \
368 } \
f337b084
TH
369} STMT_END
370
371#define GROWING(utf8, cat, start, cur, in_len) \
372STMT_START { \
373 STRLEN glen = (in_len); \
3473cf63 374 if (utf8) glen *= UTF8_EXPAND; \
f337b084 375 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 376 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
377 (cur) = (start) + SvCUR(cat); \
378 } \
379} STMT_END
380
381#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
382STMT_START { \
f7fe979e 383 const STRLEN glen = (in_len); \
f337b084 384 STRLEN gl = glen; \
3473cf63 385 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
386 if ((cur) + gl >= (start) + SvLEN(cat)) { \
387 *cur = '\0'; \
b162af07 388 SvCUR_set((cat), (cur) - (start)); \
0bd48802 389 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
390 (cur) = (start) + SvCUR(cat); \
391 } \
392 PUSH_BYTES(utf8, cur, buf, glen); \
393} STMT_END
394
395#define PUSH_BYTE(utf8, s, byte) \
396STMT_START { \
397 if (utf8) { \
f7fe979e 398 const U8 au8 = (byte); \
64844641 399 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
400 } else *(U8 *)(s)++ = (byte); \
401} STMT_END
402
403/* Only to be used inside a loop (see the break) */
404#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
405STMT_START { \
406 STRLEN retlen; \
407 if (str >= end) break; \
408 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
409 if (retlen == (STRLEN) -1 || retlen == 0) { \
410 *cur = '\0'; \
411 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
412 } \
413 str += retlen; \
414} STMT_END
415
f7fe979e
AL
416static const char *_action( const tempsym_t* symptr )
417{
10edeb5d 418 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
419}
420
206947d2 421/* Returns the sizeof() struct described by pat */
028d1f6d 422STATIC I32
f337b084 423S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 424{
f337b084 425 I32 total = 0;
206947d2 426
7918f24d
NC
427 PERL_ARGS_ASSERT_MEASURE_STRUCT;
428
49704364 429 while (next_symbol(symptr)) {
f337b084 430 I32 len;
f7fe979e 431 int size;
f337b084
TH
432
433 switch (symptr->howlen) {
fc241834 434 case e_star:
49704364 435 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 436 _action( symptr ) );
49704364 437 break;
f337b084
TH
438 default:
439 /* e_no_len and e_number */
440 len = symptr->length;
441 break;
49704364
LW
442 }
443
a7a3cfaa 444 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 445 if (!size) {
f7fe979e 446 int star;
80a13697
NC
447 /* endianness doesn't influence the size of a type */
448 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
449 default:
450 Perl_croak(aTHX_ "Invalid type '%c' in %s",
451 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 452 _action( symptr ) );
28be1210
TH
453 case '.' | TYPE_IS_SHRIEKING:
454 case '@' | TYPE_IS_SHRIEKING:
80a13697 455 case '@':
28be1210 456 case '.':
80a13697
NC
457 case '/':
458 case 'U': /* XXXX Is it correct? */
459 case 'w':
460 case 'u':
461 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 462 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 463 _action( symptr ) );
80a13697
NC
464 case '%':
465 size = 0;
466 break;
467 case '(':
fc241834
RGS
468 {
469 tempsym_t savsym = *symptr;
470 symptr->patptr = savsym.grpbeg;
471 symptr->patend = savsym.grpend;
472 /* XXXX Theoretically, we need to measure many times at
473 different positions, since the subexpression may contain
474 alignment commands, but be not of aligned length.
475 Need to detect this and croak(). */
476 size = measure_struct(symptr);
477 *symptr = savsym;
478 break;
479 }
80a13697
NC
480 case 'X' | TYPE_IS_SHRIEKING:
481 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
482 */
483 if (!len) /* Avoid division by 0 */
484 len = 1;
485 len = total % len; /* Assumed: the start is aligned. */
486 /* FALL THROUGH */
487 case 'X':
488 size = -1;
489 if (total < len)
f7fe979e 490 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
491 break;
492 case 'x' | TYPE_IS_SHRIEKING:
493 if (!len) /* Avoid division by 0 */
494 len = 1;
495 star = total % len; /* Assumed: the start is aligned. */
496 if (star) /* Other portable ways? */
497 len = len - star;
498 else
499 len = 0;
500 /* FALL THROUGH */
501 case 'x':
502 case 'A':
503 case 'Z':
504 case 'a':
80a13697
NC
505 size = 1;
506 break;
507 case 'B':
508 case 'b':
509 len = (len + 7)/8;
510 size = 1;
511 break;
512 case 'H':
513 case 'h':
514 len = (len + 1)/2;
515 size = 1;
516 break;
78d46eaa 517
80a13697
NC
518 case 'P':
519 len = 1;
520 size = sizeof(char*);
78d46eaa
NC
521 break;
522 }
206947d2
IZ
523 }
524 total += len * size;
525 }
526 return total;
527}
528
49704364
LW
529
530/* locate matching closing parenthesis or bracket
531 * returns char pointer to char after match, or NULL
532 */
f7fe979e 533STATIC const char *
5aaab254 534S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 535{
7918f24d
NC
536 PERL_ARGS_ASSERT_GROUP_END;
537
49704364 538 while (patptr < patend) {
f7fe979e 539 const char c = *patptr++;
49704364
LW
540
541 if (isSPACE(c))
542 continue;
543 else if (c == ender)
544 return patptr-1;
545 else if (c == '#') {
546 while (patptr < patend && *patptr != '\n')
547 patptr++;
548 continue;
549 } else if (c == '(')
550 patptr = group_end(patptr, patend, ')') + 1;
551 else if (c == '[')
552 patptr = group_end(patptr, patend, ']') + 1;
18529408 553 }
49704364
LW
554 Perl_croak(aTHX_ "No group ending character '%c' found in template",
555 ender);
556 return 0;
18529408
IZ
557}
558
49704364
LW
559
560/* Convert unsigned decimal number to binary.
561 * Expects a pointer to the first digit and address of length variable
562 * Advances char pointer to 1st non-digit char and returns number
fc241834 563 */
f7fe979e 564STATIC const char *
5aaab254 565S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
566{
567 I32 len = *patptr++ - '0';
7918f24d
NC
568
569 PERL_ARGS_ASSERT_GET_NUM;
570
49704364
LW
571 while (isDIGIT(*patptr)) {
572 if (len >= 0x7FFFFFFF/10)
573 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
574 len = (len * 10) + (*patptr++ - '0');
575 }
576 *lenptr = len;
577 return patptr;
578}
579
580/* The marvellous template parsing routine: Using state stored in *symptr,
581 * locates next template code and count
582 */
583STATIC bool
f337b084 584S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 585{
f7fe979e 586 const char* patptr = symptr->patptr;
0bcc34c2 587 const char* const patend = symptr->patend;
49704364 588
7918f24d
NC
589 PERL_ARGS_ASSERT_NEXT_SYMBOL;
590
49704364
LW
591 symptr->flags &= ~FLAG_SLASH;
592
593 while (patptr < patend) {
594 if (isSPACE(*patptr))
595 patptr++;
596 else if (*patptr == '#') {
597 patptr++;
598 while (patptr < patend && *patptr != '\n')
599 patptr++;
600 if (patptr < patend)
601 patptr++;
602 } else {
fc241834 603 /* We should have found a template code */
49704364 604 I32 code = *patptr++ & 0xFF;
66c611c5 605 U32 inherited_modifiers = 0;
49704364
LW
606
607 if (code == ','){ /* grandfather in commas but with a warning */
608 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
609 symptr->flags |= FLAG_COMMA;
610 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 611 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
612 }
613 continue;
614 }
fc241834 615
49704364 616 /* for '(', skip to ')' */
fc241834 617 if (code == '(') {
49704364
LW
618 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
619 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 620 _action( symptr ) );
49704364
LW
621 symptr->grpbeg = patptr;
622 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
623 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
624 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 625 _action( symptr ) );
49704364
LW
626 }
627
66c611c5
MHM
628 /* look for group modifiers to inherit */
629 if (TYPE_ENDIANNESS(symptr->flags)) {
630 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
631 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
632 }
633
1109a392
MHM
634 /* look for modifiers */
635 while (patptr < patend) {
b7787f18
AL
636 const char *allowed;
637 I32 modifier;
1109a392
MHM
638 switch (*patptr) {
639 case '!':
640 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 641 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
642 break;
643 case '>':
644 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 645 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
646 break;
647 case '<':
648 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 649 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
650 break;
651 default:
b7787f18
AL
652 allowed = "";
653 modifier = 0;
1109a392
MHM
654 break;
655 }
66c611c5 656
1109a392
MHM
657 if (modifier == 0)
658 break;
66c611c5 659
1109a392
MHM
660 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
661 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 662 allowed, _action( symptr ) );
66c611c5
MHM
663
664 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 665 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 666 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
667 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
668 TYPE_ENDIANNESS_MASK)
669 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 670 *patptr, _action( symptr ) );
66c611c5 671
a2a5de95
NC
672 if ((code & modifier)) {
673 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
674 "Duplicate modifier '%c' after '%c' in %s",
675 *patptr, (int) TYPE_NO_MODIFIERS(code),
676 _action( symptr ) );
1109a392 677 }
66c611c5 678
1109a392
MHM
679 code |= modifier;
680 patptr++;
49704364
LW
681 }
682
66c611c5
MHM
683 /* inherit modifiers */
684 code |= inherited_modifiers;
685
fc241834 686 /* look for count and/or / */
49704364
LW
687 if (patptr < patend) {
688 if (isDIGIT(*patptr)) {
689 patptr = get_num( patptr, &symptr->length );
690 symptr->howlen = e_number;
691
692 } else if (*patptr == '*') {
693 patptr++;
694 symptr->howlen = e_star;
695
696 } else if (*patptr == '[') {
f7fe979e 697 const char* lenptr = ++patptr;
49704364
LW
698 symptr->howlen = e_number;
699 patptr = group_end( patptr, patend, ']' ) + 1;
700 /* what kind of [] is it? */
701 if (isDIGIT(*lenptr)) {
702 lenptr = get_num( lenptr, &symptr->length );
703 if( *lenptr != ']' )
704 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 705 _action( symptr ) );
49704364
LW
706 } else {
707 tempsym_t savsym = *symptr;
708 symptr->patend = patptr-1;
709 symptr->patptr = lenptr;
710 savsym.length = measure_struct(symptr);
711 *symptr = savsym;
712 }
713 } else {
714 symptr->howlen = e_no_len;
715 symptr->length = 1;
716 }
717
718 /* try to find / */
719 while (patptr < patend) {
720 if (isSPACE(*patptr))
721 patptr++;
722 else if (*patptr == '#') {
723 patptr++;
724 while (patptr < patend && *patptr != '\n')
725 patptr++;
726 if (patptr < patend)
727 patptr++;
728 } else {
66c611c5 729 if (*patptr == '/') {
49704364
LW
730 symptr->flags |= FLAG_SLASH;
731 patptr++;
66c611c5
MHM
732 if (patptr < patend &&
733 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 734 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 735 _action( symptr ) );
49704364
LW
736 }
737 break;
738 }
18529408 739 }
49704364
LW
740 } else {
741 /* at end - no count, no / */
742 symptr->howlen = e_no_len;
743 symptr->length = 1;
744 }
745
746 symptr->code = code;
fc241834 747 symptr->patptr = patptr;
49704364 748 return TRUE;
18529408 749 }
49704364 750 }
fc241834 751 symptr->patptr = patptr;
49704364 752 return FALSE;
18529408
IZ
753}
754
18529408 755/*
fc241834 756 There is no way to cleanly handle the case where we should process the
08ca2aa3 757 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
758 (e.g. estimates like strend-s as an upper bound for the number of
759 characters left wouldn't work). So if we foresee the need of this
760 (pattern starts with U or contains U0), we want to work on the encoded
761 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
762 themselves if they need to do a lot of unpacks like this on it
763*/
fc241834 764STATIC bool
08ca2aa3
TH
765need_utf8(const char *pat, const char *patend)
766{
767 bool first = TRUE;
7918f24d
NC
768
769 PERL_ARGS_ASSERT_NEED_UTF8;
770
08ca2aa3
TH
771 while (pat < patend) {
772 if (pat[0] == '#') {
773 pat++;
f7fe979e 774 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
775 if (!pat) return FALSE;
776 } else if (pat[0] == 'U') {
777 if (first || pat[1] == '0') return TRUE;
778 } else first = FALSE;
779 pat++;
780 }
781 return FALSE;
782}
783
784STATIC char
785first_symbol(const char *pat, const char *patend) {
7918f24d
NC
786 PERL_ARGS_ASSERT_FIRST_SYMBOL;
787
08ca2aa3
TH
788 while (pat < patend) {
789 if (pat[0] != '#') return pat[0];
790 pat++;
f7fe979e 791 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
792 if (!pat) return 0;
793 pat++;
794 }
795 return 0;
796}
797
798/*
7accc089
JH
799=for apidoc unpackstring
800
21ebfc7a
DM
801The engine implementing the unpack() Perl function.
802
803Using the template pat..patend, this function unpacks the string
804s..strend into a number of mortal SVs, which it pushes onto the perl
805argument (@_) stack (so you will need to issue a C<PUTBACK> before and
806C<SPAGAIN> after the call to this function). It returns the number of
807pushed elements.
808
809The strend and patend pointers should point to the byte following the last
810character of each string.
811
812Although this function returns its values on the perl argument stack, it
813doesn't take any parameters from that stack (and thus in particular
814there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
815example).
7accc089
JH
816
817=cut */
818
819I32
f7fe979e 820Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 821{
f7fe979e 822 tempsym_t sym;
08ca2aa3 823
7918f24d
NC
824 PERL_ARGS_ASSERT_UNPACKSTRING;
825
f337b084 826 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
827 else if (need_utf8(pat, patend)) {
828 /* We probably should try to avoid this in case a scalar context call
829 wouldn't get to the "U0" */
830 STRLEN len = strend - s;
230e1fce 831 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
832 SAVEFREEPV(s);
833 strend = s + len;
f337b084 834 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
835 }
836
f337b084
TH
837 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
838 flags |= FLAG_PARSE_UTF8;
08ca2aa3 839
f7fe979e 840 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
841
842 return unpack_rec(&sym, s, s, strend, NULL );
843}
844
4136a0f7 845STATIC I32
f7fe979e 846S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 847{
27da23d5 848 dVAR; dSP;
3297d27d 849 SV *sv = NULL;
f7fe979e 850 const I32 start_sp_offset = SP - PL_stack_base;
49704364 851 howlen_t howlen;
a6ec74c1 852 I32 checksum = 0;
92d41999 853 UV cuv = 0;
a6ec74c1 854 NV cdouble = 0.0;
f337b084 855 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 856 bool beyond = FALSE;
21c16052 857 bool explicit_length;
9e27e96a 858 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 859 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
860
861 PERL_ARGS_ASSERT_UNPACK_REC;
862
28be1210 863 symptr->strbeg = s - strbeg;
49704364 864
49704364 865 while (next_symbol(symptr)) {
a7a3cfaa 866 packprops_t props;
9e27e96a 867 I32 len;
f337b084 868 I32 datumtype = symptr->code;
a1219b5e 869 bool needs_swap;
206947d2 870 /* do first one only unless in list context
08ca2aa3 871 / is implemented by unpacking the count, then popping it from the
206947d2 872 stack, so must check that we're not in the middle of a / */
49704364 873 if ( unpack_only_one
206947d2 874 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 875 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 876 break;
49704364 877
f337b084 878 switch (howlen = symptr->howlen) {
fc241834
RGS
879 case e_star:
880 len = strend - strbeg; /* long enough */
49704364 881 break;
f337b084
TH
882 default:
883 /* e_no_len and e_number */
884 len = symptr->length;
885 break;
49704364 886 }
18529408 887
21c16052 888 explicit_length = TRUE;
a6ec74c1 889 redo_switch:
49704364 890 beyond = s >= strend;
a7a3cfaa
TH
891
892 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
893 if (props) {
894 /* props nonzero means we can process this letter. */
9e27e96a
AL
895 const long size = props & PACK_SIZE_MASK;
896 const long howmany = (strend - s) / size;
a7a3cfaa
TH
897 if (len > howmany)
898 len = howmany;
899
900 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
901 if (len && unpack_only_one) len = 1;
902 EXTEND(SP, len);
903 EXTEND_MORTAL(len);
78d46eaa
NC
904 }
905 }
a7a3cfaa 906
a1219b5e
NC
907 needs_swap = NEEDS_SWAP(datumtype);
908
1109a392 909 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 910 default:
1109a392 911 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 912
a6ec74c1 913 case '%':
49704364 914 if (howlen == e_no_len)
18529408 915 len = 16; /* len is not specified */
a6ec74c1 916 checksum = len;
92d41999 917 cuv = 0;
a6ec74c1 918 cdouble = 0;
18529408 919 continue;
a6ec74c1 920 break;
18529408
IZ
921 case '(':
922 {
49704364 923 tempsym_t savsym = *symptr;
9e27e96a 924 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 925 symptr->flags |= group_modifiers;
49704364 926 symptr->patend = savsym.grpend;
28be1210 927 symptr->previous = &savsym;
49704364 928 symptr->level++;
18529408 929 PUTBACK;
c6f750d1 930 if (len && unpack_only_one) len = 1;
18529408 931 while (len--) {
49704364 932 symptr->patptr = savsym.grpbeg;
f337b084
TH
933 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
934 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
935 unpack_rec(symptr, s, strbeg, strend, &s);
936 if (s == strend && savsym.howlen == e_star)
49704364 937 break; /* No way to continue */
18529408
IZ
938 }
939 SPAGAIN;
28be1210 940 savsym.flags = symptr->flags & ~group_modifiers;
49704364 941 *symptr = savsym;
18529408
IZ
942 break;
943 }
28be1210 944 case '.' | TYPE_IS_SHRIEKING:
28be1210 945 case '.': {
9e27e96a 946 const char *from;
28be1210 947 SV *sv;
9e27e96a 948 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
949 if (howlen == e_star) from = strbeg;
950 else if (len <= 0) from = s;
951 else {
952 tempsym_t *group = symptr;
953
954 while (--len && group) group = group->previous;
955 from = group ? strbeg + group->strbeg : strbeg;
956 }
957 sv = from <= s ?
00646304
CB
958 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
959 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 960 mXPUSHs(sv);
28be1210
TH
961 break;
962 }
28be1210 963 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 964 case '@':
28be1210 965 s = strbeg + symptr->strbeg;
28be1210 966 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 967 {
08ca2aa3
TH
968 while (len > 0) {
969 if (s >= strend)
970 Perl_croak(aTHX_ "'@' outside of string in unpack");
971 s += UTF8SKIP(s);
972 len--;
973 }
974 if (s > strend)
975 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
976 } else {
28be1210 977 if (strend-s < len)
fc241834 978 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 979 s += len;
08ca2aa3 980 }
a6ec74c1 981 break;
62f95557
IZ
982 case 'X' | TYPE_IS_SHRIEKING:
983 if (!len) /* Avoid division by 0 */
984 len = 1;
08ca2aa3 985 if (utf8) {
f7fe979e 986 const char *hop, *last;
f337b084
TH
987 I32 l = len;
988 hop = last = strbeg;
989 while (hop < s) {
990 hop += UTF8SKIP(hop);
991 if (--l == 0) {
08ca2aa3 992 last = hop;
f337b084
TH
993 l = len;
994 }
fc241834 995 }
f337b084
TH
996 if (last > s)
997 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
998 s = last;
999 break;
f337b084
TH
1000 }
1001 len = (s - strbeg) % len;
62f95557 1002 /* FALL THROUGH */
a6ec74c1 1003 case 'X':
08ca2aa3
TH
1004 if (utf8) {
1005 while (len > 0) {
1006 if (s <= strbeg)
1007 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1008 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1009 if (s <= strbeg)
1010 Perl_croak(aTHX_ "'X' outside of string in unpack");
1011 }
1012 len--;
1013 }
1014 } else {
fc241834
RGS
1015 if (len > s - strbeg)
1016 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1017 s -= len;
08ca2aa3 1018 }
a6ec74c1 1019 break;
9e27e96a
AL
1020 case 'x' | TYPE_IS_SHRIEKING: {
1021 I32 ai32;
62f95557
IZ
1022 if (!len) /* Avoid division by 0 */
1023 len = 1;
230e1fce
NC
1024 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1025 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1026 if (ai32 == 0) break;
1027 len -= ai32;
9e27e96a 1028 }
62f95557 1029 /* FALL THROUGH */
a6ec74c1 1030 case 'x':
08ca2aa3
TH
1031 if (utf8) {
1032 while (len>0) {
1033 if (s >= strend)
1034 Perl_croak(aTHX_ "'x' outside of string in unpack");
1035 s += UTF8SKIP(s);
1036 len--;
1037 }
1038 } else {
fc241834
RGS
1039 if (len > strend - s)
1040 Perl_croak(aTHX_ "'x' outside of string in unpack");
1041 s += len;
f337b084 1042 }
a6ec74c1
JH
1043 break;
1044 case '/':
49704364
LW
1045 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1046 break;
a6ec74c1
JH
1047 case 'A':
1048 case 'Z':
1049 case 'a':
08ca2aa3
TH
1050 if (checksum) {
1051 /* Preliminary length estimate is assumed done in 'W' */
1052 if (len > strend - s) len = strend - s;
1053 goto W_checksum;
1054 }
1055 if (utf8) {
1056 I32 l;
f7fe979e 1057 const char *hop;
08ca2aa3
TH
1058 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1059 if (hop >= strend) {
1060 if (hop > strend)
1061 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1062 break;
fc241834 1063 }
a6ec74c1 1064 }
08ca2aa3
TH
1065 if (hop > strend)
1066 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1067 len = hop - s;
1068 } else if (len > strend - s)
1069 len = strend - s;
1070
1071 if (datumtype == 'Z') {
1072 /* 'Z' strips stuff after first null */
f7fe979e 1073 const char *ptr, *end;
f337b084
TH
1074 end = s + len;
1075 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1076 sv = newSVpvn(s, ptr-s);
1077 if (howlen == e_star) /* exact for 'Z*' */
1078 len = ptr-s + (ptr != strend ? 1 : 0);
1079 } else if (datumtype == 'A') {
1080 /* 'A' strips both nulls and spaces */
f7fe979e 1081 const char *ptr;
18bdf90a
TH
1082 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1083 for (ptr = s+len-1; ptr >= s; ptr--)
1084 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1085 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1086 if (ptr >= s) ptr += UTF8SKIP(ptr);
1087 else ptr++;
28be1210 1088 if (ptr > s+len)
18bdf90a
TH
1089 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1090 } else {
1091 for (ptr = s+len-1; ptr >= s; ptr--)
1092 if (*ptr != 0 && !isSPACE(*ptr)) break;
1093 ptr++;
1094 }
08ca2aa3
TH
1095 sv = newSVpvn(s, ptr-s);
1096 } else sv = newSVpvn(s, len);
1097
1098 if (utf8) {
1099 SvUTF8_on(sv);
1100 /* Undo any upgrade done due to need_utf8() */
f337b084 1101 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1102 sv_utf8_downgrade(sv, 0);
a6ec74c1 1103 }
6e449a3a 1104 mXPUSHs(sv);
08ca2aa3 1105 s += len;
a6ec74c1
JH
1106 break;
1107 case 'B':
08ca2aa3
TH
1108 case 'b': {
1109 char *str;
49704364 1110 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1111 len = (strend - s) * 8;
1112 if (checksum) {
f337b084 1113 if (utf8)
08ca2aa3 1114 while (len >= 8 && s < strend) {
f337b084 1115 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1116 len -= 8;
1117 }
f337b084 1118 else
fc241834 1119 while (len >= 8) {
08ca2aa3 1120 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1121 len -= 8;
1122 }
08ca2aa3
TH
1123 if (len && s < strend) {
1124 U8 bits;
f337b084
TH
1125 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1126 if (datumtype == 'b')
a6ec74c1 1127 while (len-- > 0) {
92d41999 1128 if (bits & 1) cuv++;
a6ec74c1
JH
1129 bits >>= 1;
1130 }
f337b084 1131 else
a6ec74c1 1132 while (len-- > 0) {
08ca2aa3 1133 if (bits & 0x80) cuv++;
a6ec74c1
JH
1134 bits <<= 1;
1135 }
fc241834 1136 }
a6ec74c1
JH
1137 break;
1138 }
08ca2aa3 1139
561b68a9 1140 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1141 SvPOK_on(sv);
1142 str = SvPVX(sv);
1143 if (datumtype == 'b') {
f337b084 1144 U8 bits = 0;
f7fe979e 1145 const I32 ai32 = len;
08ca2aa3
TH
1146 for (len = 0; len < ai32; len++) {
1147 if (len & 7) bits >>= 1;
1148 else if (utf8) {
1149 if (s >= strend) break;
f337b084 1150 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1151 } else bits = *(U8 *) s++;
1152 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1153 }
08ca2aa3 1154 } else {
f337b084 1155 U8 bits = 0;
f7fe979e 1156 const I32 ai32 = len;
08ca2aa3
TH
1157 for (len = 0; len < ai32; len++) {
1158 if (len & 7) bits <<= 1;
1159 else if (utf8) {
1160 if (s >= strend) break;
f337b084 1161 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1162 } else bits = *(U8 *) s++;
1163 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1164 }
1165 }
1166 *str = '\0';
aa07b2f6 1167 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1168 XPUSHs(sv);
a6ec74c1 1169 break;
08ca2aa3 1170 }
a6ec74c1 1171 case 'H':
08ca2aa3 1172 case 'h': {
3297d27d 1173 char *str = NULL;
fc241834 1174 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1175 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1176 len = (strend - s) * 2;
858fe5e1
TC
1177 if (!checksum) {
1178 sv = sv_2mortal(newSV(len ? len : 1));
1179 SvPOK_on(sv);
1180 str = SvPVX(sv);
1181 }
a6ec74c1 1182 if (datumtype == 'h') {
f337b084 1183 U8 bits = 0;
9e27e96a 1184 I32 ai32 = len;
fc241834
RGS
1185 for (len = 0; len < ai32; len++) {
1186 if (len & 1) bits >>= 4;
1187 else if (utf8) {
1188 if (s >= strend) break;
f337b084 1189 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1190 } else bits = * (U8 *) s++;
858fe5e1
TC
1191 if (!checksum)
1192 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1193 }
08ca2aa3 1194 } else {
f337b084 1195 U8 bits = 0;
f7fe979e 1196 const I32 ai32 = len;
08ca2aa3
TH
1197 for (len = 0; len < ai32; len++) {
1198 if (len & 1) bits <<= 4;
1199 else if (utf8) {
1200 if (s >= strend) break;
f337b084 1201 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1202 } else bits = *(U8 *) s++;
858fe5e1
TC
1203 if (!checksum)
1204 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1205 }
1206 }
858fe5e1
TC
1207 if (!checksum) {
1208 *str = '\0';
1209 SvCUR_set(sv, str - SvPVX_const(sv));
1210 XPUSHs(sv);
1211 }
a6ec74c1 1212 break;
08ca2aa3 1213 }
1651fc44
ML
1214 case 'C':
1215 if (len == 0) {
1216 if (explicit_length)
1217 /* Switch to "character" mode */
1218 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1219 break;
1220 }
1221 /* FALL THROUGH */
a6ec74c1 1222 case 'c':
1651fc44
ML
1223 while (len-- > 0 && s < strend) {
1224 int aint;
1225 if (utf8)
1226 {
1227 STRLEN retlen;
1228 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1229 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1230 if (retlen == (STRLEN) -1 || retlen == 0)
1231 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1232 s += retlen;
1233 }
1234 else
1235 aint = *(U8 *)(s)++;
1236 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1237 aint -= 256;
08ca2aa3 1238 if (!checksum)
6e449a3a 1239 mPUSHi(aint);
73cb7263
NC
1240 else if (checksum > bits_in_uv)
1241 cdouble += (NV)aint;
1242 else
1243 cuv += aint;
a6ec74c1
JH
1244 }
1245 break;
08ca2aa3
TH
1246 case 'W':
1247 W_checksum:
1651fc44 1248 if (utf8) {
08ca2aa3 1249 while (len-- > 0 && s < strend) {
08ca2aa3 1250 STRLEN retlen;
f7fe979e 1251 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1252 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1253 if (retlen == (STRLEN) -1 || retlen == 0)
1254 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1255 s += retlen;
1256 if (!checksum)
6e449a3a 1257 mPUSHu(val);
08ca2aa3
TH
1258 else if (checksum > bits_in_uv)
1259 cdouble += (NV) val;
d6d3e8bd 1260 else
08ca2aa3 1261 cuv += val;
fc241834 1262 }
08ca2aa3 1263 } else if (!checksum)
a6ec74c1 1264 while (len-- > 0) {
f7fe979e 1265 const U8 ch = *(U8 *) s++;
6e449a3a 1266 mPUSHu(ch);
a6ec74c1 1267 }
08ca2aa3
TH
1268 else if (checksum > bits_in_uv)
1269 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1270 else
1271 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1272 break;
1273 case 'U':
35bcd338 1274 if (len == 0) {
c5333953 1275 if (explicit_length && howlen != e_star) {
08ca2aa3 1276 /* Switch to "bytes in UTF-8" mode */
f337b084 1277 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1278 else
1279 /* Should be impossible due to the need_utf8() test */
1280 Perl_croak(aTHX_ "U0 mode on a byte string");
1281 }
35bcd338
JH
1282 break;
1283 }
08ca2aa3 1284 if (len > strend - s) len = strend - s;
fc241834 1285 if (!checksum) {
08ca2aa3
TH
1286 if (len && unpack_only_one) len = 1;
1287 EXTEND(SP, len);
1288 EXTEND_MORTAL(len);
fc241834 1289 }
08ca2aa3
TH
1290 while (len-- > 0 && s < strend) {
1291 STRLEN retlen;
1292 UV auv;
1293 if (utf8) {
1294 U8 result[UTF8_MAXLEN];
f7fe979e 1295 const char *ptr = s;
08ca2aa3 1296 STRLEN len;
08ca2aa3
TH
1297 /* Bug: warns about bad utf8 even if we are short on bytes
1298 and will break out of the loop */
230e1fce
NC
1299 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1300 'U'))
08ca2aa3
TH
1301 break;
1302 len = UTF8SKIP(result);
fc241834 1303 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1304 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1305 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1306 s = ptr;
1307 } else {
cde84f2a 1308 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1309 if (retlen == (STRLEN) -1 || retlen == 0)
1310 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1311 s += retlen;
1312 }
1313 if (!checksum)
6e449a3a 1314 mPUSHu(auv);
73cb7263 1315 else if (checksum > bits_in_uv)
08ca2aa3 1316 cdouble += (NV) auv;
73cb7263 1317 else
08ca2aa3 1318 cuv += auv;
a6ec74c1
JH
1319 }
1320 break;
49704364
LW
1321 case 's' | TYPE_IS_SHRIEKING:
1322#if SHORTSIZE != SIZE16
73cb7263 1323 while (len-- > 0) {
08ca2aa3 1324 short ashort;
f337b084 1325 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
2b4ad569 1326 DO_BO_UNPACK(ashort);
08ca2aa3 1327 if (!checksum)
6e449a3a 1328 mPUSHi(ashort);
73cb7263
NC
1329 else if (checksum > bits_in_uv)
1330 cdouble += (NV)ashort;
1331 else
1332 cuv += ashort;
49704364
LW
1333 }
1334 break;
1335#else
1336 /* Fallthrough! */
a6ec74c1 1337#endif
49704364 1338 case 's':
73cb7263 1339 while (len-- > 0) {
08ca2aa3
TH
1340 I16 ai16;
1341
1342#if U16SIZE > SIZE16
1343 ai16 = 0;
1344#endif
f337b084 1345 SHIFT16(utf8, s, strend, &ai16, datumtype);
2b4ad569 1346 DO_BO_UNPACK(ai16);
1109a392 1347#if U16SIZE > SIZE16
73cb7263
NC
1348 if (ai16 > 32767)
1349 ai16 -= 65536;
a6ec74c1 1350#endif
08ca2aa3 1351 if (!checksum)
6e449a3a 1352 mPUSHi(ai16);
73cb7263
NC
1353 else if (checksum > bits_in_uv)
1354 cdouble += (NV)ai16;
1355 else
1356 cuv += ai16;
a6ec74c1
JH
1357 }
1358 break;
49704364
LW
1359 case 'S' | TYPE_IS_SHRIEKING:
1360#if SHORTSIZE != SIZE16
73cb7263 1361 while (len-- > 0) {
08ca2aa3 1362 unsigned short aushort;
f337b084 1363 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
2b4ad569 1364 DO_BO_UNPACK(aushort);
08ca2aa3 1365 if (!checksum)
6e449a3a 1366 mPUSHu(aushort);
73cb7263
NC
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)aushort;
1369 else
1370 cuv += aushort;
49704364
LW
1371 }
1372 break;
1373#else
486ec47a 1374 /* Fallthrough! */
49704364 1375#endif
a6ec74c1
JH
1376 case 'v':
1377 case 'n':
1378 case 'S':
73cb7263 1379 while (len-- > 0) {
08ca2aa3
TH
1380 U16 au16;
1381#if U16SIZE > SIZE16
1382 au16 = 0;
1383#endif
f337b084 1384 SHIFT16(utf8, s, strend, &au16, datumtype);
2b4ad569 1385 DO_BO_UNPACK(au16);
73cb7263
NC
1386 if (datumtype == 'n')
1387 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1388 if (datumtype == 'v')
1389 au16 = vtohs(au16);
08ca2aa3 1390 if (!checksum)
6e449a3a 1391 mPUSHu(au16);
73cb7263 1392 else if (checksum > bits_in_uv)
f337b084 1393 cdouble += (NV) au16;
73cb7263
NC
1394 else
1395 cuv += au16;
a6ec74c1
JH
1396 }
1397 break;
068bd2e7
MHM
1398 case 'v' | TYPE_IS_SHRIEKING:
1399 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1400 while (len-- > 0) {
08ca2aa3
TH
1401 I16 ai16;
1402# if U16SIZE > SIZE16
1403 ai16 = 0;
1404# endif
f337b084 1405 SHIFT16(utf8, s, strend, &ai16, datumtype);
e396d235
NC
1406 DO_BO_UNPACK(ai16);
1407 /* There should never be any byte-swapping here. */
1408 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1409 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1410 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1411 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1412 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1413 if (!checksum)
6e449a3a 1414 mPUSHi(ai16);
73cb7263 1415 else if (checksum > bits_in_uv)
08ca2aa3 1416 cdouble += (NV) ai16;
73cb7263
NC
1417 else
1418 cuv += ai16;
068bd2e7
MHM
1419 }
1420 break;
a6ec74c1 1421 case 'i':
49704364 1422 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1423 while (len-- > 0) {
08ca2aa3 1424 int aint;
f337b084 1425 SHIFT_VAR(utf8, s, strend, aint, datumtype);
2b4ad569 1426 DO_BO_UNPACK(aint);
08ca2aa3 1427 if (!checksum)
6e449a3a 1428 mPUSHi(aint);
73cb7263
NC
1429 else if (checksum > bits_in_uv)
1430 cdouble += (NV)aint;
1431 else
1432 cuv += aint;
a6ec74c1
JH
1433 }
1434 break;
1435 case 'I':
49704364 1436 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1437 while (len-- > 0) {
08ca2aa3 1438 unsigned int auint;
f337b084 1439 SHIFT_VAR(utf8, s, strend, auint, datumtype);
2b4ad569 1440 DO_BO_UNPACK(auint);
08ca2aa3 1441 if (!checksum)
6e449a3a 1442 mPUSHu(auint);
73cb7263
NC
1443 else if (checksum > bits_in_uv)
1444 cdouble += (NV)auint;
1445 else
1446 cuv += auint;
a6ec74c1
JH
1447 }
1448 break;
92d41999 1449 case 'j':
73cb7263 1450 while (len-- > 0) {
08ca2aa3 1451 IV aiv;
f337b084 1452 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
2b4ad569 1453 DO_BO_UNPACK(aiv);
08ca2aa3 1454 if (!checksum)
6e449a3a 1455 mPUSHi(aiv);
73cb7263
NC
1456 else if (checksum > bits_in_uv)
1457 cdouble += (NV)aiv;
1458 else
1459 cuv += aiv;
92d41999
JH
1460 }
1461 break;
1462 case 'J':
73cb7263 1463 while (len-- > 0) {
08ca2aa3 1464 UV auv;
f337b084 1465 SHIFT_VAR(utf8, s, strend, auv, datumtype);
2b4ad569 1466 DO_BO_UNPACK(auv);
08ca2aa3 1467 if (!checksum)
6e449a3a 1468 mPUSHu(auv);
73cb7263
NC
1469 else if (checksum > bits_in_uv)
1470 cdouble += (NV)auv;
1471 else
1472 cuv += auv;
92d41999
JH
1473 }
1474 break;
49704364
LW
1475 case 'l' | TYPE_IS_SHRIEKING:
1476#if LONGSIZE != SIZE32
73cb7263 1477 while (len-- > 0) {
08ca2aa3 1478 long along;
f337b084 1479 SHIFT_VAR(utf8, s, strend, along, datumtype);
2b4ad569 1480 DO_BO_UNPACK(along);
08ca2aa3 1481 if (!checksum)
6e449a3a 1482 mPUSHi(along);
73cb7263
NC
1483 else if (checksum > bits_in_uv)
1484 cdouble += (NV)along;
1485 else
1486 cuv += along;
49704364
LW
1487 }
1488 break;
1489#else
1490 /* Fallthrough! */
a6ec74c1 1491#endif
49704364 1492 case 'l':
73cb7263 1493 while (len-- > 0) {
08ca2aa3
TH
1494 I32 ai32;
1495#if U32SIZE > SIZE32
1496 ai32 = 0;
1497#endif
f337b084 1498 SHIFT32(utf8, s, strend, &ai32, datumtype);
2b4ad569 1499 DO_BO_UNPACK(ai32);
25a9bd2a 1500#if U32SIZE > SIZE32
08ca2aa3 1501 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1502#endif
08ca2aa3 1503 if (!checksum)
6e449a3a 1504 mPUSHi(ai32);
73cb7263
NC
1505 else if (checksum > bits_in_uv)
1506 cdouble += (NV)ai32;
1507 else
1508 cuv += ai32;
a6ec74c1
JH
1509 }
1510 break;
49704364
LW
1511 case 'L' | TYPE_IS_SHRIEKING:
1512#if LONGSIZE != SIZE32
73cb7263 1513 while (len-- > 0) {
08ca2aa3 1514 unsigned long aulong;
f337b084 1515 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
2b4ad569 1516 DO_BO_UNPACK(aulong);
08ca2aa3 1517 if (!checksum)
6e449a3a 1518 mPUSHu(aulong);
73cb7263
NC
1519 else if (checksum > bits_in_uv)
1520 cdouble += (NV)aulong;
1521 else
1522 cuv += aulong;
49704364
LW
1523 }
1524 break;
1525#else
1526 /* Fall through! */
1527#endif
a6ec74c1
JH
1528 case 'V':
1529 case 'N':
1530 case 'L':
73cb7263 1531 while (len-- > 0) {
08ca2aa3
TH
1532 U32 au32;
1533#if U32SIZE > SIZE32
1534 au32 = 0;
1535#endif
f337b084 1536 SHIFT32(utf8, s, strend, &au32, datumtype);
2b4ad569 1537 DO_BO_UNPACK(au32);
73cb7263
NC
1538 if (datumtype == 'N')
1539 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1540 if (datumtype == 'V')
1541 au32 = vtohl(au32);
08ca2aa3 1542 if (!checksum)
6e449a3a 1543 mPUSHu(au32);
fc241834
RGS
1544 else if (checksum > bits_in_uv)
1545 cdouble += (NV)au32;
1546 else
1547 cuv += au32;
a6ec74c1
JH
1548 }
1549 break;
068bd2e7
MHM
1550 case 'V' | TYPE_IS_SHRIEKING:
1551 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1552 while (len-- > 0) {
08ca2aa3 1553 I32 ai32;
f8e5a5db 1554#if U32SIZE > SIZE32
08ca2aa3 1555 ai32 = 0;
f8e5a5db 1556#endif
f337b084 1557 SHIFT32(utf8, s, strend, &ai32, datumtype);
e396d235
NC
1558 DO_BO_UNPACK(ai32);
1559 /* There should never be any byte swapping here. */
1560 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1561 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1562 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1563 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1564 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1565 if (!checksum)
6e449a3a 1566 mPUSHi(ai32);
73cb7263
NC
1567 else if (checksum > bits_in_uv)
1568 cdouble += (NV)ai32;
1569 else
1570 cuv += ai32;
068bd2e7
MHM
1571 }
1572 break;
a6ec74c1 1573 case 'p':
a6ec74c1 1574 while (len-- > 0) {
f7fe979e 1575 const char *aptr;
f337b084 1576 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2b4ad569 1577 DO_BO_UNPACK(aptr);
c4c5f44a 1578 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1579 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1580 }
1581 break;
1582 case 'w':
a6ec74c1
JH
1583 {
1584 UV auv = 0;
1585 U32 bytes = 0;
fc241834 1586
08ca2aa3
TH
1587 while (len > 0 && s < strend) {
1588 U8 ch;
f337b084 1589 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1590 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1591 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1592 if (ch < 0x80) {
a6ec74c1 1593 bytes = 0;
6e449a3a 1594 mPUSHu(auv);
a6ec74c1
JH
1595 len--;
1596 auv = 0;
08ca2aa3 1597 continue;
a6ec74c1 1598 }
08ca2aa3 1599 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1600 const char *t;
a6ec74c1 1601
f5992bc4 1602 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1603 while (s < strend) {
f337b084 1604 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1605 sv = mul128(sv, (U8)(ch & 0x7f));
1606 if (!(ch & 0x80)) {
a6ec74c1
JH
1607 bytes = 0;
1608 break;
1609 }
1610 }
10516c54 1611 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1612 while (*t == '0')
1613 t++;
1614 sv_chop(sv, t);
6e449a3a 1615 mPUSHs(sv);
a6ec74c1
JH
1616 len--;
1617 auv = 0;
1618 }
1619 }
1620 if ((s >= strend) && bytes)
49704364 1621 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1622 }
1623 break;
1624 case 'P':
49704364
LW
1625 if (symptr->howlen == e_star)
1626 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1627 EXTEND(SP, 1);
2d3e0934 1628 if (s + sizeof(char*) <= strend) {
08ca2aa3 1629 char *aptr;
f337b084 1630 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2b4ad569 1631 DO_BO_UNPACK(aptr);
fc241834 1632 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1633 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1634 }
a6ec74c1
JH
1635 break;
1636#ifdef HAS_QUAD
1637 case 'q':
73cb7263 1638 while (len-- > 0) {
08ca2aa3 1639 Quad_t aquad;
f337b084 1640 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2b4ad569 1641 DO_BO_UNPACK(aquad);
08ca2aa3 1642 if (!checksum)
6e449a3a
MHM
1643 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1644 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1645 else if (checksum > bits_in_uv)
1646 cdouble += (NV)aquad;
1647 else
1648 cuv += aquad;
1649 }
a6ec74c1
JH
1650 break;
1651 case 'Q':
73cb7263 1652 while (len-- > 0) {
08ca2aa3 1653 Uquad_t auquad;
f337b084 1654 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2b4ad569 1655 DO_BO_UNPACK(auquad);
08ca2aa3 1656 if (!checksum)
6e449a3a
MHM
1657 mPUSHs(auquad <= UV_MAX ?
1658 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1659 else if (checksum > bits_in_uv)
1660 cdouble += (NV)auquad;
1661 else
1662 cuv += auquad;
a6ec74c1
JH
1663 }
1664 break;
08ca2aa3 1665#endif /* HAS_QUAD */
a6ec74c1
JH
1666 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1667 case 'f':
73cb7263 1668 while (len-- > 0) {
08ca2aa3 1669 float afloat;
f337b084 1670 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2b4ad569 1671 DO_BO_UNPACK(afloat);
08ca2aa3 1672 if (!checksum)
6e449a3a 1673 mPUSHn(afloat);
08ca2aa3 1674 else
73cb7263 1675 cdouble += afloat;
fc241834 1676 }
a6ec74c1
JH
1677 break;
1678 case 'd':
73cb7263 1679 while (len-- > 0) {
08ca2aa3 1680 double adouble;
f337b084 1681 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2b4ad569 1682 DO_BO_UNPACK(adouble);
08ca2aa3 1683 if (!checksum)
6e449a3a 1684 mPUSHn(adouble);
08ca2aa3 1685 else
73cb7263 1686 cdouble += adouble;
fc241834 1687 }
a6ec74c1 1688 break;
92d41999 1689 case 'F':
73cb7263 1690 while (len-- > 0) {
275663fa
TC
1691 NV_bytes anv;
1692 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2b4ad569 1693 DO_BO_UNPACK(anv.nv);
08ca2aa3 1694 if (!checksum)
275663fa 1695 mPUSHn(anv.nv);
08ca2aa3 1696 else
275663fa 1697 cdouble += anv.nv;
fc241834 1698 }
92d41999
JH
1699 break;
1700#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1701 case 'D':
73cb7263 1702 while (len-- > 0) {
275663fa
TC
1703 ld_bytes aldouble;
1704 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2b4ad569 1705 DO_BO_UNPACK(aldouble.ld);
08ca2aa3 1706 if (!checksum)
275663fa 1707 mPUSHn(aldouble.ld);
08ca2aa3 1708 else
275663fa 1709 cdouble += aldouble.ld;
92d41999
JH
1710 }
1711 break;
1712#endif
a6ec74c1 1713 case 'u':
858fe5e1 1714 if (!checksum) {
f7fe979e 1715 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1716 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1717 if (l) SvPOK_on(sv);
1718 }
1719 if (utf8) {
1720 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1721 I32 a, b, c, d;
db187877 1722 char hunk[3];
08ca2aa3 1723
08ca2aa3
TH
1724 while (len > 0) {
1725 next_uni_uu(aTHX_ &s, strend, &a);
1726 next_uni_uu(aTHX_ &s, strend, &b);
1727 next_uni_uu(aTHX_ &s, strend, &c);
1728 next_uni_uu(aTHX_ &s, strend, &d);
1729 hunk[0] = (char)((a << 2) | (b >> 4));
1730 hunk[1] = (char)((b << 4) | (c >> 2));
1731 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1732 if (!checksum)
1733 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1734 len -= 3;
1735 }
1736 if (s < strend) {
f7fe979e
AL
1737 if (*s == '\n') {
1738 s++;
1739 }
08ca2aa3
TH
1740 else {
1741 /* possible checksum byte */
f7fe979e
AL
1742 const char *skip = s+UTF8SKIP(s);
1743 if (skip < strend && *skip == '\n')
1744 s = skip+1;
08ca2aa3
TH
1745 }
1746 }
1747 }
1748 } else {
fc241834
RGS
1749 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1750 I32 a, b, c, d;
db187877 1751 char hunk[3];
a6ec74c1 1752
fc241834
RGS
1753 len = PL_uudmap[*(U8*)s++] & 077;
1754 while (len > 0) {
1755 if (s < strend && ISUUCHAR(*s))
1756 a = PL_uudmap[*(U8*)s++] & 077;
1757 else
1758 a = 0;
1759 if (s < strend && ISUUCHAR(*s))
1760 b = PL_uudmap[*(U8*)s++] & 077;
1761 else
1762 b = 0;
1763 if (s < strend && ISUUCHAR(*s))
1764 c = PL_uudmap[*(U8*)s++] & 077;
1765 else
1766 c = 0;
1767 if (s < strend && ISUUCHAR(*s))
1768 d = PL_uudmap[*(U8*)s++] & 077;
1769 else
1770 d = 0;
1771 hunk[0] = (char)((a << 2) | (b >> 4));
1772 hunk[1] = (char)((b << 4) | (c >> 2));
1773 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1774 if (!checksum)
1775 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1776 len -= 3;
1777 }
1778 if (*s == '\n')
1779 s++;
1780 else /* possible checksum byte */
1781 if (s + 1 < strend && s[1] == '\n')
1782 s += 2;
a6ec74c1 1783 }
08ca2aa3 1784 }
858fe5e1
TC
1785 if (!checksum)
1786 XPUSHs(sv);
a6ec74c1
JH
1787 break;
1788 }
49704364 1789
a6ec74c1 1790 if (checksum) {
1109a392 1791 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1792 (checksum > bits_in_uv &&
08ca2aa3
TH
1793 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1794 NV trouble, anv;
a6ec74c1 1795
08ca2aa3 1796 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1797 while (checksum >= 16) {
1798 checksum -= 16;
08ca2aa3 1799 anv *= 65536.0;
a6ec74c1 1800 }
a6ec74c1 1801 while (cdouble < 0.0)
08ca2aa3
TH
1802 cdouble += anv;
1803 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1804 sv = newSVnv(cdouble);
a6ec74c1
JH
1805 }
1806 else {
fa8ec7c1
NC
1807 if (checksum < bits_in_uv) {
1808 UV mask = ((UV)1 << checksum) - 1;
92d41999 1809 cuv &= mask;
a6ec74c1 1810 }
c4c5f44a 1811 sv = newSVuv(cuv);
a6ec74c1 1812 }
6e449a3a 1813 mXPUSHs(sv);
a6ec74c1
JH
1814 checksum = 0;
1815 }
fc241834 1816
49704364
LW
1817 if (symptr->flags & FLAG_SLASH){
1818 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1819 break;
49704364
LW
1820 if( next_symbol(symptr) ){
1821 if( symptr->howlen == e_number )
1822 Perl_croak(aTHX_ "Count after length/code in unpack" );
1823 if( beyond ){
1824 /* ...end of char buffer then no decent length available */
1825 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1826 } else {
1827 /* take top of stack (hope it's numeric) */
1828 len = POPi;
1829 if( len < 0 )
1830 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1831 }
1832 } else {
1833 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1834 }
1835 datumtype = symptr->code;
21c16052 1836 explicit_length = FALSE;
49704364
LW
1837 goto redo_switch;
1838 }
a6ec74c1 1839 }
49704364 1840
18529408
IZ
1841 if (new_s)
1842 *new_s = s;
1843 PUTBACK;
1844 return SP - PL_stack_base - start_sp_offset;
1845}
1846
1847PP(pp_unpack)
1848{
97aff369 1849 dVAR;
18529408 1850 dSP;
bab9c0ac 1851 dPOPPOPssrl;
18529408
IZ
1852 I32 gimme = GIMME_V;
1853 STRLEN llen;
1854 STRLEN rlen;
5c144d81
NC
1855 const char *pat = SvPV_const(left, llen);
1856 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1857 const char *strend = s + rlen;
1858 const char *patend = pat + llen;
08ca2aa3 1859 I32 cnt;
18529408
IZ
1860
1861 PUTBACK;
7accc089 1862 cnt = unpackstring(pat, patend, s, strend,
49704364 1863 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1864 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1865
18529408
IZ
1866 SPAGAIN;
1867 if ( !cnt && gimme == G_SCALAR )
1868 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1869 RETURN;
1870}
1871
f337b084 1872STATIC U8 *
f7fe979e 1873doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1874{
f337b084 1875 *h++ = PL_uuemap[len];
a6ec74c1 1876 while (len > 2) {
f337b084
TH
1877 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1878 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1879 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1880 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1881 s += 3;
1882 len -= 3;
1883 }
1884 if (len > 0) {
f7fe979e 1885 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1886 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1887 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1888 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1889 *h++ = PL_uuemap[0];
a6ec74c1 1890 }
f337b084
TH
1891 *h++ = '\n';
1892 return h;
a6ec74c1
JH
1893}
1894
1895STATIC SV *
f7fe979e 1896S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1897{
8b6e33c7
AL
1898 SV *result = newSVpvn(s, l);
1899 char *const result_c = SvPV_nolen(result); /* convenience */
1900 char *out = result_c;
1901 bool skip = 1;
1902 bool ignore = 0;
a6ec74c1 1903
7918f24d
NC
1904 PERL_ARGS_ASSERT_IS_AN_INT;
1905
a6ec74c1
JH
1906 while (*s) {
1907 switch (*s) {
1908 case ' ':
1909 break;
1910 case '+':
1911 if (!skip) {
1912 SvREFCNT_dec(result);
1913 return (NULL);
1914 }
1915 break;
1916 case '0':
1917 case '1':
1918 case '2':
1919 case '3':
1920 case '4':
1921 case '5':
1922 case '6':
1923 case '7':
1924 case '8':
1925 case '9':
1926 skip = 0;
1927 if (!ignore) {
1928 *(out++) = *s;
1929 }
1930 break;
1931 case '.':
1932 ignore = 1;
1933 break;
1934 default:
1935 SvREFCNT_dec(result);
1936 return (NULL);
1937 }
1938 s++;
1939 }
1940 *(out++) = '\0';
1941 SvCUR_set(result, out - result_c);
1942 return (result);
1943}
1944
1945/* pnum must be '\0' terminated */
1946STATIC int
1947S_div128(pTHX_ SV *pnum, bool *done)
1948{
8b6e33c7
AL
1949 STRLEN len;
1950 char * const s = SvPV(pnum, len);
1951 char *t = s;
1952 int m = 0;
1953
7918f24d
NC
1954 PERL_ARGS_ASSERT_DIV128;
1955
8b6e33c7
AL
1956 *done = 1;
1957 while (*t) {
1958 const int i = m * 10 + (*t - '0');
1959 const int r = (i >> 7); /* r < 10 */
1960 m = i & 0x7F;
1961 if (r) {
1962 *done = 0;
1963 }
1964 *(t++) = '0' + r;
a6ec74c1 1965 }
8b6e33c7
AL
1966 *(t++) = '\0';
1967 SvCUR_set(pnum, (STRLEN) (t - s));
1968 return (m);
a6ec74c1
JH
1969}
1970
18529408 1971/*
7accc089
JH
1972=for apidoc packlist
1973
1974The engine implementing pack() Perl function.
1975
bfce84ec
AL
1976=cut
1977*/
7accc089
JH
1978
1979void
5aaab254 1980Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1981{
97aff369 1982 dVAR;
aadb217d
JH
1983 tempsym_t sym;
1984
7918f24d
NC
1985 PERL_ARGS_ASSERT_PACKLIST;
1986
f7fe979e 1987 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1988
f337b084
TH
1989 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1990 Also make sure any UTF8 flag is loaded */
56eb0262 1991 SvPV_force_nolen(cat);
bfce84ec
AL
1992 if (DO_UTF8(cat))
1993 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1994
49704364
LW
1995 (void)pack_rec( cat, &sym, beglist, endlist );
1996}
1997
f337b084
TH
1998/* like sv_utf8_upgrade, but also repoint the group start markers */
1999STATIC void
2000marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2001 STRLEN len;
2002 tempsym_t *group;
f7fe979e
AL
2003 const char *from_ptr, *from_start, *from_end, **marks, **m;
2004 char *to_start, *to_ptr;
f337b084
TH
2005
2006 if (SvUTF8(sv)) return;
2007
aa07b2f6 2008 from_start = SvPVX_const(sv);
f337b084
TH
2009 from_end = from_start + SvCUR(sv);
2010 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2011 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2012 if (from_ptr == from_end) {
2013 /* Simple case: no character needs to be changed */
2014 SvUTF8_on(sv);
2015 return;
2016 }
2017
3473cf63 2018 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2019 Newx(to_start, len, char);
f337b084
TH
2020 Copy(from_start, to_start, from_ptr-from_start, char);
2021 to_ptr = to_start + (from_ptr-from_start);
2022
a02a5408 2023 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2024 for (group=sym_ptr; group; group = group->previous)
2025 marks[group->level] = from_start + group->strbeg;
2026 marks[sym_ptr->level+1] = from_end+1;
2027 for (m = marks; *m < from_ptr; m++)
2028 *m = to_start + (*m-from_start);
2029
2030 for (;from_ptr < from_end; from_ptr++) {
2031 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2032 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2033 }
2034 *to_ptr = 0;
2035
2036 while (*m == from_ptr) *m++ = to_ptr;
2037 if (m != marks + sym_ptr->level+1) {
2038 Safefree(marks);
2039 Safefree(to_start);
5637ef5b
NC
2040 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2041 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2042 }
2043 for (group=sym_ptr; group; group = group->previous)
2044 group->strbeg = marks[group->level] - to_start;
2045 Safefree(marks);
2046
2047 if (SvOOK(sv)) {
2048 if (SvIVX(sv)) {
b162af07 2049 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2050 from_start -= SvIVX(sv);
2051 SvIV_set(sv, 0);
2052 }
2053 SvFLAGS(sv) &= ~SVf_OOK;
2054 }
2055 if (SvLEN(sv) != 0)
2056 Safefree(from_start);
f880fe2f 2057 SvPV_set(sv, to_start);
b162af07
SP
2058 SvCUR_set(sv, to_ptr - to_start);
2059 SvLEN_set(sv, len);
f337b084
TH
2060 SvUTF8_on(sv);
2061}
2062
2063/* Exponential string grower. Makes string extension effectively O(n)
2064 needed says how many extra bytes we need (not counting the final '\0')
2065 Only grows the string if there is an actual lack of space
2066*/
2067STATIC char *
0bd48802 2068S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2069 const STRLEN cur = SvCUR(sv);
2070 const STRLEN len = SvLEN(sv);
f337b084 2071 STRLEN extend;
7918f24d
NC
2072
2073 PERL_ARGS_ASSERT_SV_EXP_GROW;
2074
f337b084
TH
2075 if (len - cur > needed) return SvPVX(sv);
2076 extend = needed > len ? needed : len;
2077 return SvGROW(sv, len+extend+1);
2078}
49704364
LW
2079
2080STATIC
2081SV **
f337b084 2082S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2083{
97aff369 2084 dVAR;
49704364 2085 tempsym_t lookahead;
f337b084
TH
2086 I32 items = endlist - beglist;
2087 bool found = next_symbol(symptr);
2088 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2089 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2090
7918f24d
NC
2091 PERL_ARGS_ASSERT_PACK_REC;
2092
f337b084
TH
2093 if (symptr->level == 0 && found && symptr->code == 'U') {
2094 marked_upgrade(aTHX_ cat, symptr);
2095 symptr->flags |= FLAG_DO_UTF8;
2096 utf8 = 0;
49704364 2097 }
f337b084 2098 symptr->strbeg = SvCUR(cat);
49704364
LW
2099
2100 while (found) {
f337b084
TH
2101 SV *fromstr;
2102 STRLEN fromlen;
2103 I32 len;
a0714e2c 2104 SV *lengthcode = NULL;
49704364 2105 I32 datumtype = symptr->code;
f337b084
TH
2106 howlen_t howlen = symptr->howlen;
2107 char *start = SvPVX(cat);
2108 char *cur = start + SvCUR(cat);
a1219b5e 2109 bool needs_swap;
49704364 2110
f337b084
TH
2111#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2112
2113 switch (howlen) {
fc241834 2114 case e_star:
f337b084
TH
2115 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2116 0 : items;
2117 break;
2118 default:
2119 /* e_no_len and e_number */
2120 len = symptr->length;
49704364
LW
2121 break;
2122 }
2123
f337b084 2124 if (len) {
a7a3cfaa 2125 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2126
a7a3cfaa
TH
2127 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2128 /* We can process this letter. */
2129 STRLEN size = props & PACK_SIZE_MASK;
2130 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2131 }
f337b084
TH
2132 }
2133
49704364
LW
2134 /* Look ahead for next symbol. Do we have code/code? */
2135 lookahead = *symptr;
2136 found = next_symbol(&lookahead);
246f24af
TH
2137 if (symptr->flags & FLAG_SLASH) {
2138 IV count;
f337b084 2139 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2140 if (strchr("aAZ", lookahead.code)) {
2141 if (lookahead.howlen == e_number) count = lookahead.length;
2142 else {
ce399ba6 2143 if (items > 0) {
48a5da33 2144 count = sv_len_utf8(*beglist);
ce399ba6 2145 }
246f24af
TH
2146 else count = 0;
2147 if (lookahead.code == 'Z') count++;
2148 }
2149 } else {
2150 if (lookahead.howlen == e_number && lookahead.length < items)
2151 count = lookahead.length;
2152 else count = items;
2153 }
2154 lookahead.howlen = e_number;
2155 lookahead.length = count;
2156 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2157 }
49704364 2158
a1219b5e
NC
2159 needs_swap = NEEDS_SWAP(datumtype);
2160
fc241834
RGS
2161 /* Code inside the switch must take care to properly update
2162 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2163 doesn't simply leave using break */
1109a392 2164 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2165 default:
f337b084
TH
2166 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2167 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2168 case '%':
49704364 2169 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2170 {
2171 char *from;
28be1210 2172 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2173 case '.':
2174 if (howlen == e_star) from = start;
2175 else if (len == 0) from = cur;
2176 else {
2177 tempsym_t *group = symptr;
2178
2179 while (--len && group) group = group->previous;
2180 from = group ? start + group->strbeg : start;
2181 }
2182 fromstr = NEXTFROM;
2183 len = SvIV(fromstr);
2184 goto resize;
28be1210 2185 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2186 case '@':
28be1210
TH
2187 from = start + symptr->strbeg;
2188 resize:
28be1210 2189 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2190 if (len >= 0) {
2191 while (len && from < cur) {
2192 from += UTF8SKIP(from);
2193 len--;
2194 }
2195 if (from > cur)
2196 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2197 if (len) {
2198 /* Here we know from == cur */
2199 grow:
2200 GROWING(0, cat, start, cur, len);
2201 Zero(cur, len, char);
2202 cur += len;
2203 } else if (from < cur) {
2204 len = cur - from;
2205 goto shrink;
2206 } else goto no_change;
2207 } else {
2208 cur = from;
2209 len = -len;
2210 goto utf8_shrink;
f337b084 2211 }
28be1210
TH
2212 else {
2213 len -= cur - from;
f337b084 2214 if (len > 0) goto grow;
28be1210 2215 if (len == 0) goto no_change;
fc241834 2216 len = -len;
28be1210 2217 goto shrink;
f337b084 2218 }
a6ec74c1 2219 break;
28be1210 2220 }
fc241834 2221 case '(': {
49704364 2222 tempsym_t savsym = *symptr;
66c611c5
MHM
2223 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2224 symptr->flags |= group_modifiers;
49704364
LW
2225 symptr->patend = savsym.grpend;
2226 symptr->level++;
f337b084 2227 symptr->previous = &lookahead;
18529408 2228 while (len--) {
f337b084
TH
2229 U32 was_utf8;
2230 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2231 else symptr->flags &= ~FLAG_PARSE_UTF8;
2232 was_utf8 = SvUTF8(cat);
49704364 2233 symptr->patptr = savsym.grpbeg;
f337b084
TH
2234 beglist = pack_rec(cat, symptr, beglist, endlist);
2235 if (SvUTF8(cat) != was_utf8)
2236 /* This had better be an upgrade while in utf8==0 mode */
2237 utf8 = 1;
2238
49704364 2239 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2240 break; /* No way to continue */
2241 }
ee790063 2242 items = endlist - beglist;
f337b084
TH
2243 lookahead.flags = symptr->flags & ~group_modifiers;
2244 goto no_change;
18529408 2245 }
62f95557
IZ
2246 case 'X' | TYPE_IS_SHRIEKING:
2247 if (!len) /* Avoid division by 0 */
2248 len = 1;
f337b084
TH
2249 if (utf8) {
2250 char *hop, *last;
2251 I32 l = len;
2252 hop = last = start;
2253 while (hop < cur) {
2254 hop += UTF8SKIP(hop);
2255 if (--l == 0) {
2256 last = hop;
2257 l = len;
2258 }
2259 }
2260 if (last > cur)
2261 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2262 cur = last;
2263 break;
2264 }
2265 len = (cur-start) % len;
62f95557 2266 /* FALL THROUGH */
a6ec74c1 2267 case 'X':
f337b084
TH
2268 if (utf8) {
2269 if (len < 1) goto no_change;
28be1210 2270 utf8_shrink:
f337b084
TH
2271 while (len > 0) {
2272 if (cur <= start)
28be1210
TH
2273 Perl_croak(aTHX_ "'%c' outside of string in pack",
2274 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2275 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2276 if (cur <= start)
28be1210
TH
2277 Perl_croak(aTHX_ "'%c' outside of string in pack",
2278 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2279 }
2280 len--;
2281 }
2282 } else {
fc241834 2283 shrink:
f337b084 2284 if (cur - start < len)
28be1210
TH
2285 Perl_croak(aTHX_ "'%c' outside of string in pack",
2286 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2287 cur -= len;
2288 }
2289 if (cur < start+symptr->strbeg) {
2290 /* Make sure group starts don't point into the void */
2291 tempsym_t *group;
9e27e96a 2292 const STRLEN length = cur-start;
f337b084
TH
2293 for (group = symptr;
2294 group && length < group->strbeg;
2295 group = group->previous) group->strbeg = length;
2296 lookahead.strbeg = length;
2297 }
a6ec74c1 2298 break;
fc241834
RGS
2299 case 'x' | TYPE_IS_SHRIEKING: {
2300 I32 ai32;
62f95557
IZ
2301 if (!len) /* Avoid division by 0 */
2302 len = 1;
230e1fce 2303 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2304 else ai32 = (cur - start) % len;
2305 if (ai32 == 0) goto no_change;
2306 len -= ai32;
2307 }
2308 /* FALL THROUGH */
a6ec74c1 2309 case 'x':
f337b084 2310 goto grow;
a6ec74c1
JH
2311 case 'A':
2312 case 'Z':
f337b084 2313 case 'a': {
f7fe979e 2314 const char *aptr;
f337b084 2315
a6ec74c1 2316 fromstr = NEXTFROM;
e62f0680 2317 aptr = SvPV_const(fromstr, fromlen);
f337b084 2318 if (DO_UTF8(fromstr)) {
f7fe979e 2319 const char *end, *s;
f337b084
TH
2320
2321 if (!utf8 && !SvUTF8(cat)) {
2322 marked_upgrade(aTHX_ cat, symptr);
2323 lookahead.flags |= FLAG_DO_UTF8;
2324 lookahead.strbeg = symptr->strbeg;
2325 utf8 = 1;
2326 start = SvPVX(cat);
2327 cur = start + SvCUR(cat);
2328 }
fc241834 2329 if (howlen == e_star) {
f337b084
TH
2330 if (utf8) goto string_copy;
2331 len = fromlen+1;
2332 }
2333 s = aptr;
2334 end = aptr + fromlen;
2335 fromlen = datumtype == 'Z' ? len-1 : len;
2336 while ((I32) fromlen > 0 && s < end) {
2337 s += UTF8SKIP(s);
2338 fromlen--;
2339 }
2340 if (s > end)
2341 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2342 if (utf8) {
fc241834 2343 len = fromlen;
f337b084
TH
2344 if (datumtype == 'Z') len++;
2345 fromlen = s-aptr;
2346 len += fromlen;
fc241834 2347
f337b084 2348 goto string_copy;
fc241834 2349 }
f337b084
TH
2350 fromlen = len - fromlen;
2351 if (datumtype == 'Z') fromlen--;
2352 if (howlen == e_star) {
2353 len = fromlen;
2354 if (datumtype == 'Z') len++;
fc241834 2355 }
f337b084 2356 GROWING(0, cat, start, cur, len);
fc241834 2357 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2358 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2359 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2360 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2361 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2362 cur += fromlen;
a6ec74c1 2363 len -= fromlen;
f337b084
TH
2364 } else if (utf8) {
2365 if (howlen == e_star) {
2366 len = fromlen;
2367 if (datumtype == 'Z') len++;
a6ec74c1 2368 }
f337b084
TH
2369 if (len <= (I32) fromlen) {
2370 fromlen = len;
2371 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2372 }
fc241834 2373 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2374 upgrade, so:
2375 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2376 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2377 len -= fromlen;
2378 while (fromlen > 0) {
230e1fce 2379 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2380 aptr++;
2381 fromlen--;
fc241834 2382 }
f337b084
TH
2383 } else {
2384 string_copy:
2385 if (howlen == e_star) {
2386 len = fromlen;
2387 if (datumtype == 'Z') len++;
2388 }
2389 if (len <= (I32) fromlen) {
2390 fromlen = len;
2391 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2392 }
f337b084
TH
2393 GROWING(0, cat, start, cur, len);
2394 Copy(aptr, cur, fromlen, char);
2395 cur += fromlen;
2396 len -= fromlen;
a6ec74c1 2397 }
f337b084
TH
2398 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2399 cur += len;
3c4fb04a 2400 SvTAINT(cat);
a6ec74c1 2401 break;
f337b084 2402 }
a6ec74c1 2403 case 'B':
f337b084 2404 case 'b': {
b83604b4 2405 const char *str, *end;
f337b084
TH
2406 I32 l, field_len;
2407 U8 bits;
2408 bool utf8_source;
2409 U32 utf8_flags;
a6ec74c1 2410
fc241834 2411 fromstr = NEXTFROM;
b83604b4 2412 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2413 end = str + fromlen;
2414 if (DO_UTF8(fromstr)) {
2415 utf8_source = TRUE;
041457d9 2416 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2417 } else {
2418 utf8_source = FALSE;
2419 utf8_flags = 0; /* Unused, but keep compilers happy */
2420 }
2421 if (howlen == e_star) len = fromlen;
2422 field_len = (len+7)/8;
2423 GROWING(utf8, cat, start, cur, field_len);
2424 if (len > (I32)fromlen) len = fromlen;
2425 bits = 0;
2426 l = 0;
2427 if (datumtype == 'B')
2428 while (l++ < len) {
2429 if (utf8_source) {
95b63a38 2430 UV val = 0;
f337b084
TH
2431 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2432 bits |= val & 1;
2433 } else bits |= *str++ & 1;
2434 if (l & 7) bits <<= 1;
fc241834 2435 else {
f337b084
TH
2436 PUSH_BYTE(utf8, cur, bits);
2437 bits = 0;
a6ec74c1
JH
2438 }
2439 }
f337b084
TH
2440 else
2441 /* datumtype == 'b' */
2442 while (l++ < len) {
2443 if (utf8_source) {
95b63a38 2444 UV val = 0;
f337b084
TH
2445 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2446 if (val & 1) bits |= 0x80;
2447 } else if (*str++ & 1)
2448 bits |= 0x80;
2449 if (l & 7) bits >>= 1;
fc241834 2450 else {
f337b084
TH
2451 PUSH_BYTE(utf8, cur, bits);
2452 bits = 0;
a6ec74c1
JH
2453 }
2454 }
f337b084
TH
2455 l--;
2456 if (l & 7) {
fc241834 2457 if (datumtype == 'B')
f337b084 2458 bits <<= 7 - (l & 7);
fc241834 2459 else
f337b084
TH
2460 bits >>= 7 - (l & 7);
2461 PUSH_BYTE(utf8, cur, bits);
2462 l += 7;
a6ec74c1 2463 }
f337b084
TH
2464 /* Determine how many chars are left in the requested field */
2465 l /= 8;
2466 if (howlen == e_star) field_len = 0;
2467 else field_len -= l;
2468 Zero(cur, field_len, char);
2469 cur += field_len;
a6ec74c1 2470 break;
f337b084 2471 }
a6ec74c1 2472 case 'H':
f337b084 2473 case 'h': {
10516c54 2474 const char *str, *end;
f337b084
TH
2475 I32 l, field_len;
2476 U8 bits;
2477 bool utf8_source;
2478 U32 utf8_flags;
a6ec74c1 2479
fc241834 2480 fromstr = NEXTFROM;
10516c54 2481 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2482 end = str + fromlen;
2483 if (DO_UTF8(fromstr)) {
2484 utf8_source = TRUE;
041457d9 2485 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2486 } else {
2487 utf8_source = FALSE;
2488 utf8_flags = 0; /* Unused, but keep compilers happy */
2489 }
2490 if (howlen == e_star) len = fromlen;
2491 field_len = (len+1)/2;
2492 GROWING(utf8, cat, start, cur, field_len);
2493 if (!utf8 && len > (I32)fromlen) len = fromlen;
2494 bits = 0;
2495 l = 0;
2496 if (datumtype == 'H')
2497 while (l++ < len) {
2498 if (utf8_source) {
95b63a38 2499 UV val = 0;
f337b084
TH
2500 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2501 if (val < 256 && isALPHA(val))
2502 bits |= (val + 9) & 0xf;
a6ec74c1 2503 else
f337b084
TH
2504 bits |= val & 0xf;
2505 } else if (isALPHA(*str))
2506 bits |= (*str++ + 9) & 0xf;
2507 else
2508 bits |= *str++ & 0xf;
2509 if (l & 1) bits <<= 4;
fc241834 2510 else {
f337b084
TH
2511 PUSH_BYTE(utf8, cur, bits);
2512 bits = 0;
a6ec74c1
JH
2513 }
2514 }
f337b084
TH
2515 else
2516 while (l++ < len) {
2517 if (utf8_source) {
95b63a38 2518 UV val = 0;
f337b084
TH
2519 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2520 if (val < 256 && isALPHA(val))
2521 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2522 else
f337b084
TH
2523 bits |= (val & 0xf) << 4;
2524 } else if (isALPHA(*str))
2525 bits |= ((*str++ + 9) & 0xf) << 4;
2526 else
2527 bits |= (*str++ & 0xf) << 4;
2528 if (l & 1) bits >>= 4;
fc241834 2529 else {
f337b084
TH
2530 PUSH_BYTE(utf8, cur, bits);
2531 bits = 0;
a6ec74c1 2532 }
fc241834 2533 }
f337b084
TH
2534 l--;
2535 if (l & 1) {
2536 PUSH_BYTE(utf8, cur, bits);
2537 l++;
2538 }
2539 /* Determine how many chars are left in the requested field */
2540 l /= 2;
2541 if (howlen == e_star) field_len = 0;
2542 else field_len -= l;
2543 Zero(cur, field_len, char);
2544 cur += field_len;
2545 break;
fc241834
RGS
2546 }
2547 case 'c':
f337b084
TH
2548 while (len-- > 0) {
2549 IV aiv;
2550 fromstr = NEXTFROM;
2551 aiv = SvIV(fromstr);
a2a5de95
NC
2552 if ((-128 > aiv || aiv > 127))
2553 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2554 "Character in 'c' format wrapped in pack");
585ec06d 2555 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2556 }
2557 break;
2558 case 'C':
f337b084
TH
2559 if (len == 0) {
2560 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2561 break;
2562 }
a6ec74c1 2563 while (len-- > 0) {
f337b084 2564 IV aiv;
a6ec74c1 2565 fromstr = NEXTFROM;
f337b084 2566 aiv = SvIV(fromstr);
a2a5de95
NC
2567 if ((0 > aiv || aiv > 0xff))
2568 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2569 "Character in 'C' format wrapped in pack");
1651fc44 2570 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2571 }
fc241834
RGS
2572 break;
2573 case 'W': {
2574 char *end;
670f1322 2575 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2576
2577 end = start+SvLEN(cat)-1;
2578 if (utf8) end -= UTF8_MAXLEN-1;
2579 while (len-- > 0) {
2580 UV auv;
2581 fromstr = NEXTFROM;
2582 auv = SvUV(fromstr);
2583 if (in_bytes) auv = auv % 0x100;
2584 if (utf8) {
2585 W_utf8:
2586 if (cur > end) {
2587 *cur = '\0';
b162af07 2588 SvCUR_set(cat, cur - start);
fc241834
RGS
2589
2590 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2591 end = start+SvLEN(cat)-UTF8_MAXLEN;
2592 }
230e1fce
NC
2593 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2594 NATIVE_TO_UNI(auv),
041457d9 2595 warn_utf8 ?
230e1fce 2596 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2597 } else {
2598 if (auv >= 0x100) {
2599 if (!SvUTF8(cat)) {
2600 *cur = '\0';
b162af07 2601 SvCUR_set(cat, cur - start);
fc241834
RGS
2602 marked_upgrade(aTHX_ cat, symptr);
2603 lookahead.flags |= FLAG_DO_UTF8;
2604 lookahead.strbeg = symptr->strbeg;
2605 utf8 = 1;
2606 start = SvPVX(cat);
2607 cur = start + SvCUR(cat);
2608 end = start+SvLEN(cat)-UTF8_MAXLEN;
2609 goto W_utf8;
2610 }
a2a5de95
NC
2611 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2612 "Character in 'W' format wrapped in pack");
fc241834
RGS
2613 auv &= 0xff;
2614 }
2615 if (cur >= end) {
2616 *cur = '\0';
b162af07 2617 SvCUR_set(cat, cur - start);
fc241834
RGS
2618 GROWING(0, cat, start, cur, len+1);
2619 end = start+SvLEN(cat)-1;
2620 }
fe2774ed 2621 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2622 }
2623 }
2624 break;
fc241834
RGS
2625 }
2626 case 'U': {
2627 char *end;
2628
2629 if (len == 0) {
2630 if (!(symptr->flags & FLAG_DO_UTF8)) {
2631 marked_upgrade(aTHX_ cat, symptr);
2632 lookahead.flags |= FLAG_DO_UTF8;
2633 lookahead.strbeg = symptr->strbeg;
2634 }
2635 utf8 = 0;
2636 goto no_change;
2637 }
2638
2639 end = start+SvLEN(cat);
2640 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2641 while (len-- > 0) {
fc241834 2642 UV auv;
a6ec74c1 2643 fromstr = NEXTFROM;
fc241834
RGS
2644 auv = SvUV(fromstr);
2645 if (utf8) {
230e1fce 2646 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2647 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2648 warn_utf8 ?
fc241834
RGS
2649 0 : UNICODE_ALLOW_ANY);
2650 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2651 *cur = '\0';
b162af07 2652 SvCUR_set(cat, cur - start);
fc241834
RGS
2653 GROWING(0, cat, start, cur,
2654 len+(endb-buffer)*UTF8_EXPAND);
2655 end = start+SvLEN(cat);
2656 }
64844641 2657 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2658 } else {
2659 if (cur >= end) {
2660 *cur = '\0';
b162af07 2661 SvCUR_set(cat, cur - start);
fc241834
RGS
2662 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2663 end = start+SvLEN(cat)-UTF8_MAXLEN;
2664 }
230e1fce 2665 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2666 warn_utf8 ?
230e1fce 2667 0 : UNICODE_ALLOW_ANY);
fc241834 2668 }
a6ec74c1 2669 }
a6ec74c1 2670 break;
fc241834 2671 }
a6ec74c1
JH
2672 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2673 case 'f':
a6ec74c1 2674 while (len-- > 0) {
f337b084
TH
2675 float afloat;
2676 NV anv;
a6ec74c1 2677 fromstr = NEXTFROM;
f337b084 2678 anv = SvNV(fromstr);
85bba25f 2679# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2680 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2681 * on Alpha; fake it if we don't have them.
2682 */
f337b084 2683 if (anv > FLT_MAX)
fc241834 2684 afloat = FLT_MAX;
f337b084 2685 else if (anv < -FLT_MAX)
fc241834 2686 afloat = -FLT_MAX;
f337b084 2687 else afloat = (float)anv;
baf3cf9c 2688# else
f337b084 2689 afloat = (float)anv;
baf3cf9c 2690# endif
2b4ad569 2691 DO_BO_PACK(afloat);
f337b084 2692 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2693 }
2694 break;
2695 case 'd':
a6ec74c1 2696 while (len-- > 0) {
f337b084
TH
2697 double adouble;
2698 NV anv;
a6ec74c1 2699 fromstr = NEXTFROM;
f337b084 2700 anv = SvNV(fromstr);
85bba25f 2701# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2702 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2703 * on Alpha; fake it if we don't have them.
2704 */
f337b084 2705 if (anv > DBL_MAX)
fc241834 2706 adouble = DBL_MAX;
f337b084 2707 else if (anv < -DBL_MAX)
fc241834 2708 adouble = -DBL_MAX;
f337b084 2709 else adouble = (double)anv;
baf3cf9c 2710# else
f337b084 2711 adouble = (double)anv;
baf3cf9c 2712# endif
2b4ad569 2713 DO_BO_PACK(adouble);
f337b084 2714 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2715 }
2716 break;
fc241834 2717 case 'F': {
275663fa 2718 NV_bytes anv;
1109a392 2719 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2720 while (len-- > 0) {
2721 fromstr = NEXTFROM;
cd07c537
DM
2722#ifdef __GNUC__
2723 /* to work round a gcc/x86 bug; don't use SvNV */
2724 anv.nv = sv_2nv(fromstr);
2725#else
275663fa 2726 anv.nv = SvNV(fromstr);
cd07c537 2727#endif
2b4ad569 2728 DO_BO_PACK(anv);
275663fa 2729 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2730 }
2731 break;
fc241834 2732 }
92d41999 2733#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2734 case 'D': {
275663fa 2735 ld_bytes aldouble;
1109a392
MHM
2736 /* long doubles can have unused bits, which may be nonzero */
2737 Zero(&aldouble, 1, long double);
92d41999
JH
2738 while (len-- > 0) {
2739 fromstr = NEXTFROM;
cd07c537
DM
2740# ifdef __GNUC__
2741 /* to work round a gcc/x86 bug; don't use SvNV */
2742 aldouble.ld = (long double)sv_2nv(fromstr);
2743# else
275663fa 2744 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2745# endif
2b4ad569 2746 DO_BO_PACK(aldouble);
275663fa 2747 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2748 }
2749 break;
fc241834 2750 }
92d41999 2751#endif
068bd2e7 2752 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2753 case 'n':
2754 while (len-- > 0) {
f337b084 2755 I16 ai16;
a6ec74c1 2756 fromstr = NEXTFROM;
ef108786 2757 ai16 = (I16)SvIV(fromstr);
ef108786 2758 ai16 = PerlSock_htons(ai16);
f337b084 2759 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2760 }
2761 break;
068bd2e7 2762 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2763 case 'v':
2764 while (len-- > 0) {
f337b084 2765 I16 ai16;
a6ec74c1 2766 fromstr = NEXTFROM;
ef108786 2767 ai16 = (I16)SvIV(fromstr);
ef108786 2768 ai16 = htovs(ai16);
f337b084 2769 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2770 }
2771 break;
49704364 2772 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2773#if SHORTSIZE != SIZE16
fc241834 2774 while (len-- > 0) {
f337b084 2775 unsigned short aushort;
fc241834
RGS
2776 fromstr = NEXTFROM;
2777 aushort = SvUV(fromstr);
2b4ad569 2778 DO_BO_PACK(aushort);
f337b084 2779 PUSH_VAR(utf8, cur, aushort);
fc241834 2780 }
49704364
LW
2781 break;
2782#else
2783 /* Fall through! */
a6ec74c1 2784#endif
49704364 2785 case 'S':
fc241834 2786 while (len-- > 0) {
f337b084 2787 U16 au16;
fc241834
RGS
2788 fromstr = NEXTFROM;
2789 au16 = (U16)SvUV(fromstr);
2b4ad569 2790 DO_BO_PACK(au16);
f337b084 2791 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2792 }
2793 break;
49704364 2794 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2795#if SHORTSIZE != SIZE16
fc241834 2796 while (len-- > 0) {
f337b084 2797 short ashort;
fc241834
RGS
2798 fromstr = NEXTFROM;
2799 ashort = SvIV(fromstr);
2b4ad569 2800 DO_BO_PACK(ashort);
f337b084 2801 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2802 }
49704364
LW
2803 break;
2804#else
2805 /* Fall through! */
a6ec74c1 2806#endif
49704364
LW
2807 case 's':
2808 while (len-- > 0) {
f337b084 2809 I16 ai16;
49704364 2810 fromstr = NEXTFROM;
ef108786 2811 ai16 = (I16)SvIV(fromstr);
2b4ad569 2812 DO_BO_PACK(ai16);
f337b084 2813 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2814 }
2815 break;
2816 case 'I':
49704364 2817 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2818 while (len-- > 0) {
f337b084 2819 unsigned int auint;
a6ec74c1
JH
2820 fromstr = NEXTFROM;
2821 auint = SvUV(fromstr);
2b4ad569 2822 DO_BO_PACK(auint);
f337b084 2823 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2824 }
2825 break;
92d41999
JH
2826 case 'j':
2827 while (len-- > 0) {
f337b084 2828 IV aiv;
92d41999
JH
2829 fromstr = NEXTFROM;
2830 aiv = SvIV(fromstr);
2b4ad569 2831 DO_BO_PACK(aiv);
f337b084 2832 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
2833 }
2834 break;
2835 case 'J':
2836 while (len-- > 0) {
f337b084 2837 UV auv;
92d41999
JH
2838 fromstr = NEXTFROM;
2839 auv = SvUV(fromstr);
2b4ad569 2840 DO_BO_PACK(auv);
f337b084 2841 PUSH_VAR(utf8, cur, auv);
92d41999
JH
2842 }
2843 break;
a6ec74c1
JH
2844 case 'w':
2845 while (len-- > 0) {
f337b084 2846 NV anv;
a6ec74c1 2847 fromstr = NEXTFROM;
15e9f109 2848 anv = SvNV(fromstr);
a6ec74c1 2849
f337b084
TH
2850 if (anv < 0) {
2851 *cur = '\0';
b162af07 2852 SvCUR_set(cat, cur - start);
49704364 2853 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2854 }
a6ec74c1 2855
196b62db
NC
2856 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2857 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2858 any negative IVs will have already been got by the croak()
2859 above. IOK is untrue for fractions, so we test them
2860 against UV_MAX_P1. */
f337b084
TH
2861 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2862 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2863 char *in = buf + sizeof(buf);
196b62db 2864 UV auv = SvUV(fromstr);
a6ec74c1
JH
2865
2866 do {
eb160463 2867 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2868 auv >>= 7;
2869 } while (auv);
2870 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2871 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2872 in, (buf + sizeof(buf)) - in);
2873 } else if (SvPOKp(fromstr))
2874 goto w_string;
a6ec74c1 2875 else if (SvNOKp(fromstr)) {
0258719b 2876 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2877 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2878 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2879 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2880 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2881 Some C compilers are strict about integral constant
2882 expressions so we conservatively divide by a slightly
2883 smaller integer instead of multiplying by the exact
2884 floating-point value.
0258719b
NC
2885 */
2886#ifdef NV_MAX_10_EXP
f337b084 2887 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2888 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2889#else
f337b084 2890 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2891 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2892#endif
a6ec74c1
JH
2893 char *in = buf + sizeof(buf);
2894
8b6e33c7 2895 anv = Perl_floor(anv);
a6ec74c1 2896 do {
8b6e33c7 2897 const NV next = Perl_floor(anv / 128);
a6ec74c1 2898 if (in <= buf) /* this cannot happen ;-) */
49704364 2899 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2900 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2901 anv = next;
2902 } while (anv > 0);
a6ec74c1 2903 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2904 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2905 in, (buf + sizeof(buf)) - in);
2906 } else {
8b6e33c7
AL
2907 const char *from;
2908 char *result, *in;
735b914b
JH
2909 SV *norm;
2910 STRLEN len;
2911 bool done;
2912
f337b084 2913 w_string:
735b914b 2914 /* Copy string and check for compliance */
349d4f2f 2915 from = SvPV_const(fromstr, len);
735b914b 2916 if ((norm = is_an_int(from, len)) == NULL)
49704364 2917 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2918
a02a5408 2919 Newx(result, len, char);
735b914b
JH
2920 in = result + len;
2921 done = FALSE;
f337b084 2922 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2923 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2924 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2925 in, (result + len) - in);
735b914b
JH
2926 Safefree(result);
2927 SvREFCNT_dec(norm); /* free norm */
fc241834 2928 }
a6ec74c1
JH
2929 }
2930 break;
2931 case 'i':
49704364 2932 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2933 while (len-- > 0) {
f337b084 2934 int aint;
a6ec74c1
JH
2935 fromstr = NEXTFROM;
2936 aint = SvIV(fromstr);
2b4ad569 2937 DO_BO_PACK(aint);
f337b084 2938 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
2939 }
2940 break;
068bd2e7 2941 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2942 case 'N':
2943 while (len-- > 0) {
f337b084 2944 U32 au32;
a6ec74c1 2945 fromstr = NEXTFROM;
ef108786 2946 au32 = SvUV(fromstr);
ef108786 2947 au32 = PerlSock_htonl(au32);
f337b084 2948 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2949 }
2950 break;
068bd2e7 2951 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2952 case 'V':
2953 while (len-- > 0) {
f337b084 2954 U32 au32;
a6ec74c1 2955 fromstr = NEXTFROM;
ef108786 2956 au32 = SvUV(fromstr);
ef108786 2957 au32 = htovl(au32);
f337b084 2958 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2959 }
2960 break;
49704364 2961 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2962#if LONGSIZE != SIZE32
fc241834 2963 while (len-- > 0) {
f337b084 2964 unsigned long aulong;
fc241834
RGS
2965 fromstr = NEXTFROM;
2966 aulong = SvUV(fromstr);
2b4ad569 2967 DO_BO_PACK(aulong);
f337b084 2968 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 2969 }
49704364
LW
2970 break;
2971#else
2972 /* Fall though! */
a6ec74c1 2973#endif
49704364 2974 case 'L':
fc241834 2975 while (len-- > 0) {
f337b084 2976 U32 au32;
fc241834
RGS
2977 fromstr = NEXTFROM;
2978 au32 = SvUV(fromstr);
2b4ad569 2979 DO_BO_PACK(au32);
f337b084 2980 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2981 }
2982 break;
49704364 2983 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2984#if LONGSIZE != SIZE32
fc241834 2985 while (len-- > 0) {
f337b084 2986 long along;
fc241834
RGS
2987 fromstr = NEXTFROM;
2988 along = SvIV(fromstr);
2b4ad569 2989 DO_BO_PACK(along);
f337b084 2990 PUSH_VAR(utf8, cur, along);
a6ec74c1 2991 }
49704364
LW
2992 break;
2993#else
2994 /* Fall though! */
a6ec74c1 2995#endif
49704364
LW
2996 case 'l':
2997 while (len-- > 0) {
f337b084 2998 I32 ai32;
49704364 2999 fromstr = NEXTFROM;
ef108786 3000 ai32 = SvIV(fromstr);
2b4ad569 3001 DO_BO_PACK(ai32);
f337b084 3002 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3003 }
3004 break;
3005#ifdef HAS_QUAD
3006 case 'Q':
3007 while (len-- > 0) {
f337b084 3008 Uquad_t auquad;
a6ec74c1 3009 fromstr = NEXTFROM;
f337b084 3010 auquad = (Uquad_t) SvUV(fromstr);
2b4ad569 3011 DO_BO_PACK(auquad);
f337b084 3012 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3013 }
3014 break;
3015 case 'q':
3016 while (len-- > 0) {
f337b084 3017 Quad_t aquad;
a6ec74c1
JH
3018 fromstr = NEXTFROM;
3019 aquad = (Quad_t)SvIV(fromstr);
2b4ad569 3020 DO_BO_PACK(aquad);
f337b084 3021 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3022 }
3023 break;
f337b084 3024#endif /* HAS_QUAD */
a6ec74c1
JH
3025 case 'P':
3026 len = 1; /* assume SV is correct length */
f337b084 3027 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3028 /* Fall through! */
a6ec74c1
JH
3029 case 'p':
3030 while (len-- > 0) {
83003860 3031 const char *aptr;
f337b084 3032
a6ec74c1 3033 fromstr = NEXTFROM;
28a4f200 3034 SvGETMAGIC(fromstr);
3035 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3036 else {
a6ec74c1
JH
3037 /* XXX better yet, could spirit away the string to
3038 * a safe spot and hang on to it until the result
3039 * of pack() (and all copies of the result) are
3040 * gone.
3041 */
041457d9 3042 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3043 !SvREADONLY(fromstr)))) {
3044 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3045 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3046 }
3047 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3048 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3049 else
2596d9fe 3050 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3051 }
2b4ad569 3052 DO_BO_PACK(aptr);
f337b084 3053 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3054 }
3055 break;
fc241834 3056 case 'u': {
f7fe979e 3057 const char *aptr, *aend;
fc241834 3058 bool from_utf8;
f337b084 3059
a6ec74c1 3060 fromstr = NEXTFROM;
fc241834
RGS
3061 if (len <= 2) len = 45;
3062 else len = len / 3 * 3;
3063 if (len >= 64) {
a2a5de95
NC
3064 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3065 "Field too wide in 'u' format in pack");
fc241834
RGS
3066 len = 63;
3067 }
83003860 3068 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3069 from_utf8 = DO_UTF8(fromstr);
3070 if (from_utf8) {
3071 aend = aptr + fromlen;
3f63b0e5 3072 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3073 } else aend = NULL; /* Unused, but keep compilers happy */
3074 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3075 while (fromlen > 0) {
fc241834 3076 U8 *end;
a6ec74c1 3077 I32 todo;
fc241834 3078 U8 hunk[1+63/3*4+1];
a6ec74c1 3079
eb160463 3080 if ((I32)fromlen > len)
a6ec74c1
JH
3081 todo = len;
3082 else
3083 todo = fromlen;
fc241834
RGS
3084 if (from_utf8) {
3085 char buffer[64];
3086 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3087 'u' | TYPE_IS_PACK)) {
3088 *cur = '\0';
b162af07 3089 SvCUR_set(cat, cur - start);
5637ef5b
NC
3090 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3091 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3092 aptr, aend, buffer, (long) todo);
fc241834
RGS
3093 }
3094 end = doencodes(hunk, buffer, todo);
3095 } else {
3096 end = doencodes(hunk, aptr, todo);
3097 aptr += todo;
3098 }
3099 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3100 fromlen -= todo;
3101 }
a6ec74c1
JH
3102 break;
3103 }
f337b084
TH
3104 }
3105 *cur = '\0';
b162af07 3106 SvCUR_set(cat, cur - start);
f337b084 3107 no_change:
49704364 3108 *symptr = lookahead;
a6ec74c1 3109 }
49704364 3110 return beglist;
18529408
IZ
3111}
3112#undef NEXTFROM
3113
3114
3115PP(pp_pack)
3116{
97aff369 3117 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3118 SV *cat = TARG;
18529408 3119 STRLEN fromlen;
349d4f2f 3120 SV *pat_sv = *++MARK;
eb578fdb
KW
3121 const char *pat = SvPV_const(pat_sv, fromlen);
3122 const char *patend = pat + fromlen;
18529408
IZ
3123
3124 MARK++;
76f68e9b 3125 sv_setpvs(cat, "");
f337b084 3126 SvUTF8_off(cat);
18529408 3127
7accc089 3128 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3129
a6ec74c1
JH
3130 SvSETMAGIC(cat);
3131 SP = ORIGMARK;
3132 PUSHs(cat);
3133 RETURN;
3134}
a6ec74c1 3135
73cb7263
NC
3136/*
3137 * Local variables:
3138 * c-indentation-style: bsd
3139 * c-basic-offset: 4
14d04a33 3140 * indent-tabs-mode: nil
73cb7263
NC
3141 * End:
3142 *
14d04a33 3143 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3144 */