This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
When endian-swapping in unpack, simply copy the bytes in reverse order.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
f337b084
TH
132#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
133#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3 134
a1219b5e
NC
135#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
137#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
139#else
140# error "Unsupported byteorder"
20aa3a38
NC
141 /* Need to add code here to re-instate mixed endian support.
142 NEEDS_SWAP would need to hold a flag indicating which action to
143 take, and S_reverse_copy and the code in uni_to_bytes would need
144 logic adding to deal with any mixed-endian transformations needed.
145 */
a1219b5e
NC
146#endif
147
08ca2aa3 148/* Only to be used inside a loop (see the break) */
275663fa 149#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
150STMT_START { \
151 if (utf8) { \
f337b084 152 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 153 (char *) (buf), len, datumtype)) break; \
08ca2aa3 154 } else { \
20aa3a38
NC
155 if (needs_swap) \
156 S_reverse_copy(s, (char *) (buf), len); \
157 else \
158 Copy(s, (char *) (buf), len, char); \
275663fa 159 s += len; \
08ca2aa3 160 } \
08ca2aa3
TH
161} STMT_END
162
7285e3f4
NC
163#define SHIFT16(utf8, s, strend, p, datumtype) \
164 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype)
165
166#define SHIFT32(utf8, s, strend, p, datumtype) \
167 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype)
168
275663fa
TC
169#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
170 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
171
f337b084 172#define PUSH_VAR(utf8, aptr, var) \
230e1fce 173 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 174
49704364
WL
175/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
176#define MAX_SUB_TEMPLATE_LEVEL 100
177
66c611c5 178/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
179#define FLAG_WAS_UTF8 0x40
180#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 181#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 182#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
183#define FLAG_SLASH 0x04
184#define FLAG_COMMA 0x02
185#define FLAG_PACK 0x01
186
a6ec74c1
JH
187STATIC SV *
188S_mul128(pTHX_ SV *sv, U8 m)
189{
190 STRLEN len;
191 char *s = SvPV(sv, len);
192 char *t;
a6ec74c1 193
7918f24d
NC
194 PERL_ARGS_ASSERT_MUL128;
195
a6ec74c1 196 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 197 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
198
199 sv_catsv(tmpNew, sv);
200 SvREFCNT_dec(sv); /* free old sv */
201 sv = tmpNew;
202 s = SvPV(sv, len);
203 }
204 t = s + len - 1;
205 while (!*t) /* trailing '\0'? */
206 t--;
207 while (t > s) {
f7fe979e 208 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
209 *(t--) = '0' + (char)(i % 10);
210 m = (char)(i / 10);
a6ec74c1
JH
211 }
212 return (sv);
213}
214
215/* Explosives and implosives. */
216
217#if 'I' == 73 && 'J' == 74
218/* On an ASCII/ISO kind of system */
219#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
220#else
221/*
222 Some other sort of character set - use memchr() so we don't match
223 the null byte.
224 */
225#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
226#endif
227
66c611c5 228/* type modifiers */
62f95557 229#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
230#define TYPE_IS_BIG_ENDIAN 0x200
231#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 232#define TYPE_IS_PACK 0x800
1109a392 233#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 234#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
235#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
236
7212898e
NC
237# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
238# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
239
240# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
241
2b4ad569 242# define DO_BO_PACK(var) \
3cc90e2e 243 STMT_START { \
a1219b5e 244 if (needs_swap) { \
e8fda8c4 245 my_swabn(&var, sizeof(var)); \
3cc90e2e
NC
246 } \
247 } STMT_END
248
78d46eaa 249#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 250#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
251#define PACK_SIZE_MASK 0x3F
252
298bc19c 253#include "packsizetables.c"
78d46eaa 254
20aa3a38
NC
255static void
256S_reverse_copy(const char *src, char *dest, STRLEN len)
257{
258 dest += len;
259 while (len--)
260 *--dest = *src++;
261}
262
08ca2aa3 263STATIC U8
f7fe979e 264uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 265{
08ca2aa3 266 STRLEN retlen;
0bcc34c2 267 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 269 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
270 warnings), but these two mean we make no progress in the string and
271 might enter an infinite loop */
272 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
273 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
274 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 275 if (val >= 0x100) {
a2a5de95
NC
276 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
277 "Character in '%c' format wrapped in unpack",
278 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
279 val &= 0xff;
280 }
281 *s += retlen;
fe2774ed 282 return (U8)val;
08ca2aa3
TH
283}
284
f337b084
TH
285#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
286 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
287 *(U8 *)(s)++)
288
289STATIC bool
f7fe979e 290uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
291{
292 UV val;
293 STRLEN retlen;
f7fe979e 294 const char *from = *s;
08ca2aa3 295 int bad = 0;
f7fe979e 296 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 297 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
298 const bool needs_swap = NEEDS_SWAP(datumtype);
299
300 if (needs_swap)
301 buf += buf_len;
302
08ca2aa3
TH
303 for (;buf_len > 0; buf_len--) {
304 if (from >= end) return FALSE;
f337b084 305 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
306 if (retlen == (STRLEN) -1 || retlen == 0) {
307 from += UTF8SKIP(from);
308 bad |= 1;
309 } else from += retlen;
310 if (val >= 0x100) {
311 bad |= 2;
312 val &= 0xff;
313 }
20aa3a38
NC
314 if (needs_swap)
315 *(U8 *)--buf = (U8)val;
316 else
317 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
318 }
319 /* We have enough characters for the buffer. Did we have problems ? */
320 if (bad) {
321 if (bad & 1) {
322 /* Rewalk the string fragment while warning */
f7fe979e 323 const char *ptr;
9e27e96a 324 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
325 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
326 if (ptr >= end) break;
f337b084 327 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
328 }
329 if (from > end) from = end;
330 }
a2a5de95
NC
331 if ((bad & 2))
332 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 333 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
334 "Character(s) in '%c' format wrapped in %s",
335 (int) TYPE_NO_MODIFIERS(datumtype),
336 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
337 }
338 *s = from;
339 return TRUE;
340}
341
342STATIC bool
f7fe979e 343next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 344{
97aff369 345 dVAR;
08ca2aa3 346 STRLEN retlen;
0bcc34c2 347 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
348 if (val >= 0x100 || !ISUUCHAR(val) ||
349 retlen == (STRLEN) -1 || retlen == 0) {
350 *out = 0;
351 return FALSE;
352 }
353 *out = PL_uudmap[val] & 077;
f337b084 354 *s += retlen;
08ca2aa3
TH
355 return TRUE;
356}
78d46eaa 357
64844641 358STATIC char *
14333449 359S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 360 const U8 * const end = start + len;
64844641 361
7918f24d
NC
362 PERL_ARGS_ASSERT_BYTES_TO_UNI;
363
f337b084 364 while (start < end) {
48fa4626
TS
365 const UV uv = NATIVE_TO_ASCII(*start);
366 if (UNI_IS_INVARIANT(uv))
367 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
368 else {
369 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
370 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
371 }
372 start++;
373 }
64844641 374 return dest;
f337b084
TH
375}
376
230e1fce
NC
377#define PUSH_BYTES(utf8, cur, buf, len) \
378STMT_START { \
64844641
AL
379 if (utf8) \
380 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
381 else { \
382 Copy(buf, cur, len, char); \
383 (cur) += (len); \
384 } \
f337b084
TH
385} STMT_END
386
387#define GROWING(utf8, cat, start, cur, in_len) \
388STMT_START { \
389 STRLEN glen = (in_len); \
3473cf63 390 if (utf8) glen *= UTF8_EXPAND; \
f337b084 391 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 392 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
393 (cur) = (start) + SvCUR(cat); \
394 } \
395} STMT_END
396
397#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
398STMT_START { \
f7fe979e 399 const STRLEN glen = (in_len); \
f337b084 400 STRLEN gl = glen; \
3473cf63 401 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
402 if ((cur) + gl >= (start) + SvLEN(cat)) { \
403 *cur = '\0'; \
b162af07 404 SvCUR_set((cat), (cur) - (start)); \
0bd48802 405 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
406 (cur) = (start) + SvCUR(cat); \
407 } \
408 PUSH_BYTES(utf8, cur, buf, glen); \
409} STMT_END
410
411#define PUSH_BYTE(utf8, s, byte) \
412STMT_START { \
413 if (utf8) { \
f7fe979e 414 const U8 au8 = (byte); \
64844641 415 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
416 } else *(U8 *)(s)++ = (byte); \
417} STMT_END
418
419/* Only to be used inside a loop (see the break) */
420#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
421STMT_START { \
422 STRLEN retlen; \
423 if (str >= end) break; \
424 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
425 if (retlen == (STRLEN) -1 || retlen == 0) { \
426 *cur = '\0'; \
427 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
428 } \
429 str += retlen; \
430} STMT_END
431
f7fe979e
AL
432static const char *_action( const tempsym_t* symptr )
433{
10edeb5d 434 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
435}
436
206947d2 437/* Returns the sizeof() struct described by pat */
028d1f6d 438STATIC I32
f337b084 439S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 440{
f337b084 441 I32 total = 0;
206947d2 442
7918f24d
NC
443 PERL_ARGS_ASSERT_MEASURE_STRUCT;
444
49704364 445 while (next_symbol(symptr)) {
f337b084 446 I32 len;
f7fe979e 447 int size;
f337b084
TH
448
449 switch (symptr->howlen) {
fc241834 450 case e_star:
49704364 451 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 452 _action( symptr ) );
49704364 453 break;
f337b084
TH
454 default:
455 /* e_no_len and e_number */
456 len = symptr->length;
457 break;
49704364
WL
458 }
459
a7a3cfaa 460 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 461 if (!size) {
f7fe979e 462 int star;
80a13697
NC
463 /* endianness doesn't influence the size of a type */
464 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
465 default:
466 Perl_croak(aTHX_ "Invalid type '%c' in %s",
467 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 468 _action( symptr ) );
28be1210
TH
469 case '.' | TYPE_IS_SHRIEKING:
470 case '@' | TYPE_IS_SHRIEKING:
80a13697 471 case '@':
28be1210 472 case '.':
80a13697
NC
473 case '/':
474 case 'U': /* XXXX Is it correct? */
475 case 'w':
476 case 'u':
477 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 478 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 479 _action( symptr ) );
80a13697
NC
480 case '%':
481 size = 0;
482 break;
483 case '(':
fc241834
RGS
484 {
485 tempsym_t savsym = *symptr;
486 symptr->patptr = savsym.grpbeg;
487 symptr->patend = savsym.grpend;
488 /* XXXX Theoretically, we need to measure many times at
489 different positions, since the subexpression may contain
490 alignment commands, but be not of aligned length.
491 Need to detect this and croak(). */
492 size = measure_struct(symptr);
493 *symptr = savsym;
494 break;
495 }
80a13697
NC
496 case 'X' | TYPE_IS_SHRIEKING:
497 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
498 */
499 if (!len) /* Avoid division by 0 */
500 len = 1;
501 len = total % len; /* Assumed: the start is aligned. */
502 /* FALL THROUGH */
503 case 'X':
504 size = -1;
505 if (total < len)
f7fe979e 506 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
507 break;
508 case 'x' | TYPE_IS_SHRIEKING:
509 if (!len) /* Avoid division by 0 */
510 len = 1;
511 star = total % len; /* Assumed: the start is aligned. */
512 if (star) /* Other portable ways? */
513 len = len - star;
514 else
515 len = 0;
516 /* FALL THROUGH */
517 case 'x':
518 case 'A':
519 case 'Z':
520 case 'a':
80a13697
NC
521 size = 1;
522 break;
523 case 'B':
524 case 'b':
525 len = (len + 7)/8;
526 size = 1;
527 break;
528 case 'H':
529 case 'h':
530 len = (len + 1)/2;
531 size = 1;
532 break;
78d46eaa 533
80a13697
NC
534 case 'P':
535 len = 1;
536 size = sizeof(char*);
78d46eaa
NC
537 break;
538 }
206947d2
IZ
539 }
540 total += len * size;
541 }
542 return total;
543}
544
49704364
WL
545
546/* locate matching closing parenthesis or bracket
547 * returns char pointer to char after match, or NULL
548 */
f7fe979e 549STATIC const char *
5aaab254 550S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 551{
7918f24d
NC
552 PERL_ARGS_ASSERT_GROUP_END;
553
49704364 554 while (patptr < patend) {
f7fe979e 555 const char c = *patptr++;
49704364
WL
556
557 if (isSPACE(c))
558 continue;
559 else if (c == ender)
560 return patptr-1;
561 else if (c == '#') {
562 while (patptr < patend && *patptr != '\n')
563 patptr++;
564 continue;
565 } else if (c == '(')
566 patptr = group_end(patptr, patend, ')') + 1;
567 else if (c == '[')
568 patptr = group_end(patptr, patend, ']') + 1;
18529408 569 }
49704364
WL
570 Perl_croak(aTHX_ "No group ending character '%c' found in template",
571 ender);
572 return 0;
18529408
IZ
573}
574
49704364
WL
575
576/* Convert unsigned decimal number to binary.
577 * Expects a pointer to the first digit and address of length variable
578 * Advances char pointer to 1st non-digit char and returns number
fc241834 579 */
f7fe979e 580STATIC const char *
5aaab254 581S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
582{
583 I32 len = *patptr++ - '0';
7918f24d
NC
584
585 PERL_ARGS_ASSERT_GET_NUM;
586
49704364
WL
587 while (isDIGIT(*patptr)) {
588 if (len >= 0x7FFFFFFF/10)
589 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
590 len = (len * 10) + (*patptr++ - '0');
591 }
592 *lenptr = len;
593 return patptr;
594}
595
596/* The marvellous template parsing routine: Using state stored in *symptr,
597 * locates next template code and count
598 */
599STATIC bool
f337b084 600S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 601{
f7fe979e 602 const char* patptr = symptr->patptr;
0bcc34c2 603 const char* const patend = symptr->patend;
49704364 604
7918f24d
NC
605 PERL_ARGS_ASSERT_NEXT_SYMBOL;
606
49704364
WL
607 symptr->flags &= ~FLAG_SLASH;
608
609 while (patptr < patend) {
610 if (isSPACE(*patptr))
611 patptr++;
612 else if (*patptr == '#') {
613 patptr++;
614 while (patptr < patend && *patptr != '\n')
615 patptr++;
616 if (patptr < patend)
617 patptr++;
618 } else {
fc241834 619 /* We should have found a template code */
49704364 620 I32 code = *patptr++ & 0xFF;
66c611c5 621 U32 inherited_modifiers = 0;
49704364
WL
622
623 if (code == ','){ /* grandfather in commas but with a warning */
624 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
625 symptr->flags |= FLAG_COMMA;
626 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 627 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
628 }
629 continue;
630 }
fc241834 631
49704364 632 /* for '(', skip to ')' */
fc241834 633 if (code == '(') {
49704364
WL
634 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
635 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 636 _action( symptr ) );
49704364
WL
637 symptr->grpbeg = patptr;
638 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
639 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
640 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 641 _action( symptr ) );
49704364
WL
642 }
643
66c611c5
MHM
644 /* look for group modifiers to inherit */
645 if (TYPE_ENDIANNESS(symptr->flags)) {
646 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
647 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
648 }
649
1109a392
MHM
650 /* look for modifiers */
651 while (patptr < patend) {
b7787f18
AL
652 const char *allowed;
653 I32 modifier;
1109a392
MHM
654 switch (*patptr) {
655 case '!':
656 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 657 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
658 break;
659 case '>':
660 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 661 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
662 break;
663 case '<':
664 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 665 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
666 break;
667 default:
b7787f18
AL
668 allowed = "";
669 modifier = 0;
1109a392
MHM
670 break;
671 }
66c611c5 672
1109a392
MHM
673 if (modifier == 0)
674 break;
66c611c5 675
1109a392
MHM
676 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
677 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 678 allowed, _action( symptr ) );
66c611c5
MHM
679
680 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 681 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 682 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
683 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
684 TYPE_ENDIANNESS_MASK)
685 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 686 *patptr, _action( symptr ) );
66c611c5 687
a2a5de95
NC
688 if ((code & modifier)) {
689 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
690 "Duplicate modifier '%c' after '%c' in %s",
691 *patptr, (int) TYPE_NO_MODIFIERS(code),
692 _action( symptr ) );
1109a392 693 }
66c611c5 694
1109a392
MHM
695 code |= modifier;
696 patptr++;
49704364
WL
697 }
698
66c611c5
MHM
699 /* inherit modifiers */
700 code |= inherited_modifiers;
701
fc241834 702 /* look for count and/or / */
49704364
WL
703 if (patptr < patend) {
704 if (isDIGIT(*patptr)) {
705 patptr = get_num( patptr, &symptr->length );
706 symptr->howlen = e_number;
707
708 } else if (*patptr == '*') {
709 patptr++;
710 symptr->howlen = e_star;
711
712 } else if (*patptr == '[') {
f7fe979e 713 const char* lenptr = ++patptr;
49704364
WL
714 symptr->howlen = e_number;
715 patptr = group_end( patptr, patend, ']' ) + 1;
716 /* what kind of [] is it? */
717 if (isDIGIT(*lenptr)) {
718 lenptr = get_num( lenptr, &symptr->length );
719 if( *lenptr != ']' )
720 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 721 _action( symptr ) );
49704364
WL
722 } else {
723 tempsym_t savsym = *symptr;
724 symptr->patend = patptr-1;
725 symptr->patptr = lenptr;
726 savsym.length = measure_struct(symptr);
727 *symptr = savsym;
728 }
729 } else {
730 symptr->howlen = e_no_len;
731 symptr->length = 1;
732 }
733
734 /* try to find / */
735 while (patptr < patend) {
736 if (isSPACE(*patptr))
737 patptr++;
738 else if (*patptr == '#') {
739 patptr++;
740 while (patptr < patend && *patptr != '\n')
741 patptr++;
742 if (patptr < patend)
743 patptr++;
744 } else {
66c611c5 745 if (*patptr == '/') {
49704364
WL
746 symptr->flags |= FLAG_SLASH;
747 patptr++;
66c611c5
MHM
748 if (patptr < patend &&
749 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 750 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 751 _action( symptr ) );
49704364
WL
752 }
753 break;
754 }
18529408 755 }
49704364
WL
756 } else {
757 /* at end - no count, no / */
758 symptr->howlen = e_no_len;
759 symptr->length = 1;
760 }
761
762 symptr->code = code;
fc241834 763 symptr->patptr = patptr;
49704364 764 return TRUE;
18529408 765 }
49704364 766 }
fc241834 767 symptr->patptr = patptr;
49704364 768 return FALSE;
18529408
IZ
769}
770
18529408 771/*
fc241834 772 There is no way to cleanly handle the case where we should process the
08ca2aa3 773 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
774 (e.g. estimates like strend-s as an upper bound for the number of
775 characters left wouldn't work). So if we foresee the need of this
776 (pattern starts with U or contains U0), we want to work on the encoded
777 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
778 themselves if they need to do a lot of unpacks like this on it
779*/
fc241834 780STATIC bool
08ca2aa3
TH
781need_utf8(const char *pat, const char *patend)
782{
783 bool first = TRUE;
7918f24d
NC
784
785 PERL_ARGS_ASSERT_NEED_UTF8;
786
08ca2aa3
TH
787 while (pat < patend) {
788 if (pat[0] == '#') {
789 pat++;
f7fe979e 790 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
791 if (!pat) return FALSE;
792 } else if (pat[0] == 'U') {
793 if (first || pat[1] == '0') return TRUE;
794 } else first = FALSE;
795 pat++;
796 }
797 return FALSE;
798}
799
800STATIC char
801first_symbol(const char *pat, const char *patend) {
7918f24d
NC
802 PERL_ARGS_ASSERT_FIRST_SYMBOL;
803
08ca2aa3
TH
804 while (pat < patend) {
805 if (pat[0] != '#') return pat[0];
806 pat++;
f7fe979e 807 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
808 if (!pat) return 0;
809 pat++;
810 }
811 return 0;
812}
813
814/*
7accc089
JH
815=for apidoc unpackstring
816
21ebfc7a
DM
817The engine implementing the unpack() Perl function.
818
819Using the template pat..patend, this function unpacks the string
820s..strend into a number of mortal SVs, which it pushes onto the perl
821argument (@_) stack (so you will need to issue a C<PUTBACK> before and
822C<SPAGAIN> after the call to this function). It returns the number of
823pushed elements.
824
825The strend and patend pointers should point to the byte following the last
826character of each string.
827
828Although this function returns its values on the perl argument stack, it
829doesn't take any parameters from that stack (and thus in particular
830there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
831example).
7accc089
JH
832
833=cut */
834
835I32
f7fe979e 836Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 837{
f7fe979e 838 tempsym_t sym;
08ca2aa3 839
7918f24d
NC
840 PERL_ARGS_ASSERT_UNPACKSTRING;
841
f337b084 842 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
843 else if (need_utf8(pat, patend)) {
844 /* We probably should try to avoid this in case a scalar context call
845 wouldn't get to the "U0" */
846 STRLEN len = strend - s;
230e1fce 847 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
848 SAVEFREEPV(s);
849 strend = s + len;
f337b084 850 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
851 }
852
f337b084
TH
853 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
854 flags |= FLAG_PARSE_UTF8;
08ca2aa3 855
f7fe979e 856 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
857
858 return unpack_rec(&sym, s, s, strend, NULL );
859}
860
4136a0f7 861STATIC I32
f7fe979e 862S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 863{
27da23d5 864 dVAR; dSP;
3297d27d 865 SV *sv = NULL;
f7fe979e 866 const I32 start_sp_offset = SP - PL_stack_base;
49704364 867 howlen_t howlen;
a6ec74c1 868 I32 checksum = 0;
92d41999 869 UV cuv = 0;
a6ec74c1 870 NV cdouble = 0.0;
f337b084 871 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 872 bool beyond = FALSE;
21c16052 873 bool explicit_length;
9e27e96a 874 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 875 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
876
877 PERL_ARGS_ASSERT_UNPACK_REC;
878
28be1210 879 symptr->strbeg = s - strbeg;
49704364 880
49704364 881 while (next_symbol(symptr)) {
a7a3cfaa 882 packprops_t props;
9e27e96a 883 I32 len;
f337b084 884 I32 datumtype = symptr->code;
a1219b5e 885 bool needs_swap;
206947d2 886 /* do first one only unless in list context
08ca2aa3 887 / is implemented by unpacking the count, then popping it from the
206947d2 888 stack, so must check that we're not in the middle of a / */
49704364 889 if ( unpack_only_one
206947d2 890 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 891 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 892 break;
49704364 893
f337b084 894 switch (howlen = symptr->howlen) {
fc241834
RGS
895 case e_star:
896 len = strend - strbeg; /* long enough */
49704364 897 break;
f337b084
TH
898 default:
899 /* e_no_len and e_number */
900 len = symptr->length;
901 break;
49704364 902 }
18529408 903
21c16052 904 explicit_length = TRUE;
a6ec74c1 905 redo_switch:
49704364 906 beyond = s >= strend;
a7a3cfaa
TH
907
908 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
909 if (props) {
910 /* props nonzero means we can process this letter. */
9e27e96a
AL
911 const long size = props & PACK_SIZE_MASK;
912 const long howmany = (strend - s) / size;
a7a3cfaa
TH
913 if (len > howmany)
914 len = howmany;
915
916 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
917 if (len && unpack_only_one) len = 1;
918 EXTEND(SP, len);
919 EXTEND_MORTAL(len);
78d46eaa
NC
920 }
921 }
a7a3cfaa 922
a1219b5e
NC
923 needs_swap = NEEDS_SWAP(datumtype);
924
1109a392 925 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 926 default:
1109a392 927 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 928
a6ec74c1 929 case '%':
49704364 930 if (howlen == e_no_len)
18529408 931 len = 16; /* len is not specified */
a6ec74c1 932 checksum = len;
92d41999 933 cuv = 0;
a6ec74c1 934 cdouble = 0;
18529408 935 continue;
a6ec74c1 936 break;
18529408
IZ
937 case '(':
938 {
49704364 939 tempsym_t savsym = *symptr;
9e27e96a 940 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 941 symptr->flags |= group_modifiers;
49704364 942 symptr->patend = savsym.grpend;
28be1210 943 symptr->previous = &savsym;
49704364 944 symptr->level++;
18529408 945 PUTBACK;
c6f750d1 946 if (len && unpack_only_one) len = 1;
18529408 947 while (len--) {
49704364 948 symptr->patptr = savsym.grpbeg;
f337b084
TH
949 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
950 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
951 unpack_rec(symptr, s, strbeg, strend, &s);
952 if (s == strend && savsym.howlen == e_star)
49704364 953 break; /* No way to continue */
18529408
IZ
954 }
955 SPAGAIN;
28be1210 956 savsym.flags = symptr->flags & ~group_modifiers;
49704364 957 *symptr = savsym;
18529408
IZ
958 break;
959 }
28be1210 960 case '.' | TYPE_IS_SHRIEKING:
28be1210 961 case '.': {
9e27e96a 962 const char *from;
28be1210 963 SV *sv;
9e27e96a 964 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
965 if (howlen == e_star) from = strbeg;
966 else if (len <= 0) from = s;
967 else {
968 tempsym_t *group = symptr;
969
970 while (--len && group) group = group->previous;
971 from = group ? strbeg + group->strbeg : strbeg;
972 }
973 sv = from <= s ?
00646304
CB
974 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 976 mXPUSHs(sv);
28be1210
TH
977 break;
978 }
28be1210 979 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 980 case '@':
28be1210 981 s = strbeg + symptr->strbeg;
28be1210 982 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 983 {
08ca2aa3
TH
984 while (len > 0) {
985 if (s >= strend)
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
987 s += UTF8SKIP(s);
988 len--;
989 }
990 if (s > strend)
991 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992 } else {
28be1210 993 if (strend-s < len)
fc241834 994 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 995 s += len;
08ca2aa3 996 }
a6ec74c1 997 break;
62f95557
IZ
998 case 'X' | TYPE_IS_SHRIEKING:
999 if (!len) /* Avoid division by 0 */
1000 len = 1;
08ca2aa3 1001 if (utf8) {
f7fe979e 1002 const char *hop, *last;
f337b084
TH
1003 I32 l = len;
1004 hop = last = strbeg;
1005 while (hop < s) {
1006 hop += UTF8SKIP(hop);
1007 if (--l == 0) {
08ca2aa3 1008 last = hop;
f337b084
TH
1009 l = len;
1010 }
fc241834 1011 }
f337b084
TH
1012 if (last > s)
1013 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1014 s = last;
1015 break;
f337b084
TH
1016 }
1017 len = (s - strbeg) % len;
62f95557 1018 /* FALL THROUGH */
a6ec74c1 1019 case 'X':
08ca2aa3
TH
1020 if (utf8) {
1021 while (len > 0) {
1022 if (s <= strbeg)
1023 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1024 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1025 if (s <= strbeg)
1026 Perl_croak(aTHX_ "'X' outside of string in unpack");
1027 }
1028 len--;
1029 }
1030 } else {
fc241834
RGS
1031 if (len > s - strbeg)
1032 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1033 s -= len;
08ca2aa3 1034 }
a6ec74c1 1035 break;
9e27e96a
AL
1036 case 'x' | TYPE_IS_SHRIEKING: {
1037 I32 ai32;
62f95557
IZ
1038 if (!len) /* Avoid division by 0 */
1039 len = 1;
230e1fce
NC
1040 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1042 if (ai32 == 0) break;
1043 len -= ai32;
9e27e96a 1044 }
62f95557 1045 /* FALL THROUGH */
a6ec74c1 1046 case 'x':
08ca2aa3
TH
1047 if (utf8) {
1048 while (len>0) {
1049 if (s >= strend)
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1051 s += UTF8SKIP(s);
1052 len--;
1053 }
1054 } else {
fc241834
RGS
1055 if (len > strend - s)
1056 Perl_croak(aTHX_ "'x' outside of string in unpack");
1057 s += len;
f337b084 1058 }
a6ec74c1
JH
1059 break;
1060 case '/':
49704364
WL
1061 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1062 break;
a6ec74c1
JH
1063 case 'A':
1064 case 'Z':
1065 case 'a':
08ca2aa3
TH
1066 if (checksum) {
1067 /* Preliminary length estimate is assumed done in 'W' */
1068 if (len > strend - s) len = strend - s;
1069 goto W_checksum;
1070 }
1071 if (utf8) {
1072 I32 l;
f7fe979e 1073 const char *hop;
08ca2aa3
TH
1074 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075 if (hop >= strend) {
1076 if (hop > strend)
1077 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1078 break;
fc241834 1079 }
a6ec74c1 1080 }
08ca2aa3
TH
1081 if (hop > strend)
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 len = hop - s;
1084 } else if (len > strend - s)
1085 len = strend - s;
1086
1087 if (datumtype == 'Z') {
1088 /* 'Z' strips stuff after first null */
f7fe979e 1089 const char *ptr, *end;
f337b084
TH
1090 end = s + len;
1091 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1092 sv = newSVpvn(s, ptr-s);
1093 if (howlen == e_star) /* exact for 'Z*' */
1094 len = ptr-s + (ptr != strend ? 1 : 0);
1095 } else if (datumtype == 'A') {
1096 /* 'A' strips both nulls and spaces */
f7fe979e 1097 const char *ptr;
18bdf90a
TH
1098 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1099 for (ptr = s+len-1; ptr >= s; ptr--)
1100 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1101 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1102 if (ptr >= s) ptr += UTF8SKIP(ptr);
1103 else ptr++;
28be1210 1104 if (ptr > s+len)
18bdf90a
TH
1105 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1106 } else {
1107 for (ptr = s+len-1; ptr >= s; ptr--)
1108 if (*ptr != 0 && !isSPACE(*ptr)) break;
1109 ptr++;
1110 }
08ca2aa3
TH
1111 sv = newSVpvn(s, ptr-s);
1112 } else sv = newSVpvn(s, len);
1113
1114 if (utf8) {
1115 SvUTF8_on(sv);
1116 /* Undo any upgrade done due to need_utf8() */
f337b084 1117 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1118 sv_utf8_downgrade(sv, 0);
a6ec74c1 1119 }
6e449a3a 1120 mXPUSHs(sv);
08ca2aa3 1121 s += len;
a6ec74c1
JH
1122 break;
1123 case 'B':
08ca2aa3
TH
1124 case 'b': {
1125 char *str;
49704364 1126 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1127 len = (strend - s) * 8;
1128 if (checksum) {
f337b084 1129 if (utf8)
08ca2aa3 1130 while (len >= 8 && s < strend) {
f337b084 1131 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1132 len -= 8;
1133 }
f337b084 1134 else
fc241834 1135 while (len >= 8) {
08ca2aa3 1136 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1137 len -= 8;
1138 }
08ca2aa3
TH
1139 if (len && s < strend) {
1140 U8 bits;
f337b084
TH
1141 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1142 if (datumtype == 'b')
a6ec74c1 1143 while (len-- > 0) {
92d41999 1144 if (bits & 1) cuv++;
a6ec74c1
JH
1145 bits >>= 1;
1146 }
f337b084 1147 else
a6ec74c1 1148 while (len-- > 0) {
08ca2aa3 1149 if (bits & 0x80) cuv++;
a6ec74c1
JH
1150 bits <<= 1;
1151 }
fc241834 1152 }
a6ec74c1
JH
1153 break;
1154 }
08ca2aa3 1155
561b68a9 1156 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1157 SvPOK_on(sv);
1158 str = SvPVX(sv);
1159 if (datumtype == 'b') {
f337b084 1160 U8 bits = 0;
f7fe979e 1161 const I32 ai32 = len;
08ca2aa3
TH
1162 for (len = 0; len < ai32; len++) {
1163 if (len & 7) bits >>= 1;
1164 else if (utf8) {
1165 if (s >= strend) break;
f337b084 1166 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1167 } else bits = *(U8 *) s++;
1168 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1169 }
08ca2aa3 1170 } else {
f337b084 1171 U8 bits = 0;
f7fe979e 1172 const I32 ai32 = len;
08ca2aa3
TH
1173 for (len = 0; len < ai32; len++) {
1174 if (len & 7) bits <<= 1;
1175 else if (utf8) {
1176 if (s >= strend) break;
f337b084 1177 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1178 } else bits = *(U8 *) s++;
1179 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1180 }
1181 }
1182 *str = '\0';
aa07b2f6 1183 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1184 XPUSHs(sv);
a6ec74c1 1185 break;
08ca2aa3 1186 }
a6ec74c1 1187 case 'H':
08ca2aa3 1188 case 'h': {
3297d27d 1189 char *str = NULL;
fc241834 1190 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1191 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1192 len = (strend - s) * 2;
858fe5e1
TC
1193 if (!checksum) {
1194 sv = sv_2mortal(newSV(len ? len : 1));
1195 SvPOK_on(sv);
1196 str = SvPVX(sv);
1197 }
a6ec74c1 1198 if (datumtype == 'h') {
f337b084 1199 U8 bits = 0;
9e27e96a 1200 I32 ai32 = len;
fc241834
RGS
1201 for (len = 0; len < ai32; len++) {
1202 if (len & 1) bits >>= 4;
1203 else if (utf8) {
1204 if (s >= strend) break;
f337b084 1205 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1206 } else bits = * (U8 *) s++;
858fe5e1
TC
1207 if (!checksum)
1208 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1209 }
08ca2aa3 1210 } else {
f337b084 1211 U8 bits = 0;
f7fe979e 1212 const I32 ai32 = len;
08ca2aa3
TH
1213 for (len = 0; len < ai32; len++) {
1214 if (len & 1) bits <<= 4;
1215 else if (utf8) {
1216 if (s >= strend) break;
f337b084 1217 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1218 } else bits = *(U8 *) s++;
858fe5e1
TC
1219 if (!checksum)
1220 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1221 }
1222 }
858fe5e1
TC
1223 if (!checksum) {
1224 *str = '\0';
1225 SvCUR_set(sv, str - SvPVX_const(sv));
1226 XPUSHs(sv);
1227 }
a6ec74c1 1228 break;
08ca2aa3 1229 }
1651fc44
ML
1230 case 'C':
1231 if (len == 0) {
1232 if (explicit_length)
1233 /* Switch to "character" mode */
1234 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1235 break;
1236 }
1237 /* FALL THROUGH */
a6ec74c1 1238 case 'c':
1651fc44
ML
1239 while (len-- > 0 && s < strend) {
1240 int aint;
1241 if (utf8)
1242 {
1243 STRLEN retlen;
1244 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 s += retlen;
1249 }
1250 else
1251 aint = *(U8 *)(s)++;
1252 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1253 aint -= 256;
08ca2aa3 1254 if (!checksum)
6e449a3a 1255 mPUSHi(aint);
73cb7263
NC
1256 else if (checksum > bits_in_uv)
1257 cdouble += (NV)aint;
1258 else
1259 cuv += aint;
a6ec74c1
JH
1260 }
1261 break;
08ca2aa3
TH
1262 case 'W':
1263 W_checksum:
1651fc44 1264 if (utf8) {
08ca2aa3 1265 while (len-- > 0 && s < strend) {
08ca2aa3 1266 STRLEN retlen;
f7fe979e 1267 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1269 if (retlen == (STRLEN) -1 || retlen == 0)
1270 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1271 s += retlen;
1272 if (!checksum)
6e449a3a 1273 mPUSHu(val);
08ca2aa3
TH
1274 else if (checksum > bits_in_uv)
1275 cdouble += (NV) val;
d6d3e8bd 1276 else
08ca2aa3 1277 cuv += val;
fc241834 1278 }
08ca2aa3 1279 } else if (!checksum)
a6ec74c1 1280 while (len-- > 0) {
f7fe979e 1281 const U8 ch = *(U8 *) s++;
6e449a3a 1282 mPUSHu(ch);
a6ec74c1 1283 }
08ca2aa3
TH
1284 else if (checksum > bits_in_uv)
1285 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1286 else
1287 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1288 break;
1289 case 'U':
35bcd338 1290 if (len == 0) {
c5333953 1291 if (explicit_length && howlen != e_star) {
08ca2aa3 1292 /* Switch to "bytes in UTF-8" mode */
f337b084 1293 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1294 else
1295 /* Should be impossible due to the need_utf8() test */
1296 Perl_croak(aTHX_ "U0 mode on a byte string");
1297 }
35bcd338
JH
1298 break;
1299 }
08ca2aa3 1300 if (len > strend - s) len = strend - s;
fc241834 1301 if (!checksum) {
08ca2aa3
TH
1302 if (len && unpack_only_one) len = 1;
1303 EXTEND(SP, len);
1304 EXTEND_MORTAL(len);
fc241834 1305 }
08ca2aa3
TH
1306 while (len-- > 0 && s < strend) {
1307 STRLEN retlen;
1308 UV auv;
1309 if (utf8) {
1310 U8 result[UTF8_MAXLEN];
f7fe979e 1311 const char *ptr = s;
08ca2aa3 1312 STRLEN len;
08ca2aa3
TH
1313 /* Bug: warns about bad utf8 even if we are short on bytes
1314 and will break out of the loop */
230e1fce
NC
1315 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1316 'U'))
08ca2aa3
TH
1317 break;
1318 len = UTF8SKIP(result);
fc241834 1319 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1320 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1321 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1322 s = ptr;
1323 } else {
cde84f2a 1324 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1325 if (retlen == (STRLEN) -1 || retlen == 0)
1326 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1327 s += retlen;
1328 }
1329 if (!checksum)
6e449a3a 1330 mPUSHu(auv);
73cb7263 1331 else if (checksum > bits_in_uv)
08ca2aa3 1332 cdouble += (NV) auv;
73cb7263 1333 else
08ca2aa3 1334 cuv += auv;
a6ec74c1
JH
1335 }
1336 break;
49704364
WL
1337 case 's' | TYPE_IS_SHRIEKING:
1338#if SHORTSIZE != SIZE16
73cb7263 1339 while (len-- > 0) {
08ca2aa3 1340 short ashort;
f337b084 1341 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
08ca2aa3 1342 if (!checksum)
6e449a3a 1343 mPUSHi(ashort);
73cb7263
NC
1344 else if (checksum > bits_in_uv)
1345 cdouble += (NV)ashort;
1346 else
1347 cuv += ashort;
49704364
WL
1348 }
1349 break;
1350#else
1351 /* Fallthrough! */
a6ec74c1 1352#endif
49704364 1353 case 's':
73cb7263 1354 while (len-- > 0) {
08ca2aa3
TH
1355 I16 ai16;
1356
1357#if U16SIZE > SIZE16
1358 ai16 = 0;
1359#endif
f337b084 1360 SHIFT16(utf8, s, strend, &ai16, datumtype);
1109a392 1361#if U16SIZE > SIZE16
73cb7263
NC
1362 if (ai16 > 32767)
1363 ai16 -= 65536;
a6ec74c1 1364#endif
08ca2aa3 1365 if (!checksum)
6e449a3a 1366 mPUSHi(ai16);
73cb7263
NC
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)ai16;
1369 else
1370 cuv += ai16;
a6ec74c1
JH
1371 }
1372 break;
49704364
WL
1373 case 'S' | TYPE_IS_SHRIEKING:
1374#if SHORTSIZE != SIZE16
73cb7263 1375 while (len-- > 0) {
08ca2aa3 1376 unsigned short aushort;
f337b084 1377 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
08ca2aa3 1378 if (!checksum)
6e449a3a 1379 mPUSHu(aushort);
73cb7263
NC
1380 else if (checksum > bits_in_uv)
1381 cdouble += (NV)aushort;
1382 else
1383 cuv += aushort;
49704364
WL
1384 }
1385 break;
1386#else
486ec47a 1387 /* Fallthrough! */
49704364 1388#endif
a6ec74c1
JH
1389 case 'v':
1390 case 'n':
1391 case 'S':
73cb7263 1392 while (len-- > 0) {
08ca2aa3
TH
1393 U16 au16;
1394#if U16SIZE > SIZE16
1395 au16 = 0;
1396#endif
f337b084 1397 SHIFT16(utf8, s, strend, &au16, datumtype);
73cb7263
NC
1398 if (datumtype == 'n')
1399 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1400 if (datumtype == 'v')
1401 au16 = vtohs(au16);
08ca2aa3 1402 if (!checksum)
6e449a3a 1403 mPUSHu(au16);
73cb7263 1404 else if (checksum > bits_in_uv)
f337b084 1405 cdouble += (NV) au16;
73cb7263
NC
1406 else
1407 cuv += au16;
a6ec74c1
JH
1408 }
1409 break;
068bd2e7
MHM
1410 case 'v' | TYPE_IS_SHRIEKING:
1411 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1412 while (len-- > 0) {
08ca2aa3
TH
1413 I16 ai16;
1414# if U16SIZE > SIZE16
1415 ai16 = 0;
1416# endif
f337b084 1417 SHIFT16(utf8, s, strend, &ai16, datumtype);
e396d235
NC
1418 /* There should never be any byte-swapping here. */
1419 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1420 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1421 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1422 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1423 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1424 if (!checksum)
6e449a3a 1425 mPUSHi(ai16);
73cb7263 1426 else if (checksum > bits_in_uv)
08ca2aa3 1427 cdouble += (NV) ai16;
73cb7263
NC
1428 else
1429 cuv += ai16;
068bd2e7
MHM
1430 }
1431 break;
a6ec74c1 1432 case 'i':
49704364 1433 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1434 while (len-- > 0) {
08ca2aa3 1435 int aint;
f337b084 1436 SHIFT_VAR(utf8, s, strend, aint, datumtype);
08ca2aa3 1437 if (!checksum)
6e449a3a 1438 mPUSHi(aint);
73cb7263
NC
1439 else if (checksum > bits_in_uv)
1440 cdouble += (NV)aint;
1441 else
1442 cuv += aint;
a6ec74c1
JH
1443 }
1444 break;
1445 case 'I':
49704364 1446 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1447 while (len-- > 0) {
08ca2aa3 1448 unsigned int auint;
f337b084 1449 SHIFT_VAR(utf8, s, strend, auint, datumtype);
08ca2aa3 1450 if (!checksum)
6e449a3a 1451 mPUSHu(auint);
73cb7263
NC
1452 else if (checksum > bits_in_uv)
1453 cdouble += (NV)auint;
1454 else
1455 cuv += auint;
a6ec74c1
JH
1456 }
1457 break;
92d41999 1458 case 'j':
73cb7263 1459 while (len-- > 0) {
08ca2aa3 1460 IV aiv;
f337b084 1461 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
08ca2aa3 1462 if (!checksum)
6e449a3a 1463 mPUSHi(aiv);
73cb7263
NC
1464 else if (checksum > bits_in_uv)
1465 cdouble += (NV)aiv;
1466 else
1467 cuv += aiv;
92d41999
JH
1468 }
1469 break;
1470 case 'J':
73cb7263 1471 while (len-- > 0) {
08ca2aa3 1472 UV auv;
f337b084 1473 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1474 if (!checksum)
6e449a3a 1475 mPUSHu(auv);
73cb7263
NC
1476 else if (checksum > bits_in_uv)
1477 cdouble += (NV)auv;
1478 else
1479 cuv += auv;
92d41999
JH
1480 }
1481 break;
49704364
WL
1482 case 'l' | TYPE_IS_SHRIEKING:
1483#if LONGSIZE != SIZE32
73cb7263 1484 while (len-- > 0) {
08ca2aa3 1485 long along;
f337b084 1486 SHIFT_VAR(utf8, s, strend, along, datumtype);
08ca2aa3 1487 if (!checksum)
6e449a3a 1488 mPUSHi(along);
73cb7263
NC
1489 else if (checksum > bits_in_uv)
1490 cdouble += (NV)along;
1491 else
1492 cuv += along;
49704364
WL
1493 }
1494 break;
1495#else
1496 /* Fallthrough! */
a6ec74c1 1497#endif
49704364 1498 case 'l':
73cb7263 1499 while (len-- > 0) {
08ca2aa3
TH
1500 I32 ai32;
1501#if U32SIZE > SIZE32
1502 ai32 = 0;
1503#endif
f337b084 1504 SHIFT32(utf8, s, strend, &ai32, datumtype);
25a9bd2a 1505#if U32SIZE > SIZE32
08ca2aa3 1506 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1507#endif
08ca2aa3 1508 if (!checksum)
6e449a3a 1509 mPUSHi(ai32);
73cb7263
NC
1510 else if (checksum > bits_in_uv)
1511 cdouble += (NV)ai32;
1512 else
1513 cuv += ai32;
a6ec74c1
JH
1514 }
1515 break;
49704364
WL
1516 case 'L' | TYPE_IS_SHRIEKING:
1517#if LONGSIZE != SIZE32
73cb7263 1518 while (len-- > 0) {
08ca2aa3 1519 unsigned long aulong;
f337b084 1520 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
08ca2aa3 1521 if (!checksum)
6e449a3a 1522 mPUSHu(aulong);
73cb7263
NC
1523 else if (checksum > bits_in_uv)
1524 cdouble += (NV)aulong;
1525 else
1526 cuv += aulong;
49704364
WL
1527 }
1528 break;
1529#else
1530 /* Fall through! */
1531#endif
a6ec74c1
JH
1532 case 'V':
1533 case 'N':
1534 case 'L':
73cb7263 1535 while (len-- > 0) {
08ca2aa3
TH
1536 U32 au32;
1537#if U32SIZE > SIZE32
1538 au32 = 0;
1539#endif
f337b084 1540 SHIFT32(utf8, s, strend, &au32, datumtype);
73cb7263
NC
1541 if (datumtype == 'N')
1542 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1543 if (datumtype == 'V')
1544 au32 = vtohl(au32);
08ca2aa3 1545 if (!checksum)
6e449a3a 1546 mPUSHu(au32);
fc241834
RGS
1547 else if (checksum > bits_in_uv)
1548 cdouble += (NV)au32;
1549 else
1550 cuv += au32;
a6ec74c1
JH
1551 }
1552 break;
068bd2e7
MHM
1553 case 'V' | TYPE_IS_SHRIEKING:
1554 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1555 while (len-- > 0) {
08ca2aa3 1556 I32 ai32;
f8e5a5db 1557#if U32SIZE > SIZE32
08ca2aa3 1558 ai32 = 0;
f8e5a5db 1559#endif
f337b084 1560 SHIFT32(utf8, s, strend, &ai32, datumtype);
e396d235
NC
1561 /* There should never be any byte swapping here. */
1562 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1563 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1564 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1565 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1566 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1567 if (!checksum)
6e449a3a 1568 mPUSHi(ai32);
73cb7263
NC
1569 else if (checksum > bits_in_uv)
1570 cdouble += (NV)ai32;
1571 else
1572 cuv += ai32;
068bd2e7
MHM
1573 }
1574 break;
a6ec74c1 1575 case 'p':
a6ec74c1 1576 while (len-- > 0) {
f7fe979e 1577 const char *aptr;
f337b084 1578 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
c4c5f44a 1579 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1580 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1581 }
1582 break;
1583 case 'w':
a6ec74c1
JH
1584 {
1585 UV auv = 0;
1586 U32 bytes = 0;
fc241834 1587
08ca2aa3
TH
1588 while (len > 0 && s < strend) {
1589 U8 ch;
f337b084 1590 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1591 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1592 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1593 if (ch < 0x80) {
a6ec74c1 1594 bytes = 0;
6e449a3a 1595 mPUSHu(auv);
a6ec74c1
JH
1596 len--;
1597 auv = 0;
08ca2aa3 1598 continue;
a6ec74c1 1599 }
08ca2aa3 1600 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1601 const char *t;
a6ec74c1 1602
f5992bc4 1603 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1604 while (s < strend) {
f337b084 1605 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1606 sv = mul128(sv, (U8)(ch & 0x7f));
1607 if (!(ch & 0x80)) {
a6ec74c1
JH
1608 bytes = 0;
1609 break;
1610 }
1611 }
10516c54 1612 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1613 while (*t == '0')
1614 t++;
1615 sv_chop(sv, t);
6e449a3a 1616 mPUSHs(sv);
a6ec74c1
JH
1617 len--;
1618 auv = 0;
1619 }
1620 }
1621 if ((s >= strend) && bytes)
49704364 1622 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1623 }
1624 break;
1625 case 'P':
49704364
WL
1626 if (symptr->howlen == e_star)
1627 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1628 EXTEND(SP, 1);
2d3e0934 1629 if (s + sizeof(char*) <= strend) {
08ca2aa3 1630 char *aptr;
f337b084 1631 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
fc241834 1632 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1633 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1634 }
a6ec74c1
JH
1635 break;
1636#ifdef HAS_QUAD
1637 case 'q':
73cb7263 1638 while (len-- > 0) {
08ca2aa3 1639 Quad_t aquad;
f337b084 1640 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
08ca2aa3 1641 if (!checksum)
6e449a3a
MHM
1642 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1643 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1644 else if (checksum > bits_in_uv)
1645 cdouble += (NV)aquad;
1646 else
1647 cuv += aquad;
1648 }
a6ec74c1
JH
1649 break;
1650 case 'Q':
73cb7263 1651 while (len-- > 0) {
08ca2aa3 1652 Uquad_t auquad;
f337b084 1653 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
08ca2aa3 1654 if (!checksum)
6e449a3a
MHM
1655 mPUSHs(auquad <= UV_MAX ?
1656 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1657 else if (checksum > bits_in_uv)
1658 cdouble += (NV)auquad;
1659 else
1660 cuv += auquad;
a6ec74c1
JH
1661 }
1662 break;
08ca2aa3 1663#endif /* HAS_QUAD */
a6ec74c1
JH
1664 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1665 case 'f':
73cb7263 1666 while (len-- > 0) {
08ca2aa3 1667 float afloat;
f337b084 1668 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3 1669 if (!checksum)
6e449a3a 1670 mPUSHn(afloat);
08ca2aa3 1671 else
73cb7263 1672 cdouble += afloat;
fc241834 1673 }
a6ec74c1
JH
1674 break;
1675 case 'd':
73cb7263 1676 while (len-- > 0) {
08ca2aa3 1677 double adouble;
f337b084 1678 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3 1679 if (!checksum)
6e449a3a 1680 mPUSHn(adouble);
08ca2aa3 1681 else
73cb7263 1682 cdouble += adouble;
fc241834 1683 }
a6ec74c1 1684 break;
92d41999 1685 case 'F':
73cb7263 1686 while (len-- > 0) {
275663fa
TC
1687 NV_bytes anv;
1688 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
08ca2aa3 1689 if (!checksum)
275663fa 1690 mPUSHn(anv.nv);
08ca2aa3 1691 else
275663fa 1692 cdouble += anv.nv;
fc241834 1693 }
92d41999
JH
1694 break;
1695#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1696 case 'D':
73cb7263 1697 while (len-- > 0) {
275663fa
TC
1698 ld_bytes aldouble;
1699 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
08ca2aa3 1700 if (!checksum)
275663fa 1701 mPUSHn(aldouble.ld);
08ca2aa3 1702 else
275663fa 1703 cdouble += aldouble.ld;
92d41999
JH
1704 }
1705 break;
1706#endif
a6ec74c1 1707 case 'u':
858fe5e1 1708 if (!checksum) {
f7fe979e 1709 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1710 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1711 if (l) SvPOK_on(sv);
1712 }
1713 if (utf8) {
1714 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1715 I32 a, b, c, d;
db187877 1716 char hunk[3];
08ca2aa3 1717
08ca2aa3
TH
1718 while (len > 0) {
1719 next_uni_uu(aTHX_ &s, strend, &a);
1720 next_uni_uu(aTHX_ &s, strend, &b);
1721 next_uni_uu(aTHX_ &s, strend, &c);
1722 next_uni_uu(aTHX_ &s, strend, &d);
1723 hunk[0] = (char)((a << 2) | (b >> 4));
1724 hunk[1] = (char)((b << 4) | (c >> 2));
1725 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1726 if (!checksum)
1727 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1728 len -= 3;
1729 }
1730 if (s < strend) {
f7fe979e
AL
1731 if (*s == '\n') {
1732 s++;
1733 }
08ca2aa3
TH
1734 else {
1735 /* possible checksum byte */
f7fe979e
AL
1736 const char *skip = s+UTF8SKIP(s);
1737 if (skip < strend && *skip == '\n')
1738 s = skip+1;
08ca2aa3
TH
1739 }
1740 }
1741 }
1742 } else {
fc241834
RGS
1743 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1744 I32 a, b, c, d;
db187877 1745 char hunk[3];
a6ec74c1 1746
fc241834
RGS
1747 len = PL_uudmap[*(U8*)s++] & 077;
1748 while (len > 0) {
1749 if (s < strend && ISUUCHAR(*s))
1750 a = PL_uudmap[*(U8*)s++] & 077;
1751 else
1752 a = 0;
1753 if (s < strend && ISUUCHAR(*s))
1754 b = PL_uudmap[*(U8*)s++] & 077;
1755 else
1756 b = 0;
1757 if (s < strend && ISUUCHAR(*s))
1758 c = PL_uudmap[*(U8*)s++] & 077;
1759 else
1760 c = 0;
1761 if (s < strend && ISUUCHAR(*s))
1762 d = PL_uudmap[*(U8*)s++] & 077;
1763 else
1764 d = 0;
1765 hunk[0] = (char)((a << 2) | (b >> 4));
1766 hunk[1] = (char)((b << 4) | (c >> 2));
1767 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1768 if (!checksum)
1769 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1770 len -= 3;
1771 }
1772 if (*s == '\n')
1773 s++;
1774 else /* possible checksum byte */
1775 if (s + 1 < strend && s[1] == '\n')
1776 s += 2;
a6ec74c1 1777 }
08ca2aa3 1778 }
858fe5e1
TC
1779 if (!checksum)
1780 XPUSHs(sv);
a6ec74c1
JH
1781 break;
1782 }
49704364 1783
a6ec74c1 1784 if (checksum) {
1109a392 1785 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1786 (checksum > bits_in_uv &&
08ca2aa3
TH
1787 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1788 NV trouble, anv;
a6ec74c1 1789
08ca2aa3 1790 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1791 while (checksum >= 16) {
1792 checksum -= 16;
08ca2aa3 1793 anv *= 65536.0;
a6ec74c1 1794 }
a6ec74c1 1795 while (cdouble < 0.0)
08ca2aa3
TH
1796 cdouble += anv;
1797 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1798 sv = newSVnv(cdouble);
a6ec74c1
JH
1799 }
1800 else {
fa8ec7c1
NC
1801 if (checksum < bits_in_uv) {
1802 UV mask = ((UV)1 << checksum) - 1;
92d41999 1803 cuv &= mask;
a6ec74c1 1804 }
c4c5f44a 1805 sv = newSVuv(cuv);
a6ec74c1 1806 }
6e449a3a 1807 mXPUSHs(sv);
a6ec74c1
JH
1808 checksum = 0;
1809 }
fc241834 1810
49704364
WL
1811 if (symptr->flags & FLAG_SLASH){
1812 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1813 break;
49704364
WL
1814 if( next_symbol(symptr) ){
1815 if( symptr->howlen == e_number )
1816 Perl_croak(aTHX_ "Count after length/code in unpack" );
1817 if( beyond ){
1818 /* ...end of char buffer then no decent length available */
1819 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1820 } else {
1821 /* take top of stack (hope it's numeric) */
1822 len = POPi;
1823 if( len < 0 )
1824 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1825 }
1826 } else {
1827 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1828 }
1829 datumtype = symptr->code;
21c16052 1830 explicit_length = FALSE;
49704364
WL
1831 goto redo_switch;
1832 }
a6ec74c1 1833 }
49704364 1834
18529408
IZ
1835 if (new_s)
1836 *new_s = s;
1837 PUTBACK;
1838 return SP - PL_stack_base - start_sp_offset;
1839}
1840
1841PP(pp_unpack)
1842{
97aff369 1843 dVAR;
18529408 1844 dSP;
bab9c0ac 1845 dPOPPOPssrl;
18529408
IZ
1846 I32 gimme = GIMME_V;
1847 STRLEN llen;
1848 STRLEN rlen;
5c144d81
NC
1849 const char *pat = SvPV_const(left, llen);
1850 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1851 const char *strend = s + rlen;
1852 const char *patend = pat + llen;
08ca2aa3 1853 I32 cnt;
18529408
IZ
1854
1855 PUTBACK;
7accc089 1856 cnt = unpackstring(pat, patend, s, strend,
49704364 1857 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1858 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1859
18529408
IZ
1860 SPAGAIN;
1861 if ( !cnt && gimme == G_SCALAR )
1862 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1863 RETURN;
1864}
1865
f337b084 1866STATIC U8 *
f7fe979e 1867doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1868{
f337b084 1869 *h++ = PL_uuemap[len];
a6ec74c1 1870 while (len > 2) {
f337b084
TH
1871 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1872 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1873 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1874 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1875 s += 3;
1876 len -= 3;
1877 }
1878 if (len > 0) {
f7fe979e 1879 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1880 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1881 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1882 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1883 *h++ = PL_uuemap[0];
a6ec74c1 1884 }
f337b084
TH
1885 *h++ = '\n';
1886 return h;
a6ec74c1
JH
1887}
1888
1889STATIC SV *
f7fe979e 1890S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1891{
8b6e33c7
AL
1892 SV *result = newSVpvn(s, l);
1893 char *const result_c = SvPV_nolen(result); /* convenience */
1894 char *out = result_c;
1895 bool skip = 1;
1896 bool ignore = 0;
a6ec74c1 1897
7918f24d
NC
1898 PERL_ARGS_ASSERT_IS_AN_INT;
1899
a6ec74c1
JH
1900 while (*s) {
1901 switch (*s) {
1902 case ' ':
1903 break;
1904 case '+':
1905 if (!skip) {
1906 SvREFCNT_dec(result);
1907 return (NULL);
1908 }
1909 break;
1910 case '0':
1911 case '1':
1912 case '2':
1913 case '3':
1914 case '4':
1915 case '5':
1916 case '6':
1917 case '7':
1918 case '8':
1919 case '9':
1920 skip = 0;
1921 if (!ignore) {
1922 *(out++) = *s;
1923 }
1924 break;
1925 case '.':
1926 ignore = 1;
1927 break;
1928 default:
1929 SvREFCNT_dec(result);
1930 return (NULL);
1931 }
1932 s++;
1933 }
1934 *(out++) = '\0';
1935 SvCUR_set(result, out - result_c);
1936 return (result);
1937}
1938
1939/* pnum must be '\0' terminated */
1940STATIC int
1941S_div128(pTHX_ SV *pnum, bool *done)
1942{
8b6e33c7
AL
1943 STRLEN len;
1944 char * const s = SvPV(pnum, len);
1945 char *t = s;
1946 int m = 0;
1947
7918f24d
NC
1948 PERL_ARGS_ASSERT_DIV128;
1949
8b6e33c7
AL
1950 *done = 1;
1951 while (*t) {
1952 const int i = m * 10 + (*t - '0');
1953 const int r = (i >> 7); /* r < 10 */
1954 m = i & 0x7F;
1955 if (r) {
1956 *done = 0;
1957 }
1958 *(t++) = '0' + r;
a6ec74c1 1959 }
8b6e33c7
AL
1960 *(t++) = '\0';
1961 SvCUR_set(pnum, (STRLEN) (t - s));
1962 return (m);
a6ec74c1
JH
1963}
1964
18529408 1965/*
7accc089
JH
1966=for apidoc packlist
1967
1968The engine implementing pack() Perl function.
1969
bfce84ec
AL
1970=cut
1971*/
7accc089
JH
1972
1973void
5aaab254 1974Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1975{
97aff369 1976 dVAR;
aadb217d
JH
1977 tempsym_t sym;
1978
7918f24d
NC
1979 PERL_ARGS_ASSERT_PACKLIST;
1980
f7fe979e 1981 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1982
f337b084
TH
1983 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1984 Also make sure any UTF8 flag is loaded */
56eb0262 1985 SvPV_force_nolen(cat);
bfce84ec
AL
1986 if (DO_UTF8(cat))
1987 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1988
49704364
WL
1989 (void)pack_rec( cat, &sym, beglist, endlist );
1990}
1991
f337b084
TH
1992/* like sv_utf8_upgrade, but also repoint the group start markers */
1993STATIC void
1994marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1995 STRLEN len;
1996 tempsym_t *group;
f7fe979e
AL
1997 const char *from_ptr, *from_start, *from_end, **marks, **m;
1998 char *to_start, *to_ptr;
f337b084
TH
1999
2000 if (SvUTF8(sv)) return;
2001
aa07b2f6 2002 from_start = SvPVX_const(sv);
f337b084
TH
2003 from_end = from_start + SvCUR(sv);
2004 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2005 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2006 if (from_ptr == from_end) {
2007 /* Simple case: no character needs to be changed */
2008 SvUTF8_on(sv);
2009 return;
2010 }
2011
3473cf63 2012 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2013 Newx(to_start, len, char);
f337b084
TH
2014 Copy(from_start, to_start, from_ptr-from_start, char);
2015 to_ptr = to_start + (from_ptr-from_start);
2016
a02a5408 2017 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2018 for (group=sym_ptr; group; group = group->previous)
2019 marks[group->level] = from_start + group->strbeg;
2020 marks[sym_ptr->level+1] = from_end+1;
2021 for (m = marks; *m < from_ptr; m++)
2022 *m = to_start + (*m-from_start);
2023
2024 for (;from_ptr < from_end; from_ptr++) {
2025 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2026 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2027 }
2028 *to_ptr = 0;
2029
2030 while (*m == from_ptr) *m++ = to_ptr;
2031 if (m != marks + sym_ptr->level+1) {
2032 Safefree(marks);
2033 Safefree(to_start);
5637ef5b
NC
2034 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2035 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2036 }
2037 for (group=sym_ptr; group; group = group->previous)
2038 group->strbeg = marks[group->level] - to_start;
2039 Safefree(marks);
2040
2041 if (SvOOK(sv)) {
2042 if (SvIVX(sv)) {
b162af07 2043 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2044 from_start -= SvIVX(sv);
2045 SvIV_set(sv, 0);
2046 }
2047 SvFLAGS(sv) &= ~SVf_OOK;
2048 }
2049 if (SvLEN(sv) != 0)
2050 Safefree(from_start);
f880fe2f 2051 SvPV_set(sv, to_start);
b162af07
SP
2052 SvCUR_set(sv, to_ptr - to_start);
2053 SvLEN_set(sv, len);
f337b084
TH
2054 SvUTF8_on(sv);
2055}
2056
2057/* Exponential string grower. Makes string extension effectively O(n)
2058 needed says how many extra bytes we need (not counting the final '\0')
2059 Only grows the string if there is an actual lack of space
2060*/
2061STATIC char *
0bd48802 2062S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2063 const STRLEN cur = SvCUR(sv);
2064 const STRLEN len = SvLEN(sv);
f337b084 2065 STRLEN extend;
7918f24d
NC
2066
2067 PERL_ARGS_ASSERT_SV_EXP_GROW;
2068
f337b084
TH
2069 if (len - cur > needed) return SvPVX(sv);
2070 extend = needed > len ? needed : len;
2071 return SvGROW(sv, len+extend+1);
2072}
49704364
WL
2073
2074STATIC
2075SV **
f337b084 2076S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2077{
97aff369 2078 dVAR;
49704364 2079 tempsym_t lookahead;
f337b084
TH
2080 I32 items = endlist - beglist;
2081 bool found = next_symbol(symptr);
2082 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2083 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2084
7918f24d
NC
2085 PERL_ARGS_ASSERT_PACK_REC;
2086
f337b084
TH
2087 if (symptr->level == 0 && found && symptr->code == 'U') {
2088 marked_upgrade(aTHX_ cat, symptr);
2089 symptr->flags |= FLAG_DO_UTF8;
2090 utf8 = 0;
49704364 2091 }
f337b084 2092 symptr->strbeg = SvCUR(cat);
49704364
WL
2093
2094 while (found) {
f337b084
TH
2095 SV *fromstr;
2096 STRLEN fromlen;
2097 I32 len;
a0714e2c 2098 SV *lengthcode = NULL;
49704364 2099 I32 datumtype = symptr->code;
f337b084
TH
2100 howlen_t howlen = symptr->howlen;
2101 char *start = SvPVX(cat);
2102 char *cur = start + SvCUR(cat);
a1219b5e 2103 bool needs_swap;
49704364 2104
f337b084
TH
2105#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2106
2107 switch (howlen) {
fc241834 2108 case e_star:
f337b084
TH
2109 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2110 0 : items;
2111 break;
2112 default:
2113 /* e_no_len and e_number */
2114 len = symptr->length;
49704364
WL
2115 break;
2116 }
2117
f337b084 2118 if (len) {
a7a3cfaa 2119 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2120
a7a3cfaa
TH
2121 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2122 /* We can process this letter. */
2123 STRLEN size = props & PACK_SIZE_MASK;
2124 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2125 }
f337b084
TH
2126 }
2127
49704364
WL
2128 /* Look ahead for next symbol. Do we have code/code? */
2129 lookahead = *symptr;
2130 found = next_symbol(&lookahead);
246f24af
TH
2131 if (symptr->flags & FLAG_SLASH) {
2132 IV count;
f337b084 2133 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2134 if (strchr("aAZ", lookahead.code)) {
2135 if (lookahead.howlen == e_number) count = lookahead.length;
2136 else {
ce399ba6 2137 if (items > 0) {
48a5da33 2138 count = sv_len_utf8(*beglist);
ce399ba6 2139 }
246f24af
TH
2140 else count = 0;
2141 if (lookahead.code == 'Z') count++;
2142 }
2143 } else {
2144 if (lookahead.howlen == e_number && lookahead.length < items)
2145 count = lookahead.length;
2146 else count = items;
2147 }
2148 lookahead.howlen = e_number;
2149 lookahead.length = count;
2150 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2151 }
49704364 2152
a1219b5e
NC
2153 needs_swap = NEEDS_SWAP(datumtype);
2154
fc241834
RGS
2155 /* Code inside the switch must take care to properly update
2156 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2157 doesn't simply leave using break */
1109a392 2158 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2159 default:
f337b084
TH
2160 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2161 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2162 case '%':
49704364 2163 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2164 {
2165 char *from;
28be1210 2166 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2167 case '.':
2168 if (howlen == e_star) from = start;
2169 else if (len == 0) from = cur;
2170 else {
2171 tempsym_t *group = symptr;
2172
2173 while (--len && group) group = group->previous;
2174 from = group ? start + group->strbeg : start;
2175 }
2176 fromstr = NEXTFROM;
2177 len = SvIV(fromstr);
2178 goto resize;
28be1210 2179 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2180 case '@':
28be1210
TH
2181 from = start + symptr->strbeg;
2182 resize:
28be1210 2183 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2184 if (len >= 0) {
2185 while (len && from < cur) {
2186 from += UTF8SKIP(from);
2187 len--;
2188 }
2189 if (from > cur)
2190 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2191 if (len) {
2192 /* Here we know from == cur */
2193 grow:
2194 GROWING(0, cat, start, cur, len);
2195 Zero(cur, len, char);
2196 cur += len;
2197 } else if (from < cur) {
2198 len = cur - from;
2199 goto shrink;
2200 } else goto no_change;
2201 } else {
2202 cur = from;
2203 len = -len;
2204 goto utf8_shrink;
f337b084 2205 }
28be1210
TH
2206 else {
2207 len -= cur - from;
f337b084 2208 if (len > 0) goto grow;
28be1210 2209 if (len == 0) goto no_change;
fc241834 2210 len = -len;
28be1210 2211 goto shrink;
f337b084 2212 }
a6ec74c1 2213 break;
28be1210 2214 }
fc241834 2215 case '(': {
49704364 2216 tempsym_t savsym = *symptr;
66c611c5
MHM
2217 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2218 symptr->flags |= group_modifiers;
49704364
WL
2219 symptr->patend = savsym.grpend;
2220 symptr->level++;
f337b084 2221 symptr->previous = &lookahead;
18529408 2222 while (len--) {
f337b084
TH
2223 U32 was_utf8;
2224 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2225 else symptr->flags &= ~FLAG_PARSE_UTF8;
2226 was_utf8 = SvUTF8(cat);
49704364 2227 symptr->patptr = savsym.grpbeg;
f337b084
TH
2228 beglist = pack_rec(cat, symptr, beglist, endlist);
2229 if (SvUTF8(cat) != was_utf8)
2230 /* This had better be an upgrade while in utf8==0 mode */
2231 utf8 = 1;
2232
49704364 2233 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2234 break; /* No way to continue */
2235 }
ee790063 2236 items = endlist - beglist;
f337b084
TH
2237 lookahead.flags = symptr->flags & ~group_modifiers;
2238 goto no_change;
18529408 2239 }
62f95557
IZ
2240 case 'X' | TYPE_IS_SHRIEKING:
2241 if (!len) /* Avoid division by 0 */
2242 len = 1;
f337b084
TH
2243 if (utf8) {
2244 char *hop, *last;
2245 I32 l = len;
2246 hop = last = start;
2247 while (hop < cur) {
2248 hop += UTF8SKIP(hop);
2249 if (--l == 0) {
2250 last = hop;
2251 l = len;
2252 }
2253 }
2254 if (last > cur)
2255 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2256 cur = last;
2257 break;
2258 }
2259 len = (cur-start) % len;
62f95557 2260 /* FALL THROUGH */
a6ec74c1 2261 case 'X':
f337b084
TH
2262 if (utf8) {
2263 if (len < 1) goto no_change;
28be1210 2264 utf8_shrink:
f337b084
TH
2265 while (len > 0) {
2266 if (cur <= start)
28be1210
TH
2267 Perl_croak(aTHX_ "'%c' outside of string in pack",
2268 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2269 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2270 if (cur <= start)
28be1210
TH
2271 Perl_croak(aTHX_ "'%c' outside of string in pack",
2272 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2273 }
2274 len--;
2275 }
2276 } else {
fc241834 2277 shrink:
f337b084 2278 if (cur - start < len)
28be1210
TH
2279 Perl_croak(aTHX_ "'%c' outside of string in pack",
2280 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2281 cur -= len;
2282 }
2283 if (cur < start+symptr->strbeg) {
2284 /* Make sure group starts don't point into the void */
2285 tempsym_t *group;
9e27e96a 2286 const STRLEN length = cur-start;
f337b084
TH
2287 for (group = symptr;
2288 group && length < group->strbeg;
2289 group = group->previous) group->strbeg = length;
2290 lookahead.strbeg = length;
2291 }
a6ec74c1 2292 break;
fc241834
RGS
2293 case 'x' | TYPE_IS_SHRIEKING: {
2294 I32 ai32;
62f95557
IZ
2295 if (!len) /* Avoid division by 0 */
2296 len = 1;
230e1fce 2297 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2298 else ai32 = (cur - start) % len;
2299 if (ai32 == 0) goto no_change;
2300 len -= ai32;
2301 }
2302 /* FALL THROUGH */
a6ec74c1 2303 case 'x':
f337b084 2304 goto grow;
a6ec74c1
JH
2305 case 'A':
2306 case 'Z':
f337b084 2307 case 'a': {
f7fe979e 2308 const char *aptr;
f337b084 2309
a6ec74c1 2310 fromstr = NEXTFROM;
e62f0680 2311 aptr = SvPV_const(fromstr, fromlen);
f337b084 2312 if (DO_UTF8(fromstr)) {
f7fe979e 2313 const char *end, *s;
f337b084
TH
2314
2315 if (!utf8 && !SvUTF8(cat)) {
2316 marked_upgrade(aTHX_ cat, symptr);
2317 lookahead.flags |= FLAG_DO_UTF8;
2318 lookahead.strbeg = symptr->strbeg;
2319 utf8 = 1;
2320 start = SvPVX(cat);
2321 cur = start + SvCUR(cat);
2322 }
fc241834 2323 if (howlen == e_star) {
f337b084
TH
2324 if (utf8) goto string_copy;
2325 len = fromlen+1;
2326 }
2327 s = aptr;
2328 end = aptr + fromlen;
2329 fromlen = datumtype == 'Z' ? len-1 : len;
2330 while ((I32) fromlen > 0 && s < end) {
2331 s += UTF8SKIP(s);
2332 fromlen--;
2333 }
2334 if (s > end)
2335 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2336 if (utf8) {
fc241834 2337 len = fromlen;
f337b084
TH
2338 if (datumtype == 'Z') len++;
2339 fromlen = s-aptr;
2340 len += fromlen;
fc241834 2341
f337b084 2342 goto string_copy;
fc241834 2343 }
f337b084
TH
2344 fromlen = len - fromlen;
2345 if (datumtype == 'Z') fromlen--;
2346 if (howlen == e_star) {
2347 len = fromlen;
2348 if (datumtype == 'Z') len++;
fc241834 2349 }
f337b084 2350 GROWING(0, cat, start, cur, len);
fc241834 2351 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2352 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2353 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2354 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2355 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2356 cur += fromlen;
a6ec74c1 2357 len -= fromlen;
f337b084
TH
2358 } else if (utf8) {
2359 if (howlen == e_star) {
2360 len = fromlen;
2361 if (datumtype == 'Z') len++;
a6ec74c1 2362 }
f337b084
TH
2363 if (len <= (I32) fromlen) {
2364 fromlen = len;
2365 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2366 }
fc241834 2367 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2368 upgrade, so:
2369 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2370 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2371 len -= fromlen;
2372 while (fromlen > 0) {
230e1fce 2373 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2374 aptr++;
2375 fromlen--;
fc241834 2376 }
f337b084
TH
2377 } else {
2378 string_copy:
2379 if (howlen == e_star) {
2380 len = fromlen;
2381 if (datumtype == 'Z') len++;
2382 }
2383 if (len <= (I32) fromlen) {
2384 fromlen = len;
2385 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2386 }
f337b084
TH
2387 GROWING(0, cat, start, cur, len);
2388 Copy(aptr, cur, fromlen, char);
2389 cur += fromlen;
2390 len -= fromlen;
a6ec74c1 2391 }
f337b084
TH
2392 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2393 cur += len;
3c4fb04a 2394 SvTAINT(cat);
a6ec74c1 2395 break;
f337b084 2396 }
a6ec74c1 2397 case 'B':
f337b084 2398 case 'b': {
b83604b4 2399 const char *str, *end;
f337b084
TH
2400 I32 l, field_len;
2401 U8 bits;
2402 bool utf8_source;
2403 U32 utf8_flags;
a6ec74c1 2404
fc241834 2405 fromstr = NEXTFROM;
b83604b4 2406 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2407 end = str + fromlen;
2408 if (DO_UTF8(fromstr)) {
2409 utf8_source = TRUE;
041457d9 2410 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2411 } else {
2412 utf8_source = FALSE;
2413 utf8_flags = 0; /* Unused, but keep compilers happy */
2414 }
2415 if (howlen == e_star) len = fromlen;
2416 field_len = (len+7)/8;
2417 GROWING(utf8, cat, start, cur, field_len);
2418 if (len > (I32)fromlen) len = fromlen;
2419 bits = 0;
2420 l = 0;
2421 if (datumtype == 'B')
2422 while (l++ < len) {
2423 if (utf8_source) {
95b63a38 2424 UV val = 0;
f337b084
TH
2425 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2426 bits |= val & 1;
2427 } else bits |= *str++ & 1;
2428 if (l & 7) bits <<= 1;
fc241834 2429 else {
f337b084
TH
2430 PUSH_BYTE(utf8, cur, bits);
2431 bits = 0;
a6ec74c1
JH
2432 }
2433 }
f337b084
TH
2434 else
2435 /* datumtype == 'b' */
2436 while (l++ < len) {
2437 if (utf8_source) {
95b63a38 2438 UV val = 0;
f337b084
TH
2439 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2440 if (val & 1) bits |= 0x80;
2441 } else if (*str++ & 1)
2442 bits |= 0x80;
2443 if (l & 7) bits >>= 1;
fc241834 2444 else {
f337b084
TH
2445 PUSH_BYTE(utf8, cur, bits);
2446 bits = 0;
a6ec74c1
JH
2447 }
2448 }
f337b084
TH
2449 l--;
2450 if (l & 7) {
fc241834 2451 if (datumtype == 'B')
f337b084 2452 bits <<= 7 - (l & 7);
fc241834 2453 else
f337b084
TH
2454 bits >>= 7 - (l & 7);
2455 PUSH_BYTE(utf8, cur, bits);
2456 l += 7;
a6ec74c1 2457 }
f337b084
TH
2458 /* Determine how many chars are left in the requested field */
2459 l /= 8;
2460 if (howlen == e_star) field_len = 0;
2461 else field_len -= l;
2462 Zero(cur, field_len, char);
2463 cur += field_len;
a6ec74c1 2464 break;
f337b084 2465 }
a6ec74c1 2466 case 'H':
f337b084 2467 case 'h': {
10516c54 2468 const char *str, *end;
f337b084
TH
2469 I32 l, field_len;
2470 U8 bits;
2471 bool utf8_source;
2472 U32 utf8_flags;
a6ec74c1 2473
fc241834 2474 fromstr = NEXTFROM;
10516c54 2475 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2476 end = str + fromlen;
2477 if (DO_UTF8(fromstr)) {
2478 utf8_source = TRUE;
041457d9 2479 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2480 } else {
2481 utf8_source = FALSE;
2482 utf8_flags = 0; /* Unused, but keep compilers happy */
2483 }
2484 if (howlen == e_star) len = fromlen;
2485 field_len = (len+1)/2;
2486 GROWING(utf8, cat, start, cur, field_len);
2487 if (!utf8 && len > (I32)fromlen) len = fromlen;
2488 bits = 0;
2489 l = 0;
2490 if (datumtype == 'H')
2491 while (l++ < len) {
2492 if (utf8_source) {
95b63a38 2493 UV val = 0;
f337b084
TH
2494 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2495 if (val < 256 && isALPHA(val))
2496 bits |= (val + 9) & 0xf;
a6ec74c1 2497 else
f337b084
TH
2498 bits |= val & 0xf;
2499 } else if (isALPHA(*str))
2500 bits |= (*str++ + 9) & 0xf;
2501 else
2502 bits |= *str++ & 0xf;
2503 if (l & 1) bits <<= 4;
fc241834 2504 else {
f337b084
TH
2505 PUSH_BYTE(utf8, cur, bits);
2506 bits = 0;
a6ec74c1
JH
2507 }
2508 }
f337b084
TH
2509 else
2510 while (l++ < len) {
2511 if (utf8_source) {
95b63a38 2512 UV val = 0;
f337b084
TH
2513 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2514 if (val < 256 && isALPHA(val))
2515 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2516 else
f337b084
TH
2517 bits |= (val & 0xf) << 4;
2518 } else if (isALPHA(*str))
2519 bits |= ((*str++ + 9) & 0xf) << 4;
2520 else
2521 bits |= (*str++ & 0xf) << 4;
2522 if (l & 1) bits >>= 4;
fc241834 2523 else {
f337b084
TH
2524 PUSH_BYTE(utf8, cur, bits);
2525 bits = 0;
a6ec74c1 2526 }
fc241834 2527 }
f337b084
TH
2528 l--;
2529 if (l & 1) {
2530 PUSH_BYTE(utf8, cur, bits);
2531 l++;
2532 }
2533 /* Determine how many chars are left in the requested field */
2534 l /= 2;
2535 if (howlen == e_star) field_len = 0;
2536 else field_len -= l;
2537 Zero(cur, field_len, char);
2538 cur += field_len;
2539 break;
fc241834
RGS
2540 }
2541 case 'c':
f337b084
TH
2542 while (len-- > 0) {
2543 IV aiv;
2544 fromstr = NEXTFROM;
2545 aiv = SvIV(fromstr);
a2a5de95
NC
2546 if ((-128 > aiv || aiv > 127))
2547 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2548 "Character in 'c' format wrapped in pack");
585ec06d 2549 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2550 }
2551 break;
2552 case 'C':
f337b084
TH
2553 if (len == 0) {
2554 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2555 break;
2556 }
a6ec74c1 2557 while (len-- > 0) {
f337b084 2558 IV aiv;
a6ec74c1 2559 fromstr = NEXTFROM;
f337b084 2560 aiv = SvIV(fromstr);
a2a5de95
NC
2561 if ((0 > aiv || aiv > 0xff))
2562 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2563 "Character in 'C' format wrapped in pack");
1651fc44 2564 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2565 }
fc241834
RGS
2566 break;
2567 case 'W': {
2568 char *end;
670f1322 2569 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2570
2571 end = start+SvLEN(cat)-1;
2572 if (utf8) end -= UTF8_MAXLEN-1;
2573 while (len-- > 0) {
2574 UV auv;
2575 fromstr = NEXTFROM;
2576 auv = SvUV(fromstr);
2577 if (in_bytes) auv = auv % 0x100;
2578 if (utf8) {
2579 W_utf8:
2580 if (cur > end) {
2581 *cur = '\0';
b162af07 2582 SvCUR_set(cat, cur - start);
fc241834
RGS
2583
2584 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2585 end = start+SvLEN(cat)-UTF8_MAXLEN;
2586 }
230e1fce
NC
2587 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2588 NATIVE_TO_UNI(auv),
041457d9 2589 warn_utf8 ?
230e1fce 2590 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2591 } else {
2592 if (auv >= 0x100) {
2593 if (!SvUTF8(cat)) {
2594 *cur = '\0';
b162af07 2595 SvCUR_set(cat, cur - start);
fc241834
RGS
2596 marked_upgrade(aTHX_ cat, symptr);
2597 lookahead.flags |= FLAG_DO_UTF8;
2598 lookahead.strbeg = symptr->strbeg;
2599 utf8 = 1;
2600 start = SvPVX(cat);
2601 cur = start + SvCUR(cat);
2602 end = start+SvLEN(cat)-UTF8_MAXLEN;
2603 goto W_utf8;
2604 }
a2a5de95
NC
2605 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2606 "Character in 'W' format wrapped in pack");
fc241834
RGS
2607 auv &= 0xff;
2608 }
2609 if (cur >= end) {
2610 *cur = '\0';
b162af07 2611 SvCUR_set(cat, cur - start);
fc241834
RGS
2612 GROWING(0, cat, start, cur, len+1);
2613 end = start+SvLEN(cat)-1;
2614 }
fe2774ed 2615 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2616 }
2617 }
2618 break;
fc241834
RGS
2619 }
2620 case 'U': {
2621 char *end;
2622
2623 if (len == 0) {
2624 if (!(symptr->flags & FLAG_DO_UTF8)) {
2625 marked_upgrade(aTHX_ cat, symptr);
2626 lookahead.flags |= FLAG_DO_UTF8;
2627 lookahead.strbeg = symptr->strbeg;
2628 }
2629 utf8 = 0;
2630 goto no_change;
2631 }
2632
2633 end = start+SvLEN(cat);
2634 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2635 while (len-- > 0) {
fc241834 2636 UV auv;
a6ec74c1 2637 fromstr = NEXTFROM;
fc241834
RGS
2638 auv = SvUV(fromstr);
2639 if (utf8) {
230e1fce 2640 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2641 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2642 warn_utf8 ?
fc241834
RGS
2643 0 : UNICODE_ALLOW_ANY);
2644 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2645 *cur = '\0';
b162af07 2646 SvCUR_set(cat, cur - start);
fc241834
RGS
2647 GROWING(0, cat, start, cur,
2648 len+(endb-buffer)*UTF8_EXPAND);
2649 end = start+SvLEN(cat);
2650 }
64844641 2651 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2652 } else {
2653 if (cur >= end) {
2654 *cur = '\0';
b162af07 2655 SvCUR_set(cat, cur - start);
fc241834
RGS
2656 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2657 end = start+SvLEN(cat)-UTF8_MAXLEN;
2658 }
230e1fce 2659 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2660 warn_utf8 ?
230e1fce 2661 0 : UNICODE_ALLOW_ANY);
fc241834 2662 }
a6ec74c1 2663 }
a6ec74c1 2664 break;
fc241834 2665 }
a6ec74c1
JH
2666 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2667 case 'f':
a6ec74c1 2668 while (len-- > 0) {
f337b084
TH
2669 float afloat;
2670 NV anv;
a6ec74c1 2671 fromstr = NEXTFROM;
f337b084 2672 anv = SvNV(fromstr);
85bba25f 2673# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2674 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2675 * on Alpha; fake it if we don't have them.
2676 */
f337b084 2677 if (anv > FLT_MAX)
fc241834 2678 afloat = FLT_MAX;
f337b084 2679 else if (anv < -FLT_MAX)
fc241834 2680 afloat = -FLT_MAX;
f337b084 2681 else afloat = (float)anv;
baf3cf9c 2682# else
f337b084 2683 afloat = (float)anv;
baf3cf9c 2684# endif
2b4ad569 2685 DO_BO_PACK(afloat);
f337b084 2686 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2687 }
2688 break;
2689 case 'd':
a6ec74c1 2690 while (len-- > 0) {
f337b084
TH
2691 double adouble;
2692 NV anv;
a6ec74c1 2693 fromstr = NEXTFROM;
f337b084 2694 anv = SvNV(fromstr);
85bba25f 2695# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2696 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2697 * on Alpha; fake it if we don't have them.
2698 */
f337b084 2699 if (anv > DBL_MAX)
fc241834 2700 adouble = DBL_MAX;
f337b084 2701 else if (anv < -DBL_MAX)
fc241834 2702 adouble = -DBL_MAX;
f337b084 2703 else adouble = (double)anv;
baf3cf9c 2704# else
f337b084 2705 adouble = (double)anv;
baf3cf9c 2706# endif
2b4ad569 2707 DO_BO_PACK(adouble);
f337b084 2708 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2709 }
2710 break;
fc241834 2711 case 'F': {
275663fa 2712 NV_bytes anv;
1109a392 2713 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2714 while (len-- > 0) {
2715 fromstr = NEXTFROM;
cd07c537
DM
2716#ifdef __GNUC__
2717 /* to work round a gcc/x86 bug; don't use SvNV */
2718 anv.nv = sv_2nv(fromstr);
2719#else
275663fa 2720 anv.nv = SvNV(fromstr);
cd07c537 2721#endif
2b4ad569 2722 DO_BO_PACK(anv);
275663fa 2723 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2724 }
2725 break;
fc241834 2726 }
92d41999 2727#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2728 case 'D': {
275663fa 2729 ld_bytes aldouble;
1109a392
MHM
2730 /* long doubles can have unused bits, which may be nonzero */
2731 Zero(&aldouble, 1, long double);
92d41999
JH
2732 while (len-- > 0) {
2733 fromstr = NEXTFROM;
cd07c537
DM
2734# ifdef __GNUC__
2735 /* to work round a gcc/x86 bug; don't use SvNV */
2736 aldouble.ld = (long double)sv_2nv(fromstr);
2737# else
275663fa 2738 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2739# endif
2b4ad569 2740 DO_BO_PACK(aldouble);
275663fa 2741 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2742 }
2743 break;
fc241834 2744 }
92d41999 2745#endif
068bd2e7 2746 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2747 case 'n':
2748 while (len-- > 0) {
f337b084 2749 I16 ai16;
a6ec74c1 2750 fromstr = NEXTFROM;
ef108786 2751 ai16 = (I16)SvIV(fromstr);
ef108786 2752 ai16 = PerlSock_htons(ai16);
f337b084 2753 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2754 }
2755 break;
068bd2e7 2756 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2757 case 'v':
2758 while (len-- > 0) {
f337b084 2759 I16 ai16;
a6ec74c1 2760 fromstr = NEXTFROM;
ef108786 2761 ai16 = (I16)SvIV(fromstr);
ef108786 2762 ai16 = htovs(ai16);
f337b084 2763 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2764 }
2765 break;
49704364 2766 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2767#if SHORTSIZE != SIZE16
fc241834 2768 while (len-- > 0) {
f337b084 2769 unsigned short aushort;
fc241834
RGS
2770 fromstr = NEXTFROM;
2771 aushort = SvUV(fromstr);
2b4ad569 2772 DO_BO_PACK(aushort);
f337b084 2773 PUSH_VAR(utf8, cur, aushort);
fc241834 2774 }
49704364
WL
2775 break;
2776#else
2777 /* Fall through! */
a6ec74c1 2778#endif
49704364 2779 case 'S':
fc241834 2780 while (len-- > 0) {
f337b084 2781 U16 au16;
fc241834
RGS
2782 fromstr = NEXTFROM;
2783 au16 = (U16)SvUV(fromstr);
2b4ad569 2784 DO_BO_PACK(au16);
f337b084 2785 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2786 }
2787 break;
49704364 2788 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2789#if SHORTSIZE != SIZE16
fc241834 2790 while (len-- > 0) {
f337b084 2791 short ashort;
fc241834
RGS
2792 fromstr = NEXTFROM;
2793 ashort = SvIV(fromstr);
2b4ad569 2794 DO_BO_PACK(ashort);
f337b084 2795 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2796 }
49704364
WL
2797 break;
2798#else
2799 /* Fall through! */
a6ec74c1 2800#endif
49704364
WL
2801 case 's':
2802 while (len-- > 0) {
f337b084 2803 I16 ai16;
49704364 2804 fromstr = NEXTFROM;
ef108786 2805 ai16 = (I16)SvIV(fromstr);
2b4ad569 2806 DO_BO_PACK(ai16);
f337b084 2807 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2808 }
2809 break;
2810 case 'I':
49704364 2811 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2812 while (len-- > 0) {
f337b084 2813 unsigned int auint;
a6ec74c1
JH
2814 fromstr = NEXTFROM;
2815 auint = SvUV(fromstr);
2b4ad569 2816 DO_BO_PACK(auint);
f337b084 2817 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2818 }
2819 break;
92d41999
JH
2820 case 'j':
2821 while (len-- > 0) {
f337b084 2822 IV aiv;
92d41999
JH
2823 fromstr = NEXTFROM;
2824 aiv = SvIV(fromstr);
2b4ad569 2825 DO_BO_PACK(aiv);
f337b084 2826 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
2827 }
2828 break;
2829 case 'J':
2830 while (len-- > 0) {
f337b084 2831 UV auv;
92d41999
JH
2832 fromstr = NEXTFROM;
2833 auv = SvUV(fromstr);
2b4ad569 2834 DO_BO_PACK(auv);
f337b084 2835 PUSH_VAR(utf8, cur, auv);
92d41999
JH
2836 }
2837 break;
a6ec74c1
JH
2838 case 'w':
2839 while (len-- > 0) {
f337b084 2840 NV anv;
a6ec74c1 2841 fromstr = NEXTFROM;
15e9f109 2842 anv = SvNV(fromstr);
a6ec74c1 2843
f337b084
TH
2844 if (anv < 0) {
2845 *cur = '\0';
b162af07 2846 SvCUR_set(cat, cur - start);
49704364 2847 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2848 }
a6ec74c1 2849
196b62db
NC
2850 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2851 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2852 any negative IVs will have already been got by the croak()
2853 above. IOK is untrue for fractions, so we test them
2854 against UV_MAX_P1. */
f337b084
TH
2855 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2856 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2857 char *in = buf + sizeof(buf);
196b62db 2858 UV auv = SvUV(fromstr);
a6ec74c1
JH
2859
2860 do {
eb160463 2861 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2862 auv >>= 7;
2863 } while (auv);
2864 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2865 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2866 in, (buf + sizeof(buf)) - in);
2867 } else if (SvPOKp(fromstr))
2868 goto w_string;
a6ec74c1 2869 else if (SvNOKp(fromstr)) {
0258719b 2870 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2871 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2872 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2873 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2874 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2875 Some C compilers are strict about integral constant
2876 expressions so we conservatively divide by a slightly
2877 smaller integer instead of multiplying by the exact
2878 floating-point value.
0258719b
NC
2879 */
2880#ifdef NV_MAX_10_EXP
f337b084 2881 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2882 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2883#else
f337b084 2884 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2885 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2886#endif
a6ec74c1
JH
2887 char *in = buf + sizeof(buf);
2888
8b6e33c7 2889 anv = Perl_floor(anv);
a6ec74c1 2890 do {
8b6e33c7 2891 const NV next = Perl_floor(anv / 128);
a6ec74c1 2892 if (in <= buf) /* this cannot happen ;-) */
49704364 2893 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2894 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2895 anv = next;
2896 } while (anv > 0);
a6ec74c1 2897 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2898 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2899 in, (buf + sizeof(buf)) - in);
2900 } else {
8b6e33c7
AL
2901 const char *from;
2902 char *result, *in;
735b914b
JH
2903 SV *norm;
2904 STRLEN len;
2905 bool done;
2906
f337b084 2907 w_string:
735b914b 2908 /* Copy string and check for compliance */
349d4f2f 2909 from = SvPV_const(fromstr, len);
735b914b 2910 if ((norm = is_an_int(from, len)) == NULL)
49704364 2911 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2912
a02a5408 2913 Newx(result, len, char);
735b914b
JH
2914 in = result + len;
2915 done = FALSE;
f337b084 2916 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2917 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2918 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2919 in, (result + len) - in);
735b914b
JH
2920 Safefree(result);
2921 SvREFCNT_dec(norm); /* free norm */
fc241834 2922 }
a6ec74c1
JH
2923 }
2924 break;
2925 case 'i':
49704364 2926 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2927 while (len-- > 0) {
f337b084 2928 int aint;
a6ec74c1
JH
2929 fromstr = NEXTFROM;
2930 aint = SvIV(fromstr);
2b4ad569 2931 DO_BO_PACK(aint);
f337b084 2932 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
2933 }
2934 break;
068bd2e7 2935 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2936 case 'N':
2937 while (len-- > 0) {
f337b084 2938 U32 au32;
a6ec74c1 2939 fromstr = NEXTFROM;
ef108786 2940 au32 = SvUV(fromstr);
ef108786 2941 au32 = PerlSock_htonl(au32);
f337b084 2942 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2943 }
2944 break;
068bd2e7 2945 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2946 case 'V':
2947 while (len-- > 0) {
f337b084 2948 U32 au32;
a6ec74c1 2949 fromstr = NEXTFROM;
ef108786 2950 au32 = SvUV(fromstr);
ef108786 2951 au32 = htovl(au32);
f337b084 2952 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2953 }
2954 break;
49704364 2955 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2956#if LONGSIZE != SIZE32
fc241834 2957 while (len-- > 0) {
f337b084 2958 unsigned long aulong;
fc241834
RGS
2959 fromstr = NEXTFROM;
2960 aulong = SvUV(fromstr);
2b4ad569 2961 DO_BO_PACK(aulong);
f337b084 2962 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 2963 }
49704364
WL
2964 break;
2965#else
2966 /* Fall though! */
a6ec74c1 2967#endif
49704364 2968 case 'L':
fc241834 2969 while (len-- > 0) {
f337b084 2970 U32 au32;
fc241834
RGS
2971 fromstr = NEXTFROM;
2972 au32 = SvUV(fromstr);
2b4ad569 2973 DO_BO_PACK(au32);
f337b084 2974 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2975 }
2976 break;
49704364 2977 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2978#if LONGSIZE != SIZE32
fc241834 2979 while (len-- > 0) {
f337b084 2980 long along;
fc241834
RGS
2981 fromstr = NEXTFROM;
2982 along = SvIV(fromstr);
2b4ad569 2983 DO_BO_PACK(along);
f337b084 2984 PUSH_VAR(utf8, cur, along);
a6ec74c1 2985 }
49704364
WL
2986 break;
2987#else
2988 /* Fall though! */
a6ec74c1 2989#endif
49704364
WL
2990 case 'l':
2991 while (len-- > 0) {
f337b084 2992 I32 ai32;
49704364 2993 fromstr = NEXTFROM;
ef108786 2994 ai32 = SvIV(fromstr);
2b4ad569 2995 DO_BO_PACK(ai32);
f337b084 2996 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
2997 }
2998 break;
2999#ifdef HAS_QUAD
3000 case 'Q':
3001 while (len-- > 0) {
f337b084 3002 Uquad_t auquad;
a6ec74c1 3003 fromstr = NEXTFROM;
f337b084 3004 auquad = (Uquad_t) SvUV(fromstr);
2b4ad569 3005 DO_BO_PACK(auquad);
f337b084 3006 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3007 }
3008 break;
3009 case 'q':
3010 while (len-- > 0) {
f337b084 3011 Quad_t aquad;
a6ec74c1
JH
3012 fromstr = NEXTFROM;
3013 aquad = (Quad_t)SvIV(fromstr);
2b4ad569 3014 DO_BO_PACK(aquad);
f337b084 3015 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3016 }
3017 break;
f337b084 3018#endif /* HAS_QUAD */
a6ec74c1
JH
3019 case 'P':
3020 len = 1; /* assume SV is correct length */
f337b084 3021 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3022 /* Fall through! */
a6ec74c1
JH
3023 case 'p':
3024 while (len-- > 0) {
83003860 3025 const char *aptr;
f337b084 3026
a6ec74c1 3027 fromstr = NEXTFROM;
28a4f200
TH
3028 SvGETMAGIC(fromstr);
3029 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3030 else {
a6ec74c1
JH
3031 /* XXX better yet, could spirit away the string to
3032 * a safe spot and hang on to it until the result
3033 * of pack() (and all copies of the result) are
3034 * gone.
3035 */
041457d9 3036 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3037 !SvREADONLY(fromstr)))) {
3038 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3039 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3040 }
3041 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3042 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3043 else
2596d9fe 3044 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3045 }
2b4ad569 3046 DO_BO_PACK(aptr);
f337b084 3047 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3048 }
3049 break;
fc241834 3050 case 'u': {
f7fe979e 3051 const char *aptr, *aend;
fc241834 3052 bool from_utf8;
f337b084 3053
a6ec74c1 3054 fromstr = NEXTFROM;
fc241834
RGS
3055 if (len <= 2) len = 45;
3056 else len = len / 3 * 3;
3057 if (len >= 64) {
a2a5de95
NC
3058 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3059 "Field too wide in 'u' format in pack");
fc241834
RGS
3060 len = 63;
3061 }
83003860 3062 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3063 from_utf8 = DO_UTF8(fromstr);
3064 if (from_utf8) {
3065 aend = aptr + fromlen;
3f63b0e5 3066 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3067 } else aend = NULL; /* Unused, but keep compilers happy */
3068 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3069 while (fromlen > 0) {
fc241834 3070 U8 *end;
a6ec74c1 3071 I32 todo;
fc241834 3072 U8 hunk[1+63/3*4+1];
a6ec74c1 3073
eb160463 3074 if ((I32)fromlen > len)
a6ec74c1
JH
3075 todo = len;
3076 else
3077 todo = fromlen;
fc241834
RGS
3078 if (from_utf8) {
3079 char buffer[64];
3080 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3081 'u' | TYPE_IS_PACK)) {
3082 *cur = '\0';
b162af07 3083 SvCUR_set(cat, cur - start);
5637ef5b
NC
3084 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3085 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3086 aptr, aend, buffer, (long) todo);
fc241834
RGS
3087 }
3088 end = doencodes(hunk, buffer, todo);
3089 } else {
3090 end = doencodes(hunk, aptr, todo);
3091 aptr += todo;
3092 }
3093 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3094 fromlen -= todo;
3095 }
a6ec74c1
JH
3096 break;
3097 }
f337b084
TH
3098 }
3099 *cur = '\0';
b162af07 3100 SvCUR_set(cat, cur - start);
f337b084 3101 no_change:
49704364 3102 *symptr = lookahead;
a6ec74c1 3103 }
49704364 3104 return beglist;
18529408
IZ
3105}
3106#undef NEXTFROM
3107
3108
3109PP(pp_pack)
3110{
97aff369 3111 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3112 SV *cat = TARG;
18529408 3113 STRLEN fromlen;
349d4f2f 3114 SV *pat_sv = *++MARK;
eb578fdb
KW
3115 const char *pat = SvPV_const(pat_sv, fromlen);
3116 const char *patend = pat + fromlen;
18529408
IZ
3117
3118 MARK++;
76f68e9b 3119 sv_setpvs(cat, "");
f337b084 3120 SvUTF8_off(cat);
18529408 3121
7accc089 3122 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3123
a6ec74c1
JH
3124 SvSETMAGIC(cat);
3125 SP = ORIGMARK;
3126 PUSHs(cat);
3127 RETURN;
3128}
a6ec74c1 3129
73cb7263
NC
3130/*
3131 * Local variables:
3132 * c-indentation-style: bsd
3133 * c-basic-offset: 4
14d04a33 3134 * indent-tabs-mode: nil
73cb7263
NC
3135 * End:
3136 *
14d04a33 3137 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3138 */