This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Pass 'needs_swap' as a parameter to SHIFT_BYTES.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
f337b084
TH
132#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
133#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3 134
a1219b5e
NC
135#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
137#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
139#else
140# error "Unsupported byteorder"
20aa3a38
NC
141 /* Need to add code here to re-instate mixed endian support.
142 NEEDS_SWAP would need to hold a flag indicating which action to
143 take, and S_reverse_copy and the code in uni_to_bytes would need
144 logic adding to deal with any mixed-endian transformations needed.
145 */
a1219b5e
NC
146#endif
147
08ca2aa3 148/* Only to be used inside a loop (see the break) */
aaec8192 149#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3
TH
150STMT_START { \
151 if (utf8) { \
f337b084 152 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 153 (char *) (buf), len, datumtype)) break; \
08ca2aa3 154 } else { \
20aa3a38
NC
155 if (needs_swap) \
156 S_reverse_copy(s, (char *) (buf), len); \
157 else \
158 Copy(s, (char *) (buf), len, char); \
275663fa 159 s += len; \
08ca2aa3 160 } \
08ca2aa3
TH
161} STMT_END
162
aaec8192
NC
163#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
164 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 165
aaec8192
NC
166#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
167 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 168
aaec8192
NC
169#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
170 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 171
f337b084 172#define PUSH_VAR(utf8, aptr, var) \
230e1fce 173 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 174
49704364
LW
175/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
176#define MAX_SUB_TEMPLATE_LEVEL 100
177
66c611c5 178/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
179#define FLAG_WAS_UTF8 0x40
180#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 181#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 182#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
183#define FLAG_SLASH 0x04
184#define FLAG_COMMA 0x02
185#define FLAG_PACK 0x01
186
a6ec74c1
JH
187STATIC SV *
188S_mul128(pTHX_ SV *sv, U8 m)
189{
190 STRLEN len;
191 char *s = SvPV(sv, len);
192 char *t;
a6ec74c1 193
7918f24d
NC
194 PERL_ARGS_ASSERT_MUL128;
195
a6ec74c1 196 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 197 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
198
199 sv_catsv(tmpNew, sv);
200 SvREFCNT_dec(sv); /* free old sv */
201 sv = tmpNew;
202 s = SvPV(sv, len);
203 }
204 t = s + len - 1;
205 while (!*t) /* trailing '\0'? */
206 t--;
207 while (t > s) {
f7fe979e 208 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
209 *(t--) = '0' + (char)(i % 10);
210 m = (char)(i / 10);
a6ec74c1
JH
211 }
212 return (sv);
213}
214
215/* Explosives and implosives. */
216
217#if 'I' == 73 && 'J' == 74
218/* On an ASCII/ISO kind of system */
219#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
220#else
221/*
222 Some other sort of character set - use memchr() so we don't match
223 the null byte.
224 */
225#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
226#endif
227
66c611c5 228/* type modifiers */
62f95557 229#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
230#define TYPE_IS_BIG_ENDIAN 0x200
231#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 232#define TYPE_IS_PACK 0x800
1109a392 233#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 234#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
235#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
236
7212898e
NC
237# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
238# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
239
240# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
241
2b4ad569 242# define DO_BO_PACK(var) \
3cc90e2e 243 STMT_START { \
a1219b5e 244 if (needs_swap) { \
e8fda8c4 245 my_swabn(&var, sizeof(var)); \
3cc90e2e
NC
246 } \
247 } STMT_END
248
78d46eaa 249#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 250#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
251#define PACK_SIZE_MASK 0x3F
252
298bc19c 253#include "packsizetables.c"
78d46eaa 254
20aa3a38
NC
255static void
256S_reverse_copy(const char *src, char *dest, STRLEN len)
257{
258 dest += len;
259 while (len--)
260 *--dest = *src++;
261}
262
08ca2aa3 263STATIC U8
f7fe979e 264uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 265{
08ca2aa3 266 STRLEN retlen;
0bcc34c2 267 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 269 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
270 warnings), but these two mean we make no progress in the string and
271 might enter an infinite loop */
272 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
273 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
274 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 275 if (val >= 0x100) {
a2a5de95
NC
276 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
277 "Character in '%c' format wrapped in unpack",
278 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
279 val &= 0xff;
280 }
281 *s += retlen;
fe2774ed 282 return (U8)val;
08ca2aa3
TH
283}
284
f337b084
TH
285#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
286 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
287 *(U8 *)(s)++)
288
289STATIC bool
f7fe979e 290uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
291{
292 UV val;
293 STRLEN retlen;
f7fe979e 294 const char *from = *s;
08ca2aa3 295 int bad = 0;
f7fe979e 296 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 297 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
298 const bool needs_swap = NEEDS_SWAP(datumtype);
299
300 if (needs_swap)
301 buf += buf_len;
302
08ca2aa3
TH
303 for (;buf_len > 0; buf_len--) {
304 if (from >= end) return FALSE;
f337b084 305 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
306 if (retlen == (STRLEN) -1 || retlen == 0) {
307 from += UTF8SKIP(from);
308 bad |= 1;
309 } else from += retlen;
310 if (val >= 0x100) {
311 bad |= 2;
312 val &= 0xff;
313 }
20aa3a38
NC
314 if (needs_swap)
315 *(U8 *)--buf = (U8)val;
316 else
317 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
318 }
319 /* We have enough characters for the buffer. Did we have problems ? */
320 if (bad) {
321 if (bad & 1) {
322 /* Rewalk the string fragment while warning */
f7fe979e 323 const char *ptr;
9e27e96a 324 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
325 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
326 if (ptr >= end) break;
f337b084 327 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
328 }
329 if (from > end) from = end;
330 }
a2a5de95
NC
331 if ((bad & 2))
332 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 333 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
334 "Character(s) in '%c' format wrapped in %s",
335 (int) TYPE_NO_MODIFIERS(datumtype),
336 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
337 }
338 *s = from;
339 return TRUE;
340}
341
342STATIC bool
f7fe979e 343next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 344{
97aff369 345 dVAR;
08ca2aa3 346 STRLEN retlen;
0bcc34c2 347 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
348 if (val >= 0x100 || !ISUUCHAR(val) ||
349 retlen == (STRLEN) -1 || retlen == 0) {
350 *out = 0;
351 return FALSE;
352 }
353 *out = PL_uudmap[val] & 077;
f337b084 354 *s += retlen;
08ca2aa3
TH
355 return TRUE;
356}
78d46eaa 357
64844641 358STATIC char *
14333449 359S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 360 const U8 * const end = start + len;
64844641 361
7918f24d
NC
362 PERL_ARGS_ASSERT_BYTES_TO_UNI;
363
f337b084 364 while (start < end) {
48fa4626
ST
365 const UV uv = NATIVE_TO_ASCII(*start);
366 if (UNI_IS_INVARIANT(uv))
367 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
368 else {
369 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
370 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
371 }
372 start++;
373 }
64844641 374 return dest;
f337b084
TH
375}
376
230e1fce
NC
377#define PUSH_BYTES(utf8, cur, buf, len) \
378STMT_START { \
64844641
AL
379 if (utf8) \
380 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
381 else { \
382 Copy(buf, cur, len, char); \
383 (cur) += (len); \
384 } \
f337b084
TH
385} STMT_END
386
387#define GROWING(utf8, cat, start, cur, in_len) \
388STMT_START { \
389 STRLEN glen = (in_len); \
3473cf63 390 if (utf8) glen *= UTF8_EXPAND; \
f337b084 391 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 392 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
393 (cur) = (start) + SvCUR(cat); \
394 } \
395} STMT_END
396
397#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
398STMT_START { \
f7fe979e 399 const STRLEN glen = (in_len); \
f337b084 400 STRLEN gl = glen; \
3473cf63 401 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
402 if ((cur) + gl >= (start) + SvLEN(cat)) { \
403 *cur = '\0'; \
b162af07 404 SvCUR_set((cat), (cur) - (start)); \
0bd48802 405 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
406 (cur) = (start) + SvCUR(cat); \
407 } \
408 PUSH_BYTES(utf8, cur, buf, glen); \
409} STMT_END
410
411#define PUSH_BYTE(utf8, s, byte) \
412STMT_START { \
413 if (utf8) { \
f7fe979e 414 const U8 au8 = (byte); \
64844641 415 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
416 } else *(U8 *)(s)++ = (byte); \
417} STMT_END
418
419/* Only to be used inside a loop (see the break) */
420#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
421STMT_START { \
422 STRLEN retlen; \
423 if (str >= end) break; \
424 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
425 if (retlen == (STRLEN) -1 || retlen == 0) { \
426 *cur = '\0'; \
427 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
428 } \
429 str += retlen; \
430} STMT_END
431
f7fe979e
AL
432static const char *_action( const tempsym_t* symptr )
433{
10edeb5d 434 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
435}
436
206947d2 437/* Returns the sizeof() struct described by pat */
028d1f6d 438STATIC I32
f337b084 439S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 440{
f337b084 441 I32 total = 0;
206947d2 442
7918f24d
NC
443 PERL_ARGS_ASSERT_MEASURE_STRUCT;
444
49704364 445 while (next_symbol(symptr)) {
f337b084 446 I32 len;
f7fe979e 447 int size;
f337b084
TH
448
449 switch (symptr->howlen) {
fc241834 450 case e_star:
49704364 451 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 452 _action( symptr ) );
49704364 453 break;
f337b084
TH
454 default:
455 /* e_no_len and e_number */
456 len = symptr->length;
457 break;
49704364
LW
458 }
459
a7a3cfaa 460 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 461 if (!size) {
f7fe979e 462 int star;
80a13697
NC
463 /* endianness doesn't influence the size of a type */
464 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
465 default:
466 Perl_croak(aTHX_ "Invalid type '%c' in %s",
467 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 468 _action( symptr ) );
28be1210
TH
469 case '.' | TYPE_IS_SHRIEKING:
470 case '@' | TYPE_IS_SHRIEKING:
80a13697 471 case '@':
28be1210 472 case '.':
80a13697
NC
473 case '/':
474 case 'U': /* XXXX Is it correct? */
475 case 'w':
476 case 'u':
477 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 478 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 479 _action( symptr ) );
80a13697
NC
480 case '%':
481 size = 0;
482 break;
483 case '(':
fc241834
RGS
484 {
485 tempsym_t savsym = *symptr;
486 symptr->patptr = savsym.grpbeg;
487 symptr->patend = savsym.grpend;
488 /* XXXX Theoretically, we need to measure many times at
489 different positions, since the subexpression may contain
490 alignment commands, but be not of aligned length.
491 Need to detect this and croak(). */
492 size = measure_struct(symptr);
493 *symptr = savsym;
494 break;
495 }
80a13697
NC
496 case 'X' | TYPE_IS_SHRIEKING:
497 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
498 */
499 if (!len) /* Avoid division by 0 */
500 len = 1;
501 len = total % len; /* Assumed: the start is aligned. */
502 /* FALL THROUGH */
503 case 'X':
504 size = -1;
505 if (total < len)
f7fe979e 506 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
507 break;
508 case 'x' | TYPE_IS_SHRIEKING:
509 if (!len) /* Avoid division by 0 */
510 len = 1;
511 star = total % len; /* Assumed: the start is aligned. */
512 if (star) /* Other portable ways? */
513 len = len - star;
514 else
515 len = 0;
516 /* FALL THROUGH */
517 case 'x':
518 case 'A':
519 case 'Z':
520 case 'a':
80a13697
NC
521 size = 1;
522 break;
523 case 'B':
524 case 'b':
525 len = (len + 7)/8;
526 size = 1;
527 break;
528 case 'H':
529 case 'h':
530 len = (len + 1)/2;
531 size = 1;
532 break;
78d46eaa 533
80a13697
NC
534 case 'P':
535 len = 1;
536 size = sizeof(char*);
78d46eaa
NC
537 break;
538 }
206947d2
IZ
539 }
540 total += len * size;
541 }
542 return total;
543}
544
49704364
LW
545
546/* locate matching closing parenthesis or bracket
547 * returns char pointer to char after match, or NULL
548 */
f7fe979e 549STATIC const char *
5aaab254 550S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 551{
7918f24d
NC
552 PERL_ARGS_ASSERT_GROUP_END;
553
49704364 554 while (patptr < patend) {
f7fe979e 555 const char c = *patptr++;
49704364
LW
556
557 if (isSPACE(c))
558 continue;
559 else if (c == ender)
560 return patptr-1;
561 else if (c == '#') {
562 while (patptr < patend && *patptr != '\n')
563 patptr++;
564 continue;
565 } else if (c == '(')
566 patptr = group_end(patptr, patend, ')') + 1;
567 else if (c == '[')
568 patptr = group_end(patptr, patend, ']') + 1;
18529408 569 }
49704364
LW
570 Perl_croak(aTHX_ "No group ending character '%c' found in template",
571 ender);
572 return 0;
18529408
IZ
573}
574
49704364
LW
575
576/* Convert unsigned decimal number to binary.
577 * Expects a pointer to the first digit and address of length variable
578 * Advances char pointer to 1st non-digit char and returns number
fc241834 579 */
f7fe979e 580STATIC const char *
5aaab254 581S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
582{
583 I32 len = *patptr++ - '0';
7918f24d
NC
584
585 PERL_ARGS_ASSERT_GET_NUM;
586
49704364
LW
587 while (isDIGIT(*patptr)) {
588 if (len >= 0x7FFFFFFF/10)
589 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
590 len = (len * 10) + (*patptr++ - '0');
591 }
592 *lenptr = len;
593 return patptr;
594}
595
596/* The marvellous template parsing routine: Using state stored in *symptr,
597 * locates next template code and count
598 */
599STATIC bool
f337b084 600S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 601{
f7fe979e 602 const char* patptr = symptr->patptr;
0bcc34c2 603 const char* const patend = symptr->patend;
49704364 604
7918f24d
NC
605 PERL_ARGS_ASSERT_NEXT_SYMBOL;
606
49704364
LW
607 symptr->flags &= ~FLAG_SLASH;
608
609 while (patptr < patend) {
610 if (isSPACE(*patptr))
611 patptr++;
612 else if (*patptr == '#') {
613 patptr++;
614 while (patptr < patend && *patptr != '\n')
615 patptr++;
616 if (patptr < patend)
617 patptr++;
618 } else {
fc241834 619 /* We should have found a template code */
49704364 620 I32 code = *patptr++ & 0xFF;
66c611c5 621 U32 inherited_modifiers = 0;
49704364
LW
622
623 if (code == ','){ /* grandfather in commas but with a warning */
624 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
625 symptr->flags |= FLAG_COMMA;
626 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 627 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
628 }
629 continue;
630 }
fc241834 631
49704364 632 /* for '(', skip to ')' */
fc241834 633 if (code == '(') {
49704364
LW
634 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
635 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 636 _action( symptr ) );
49704364
LW
637 symptr->grpbeg = patptr;
638 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
639 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
640 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 641 _action( symptr ) );
49704364
LW
642 }
643
66c611c5
MHM
644 /* look for group modifiers to inherit */
645 if (TYPE_ENDIANNESS(symptr->flags)) {
646 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
647 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
648 }
649
1109a392
MHM
650 /* look for modifiers */
651 while (patptr < patend) {
b7787f18
AL
652 const char *allowed;
653 I32 modifier;
1109a392
MHM
654 switch (*patptr) {
655 case '!':
656 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 657 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
658 break;
659 case '>':
660 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 661 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
662 break;
663 case '<':
664 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 665 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
666 break;
667 default:
b7787f18
AL
668 allowed = "";
669 modifier = 0;
1109a392
MHM
670 break;
671 }
66c611c5 672
1109a392
MHM
673 if (modifier == 0)
674 break;
66c611c5 675
1109a392
MHM
676 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
677 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 678 allowed, _action( symptr ) );
66c611c5
MHM
679
680 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 681 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 682 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
683 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
684 TYPE_ENDIANNESS_MASK)
685 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 686 *patptr, _action( symptr ) );
66c611c5 687
a2a5de95
NC
688 if ((code & modifier)) {
689 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
690 "Duplicate modifier '%c' after '%c' in %s",
691 *patptr, (int) TYPE_NO_MODIFIERS(code),
692 _action( symptr ) );
1109a392 693 }
66c611c5 694
1109a392
MHM
695 code |= modifier;
696 patptr++;
49704364
LW
697 }
698
66c611c5
MHM
699 /* inherit modifiers */
700 code |= inherited_modifiers;
701
fc241834 702 /* look for count and/or / */
49704364
LW
703 if (patptr < patend) {
704 if (isDIGIT(*patptr)) {
705 patptr = get_num( patptr, &symptr->length );
706 symptr->howlen = e_number;
707
708 } else if (*patptr == '*') {
709 patptr++;
710 symptr->howlen = e_star;
711
712 } else if (*patptr == '[') {
f7fe979e 713 const char* lenptr = ++patptr;
49704364
LW
714 symptr->howlen = e_number;
715 patptr = group_end( patptr, patend, ']' ) + 1;
716 /* what kind of [] is it? */
717 if (isDIGIT(*lenptr)) {
718 lenptr = get_num( lenptr, &symptr->length );
719 if( *lenptr != ']' )
720 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 721 _action( symptr ) );
49704364
LW
722 } else {
723 tempsym_t savsym = *symptr;
724 symptr->patend = patptr-1;
725 symptr->patptr = lenptr;
726 savsym.length = measure_struct(symptr);
727 *symptr = savsym;
728 }
729 } else {
730 symptr->howlen = e_no_len;
731 symptr->length = 1;
732 }
733
734 /* try to find / */
735 while (patptr < patend) {
736 if (isSPACE(*patptr))
737 patptr++;
738 else if (*patptr == '#') {
739 patptr++;
740 while (patptr < patend && *patptr != '\n')
741 patptr++;
742 if (patptr < patend)
743 patptr++;
744 } else {
66c611c5 745 if (*patptr == '/') {
49704364
LW
746 symptr->flags |= FLAG_SLASH;
747 patptr++;
66c611c5
MHM
748 if (patptr < patend &&
749 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 750 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 751 _action( symptr ) );
49704364
LW
752 }
753 break;
754 }
18529408 755 }
49704364
LW
756 } else {
757 /* at end - no count, no / */
758 symptr->howlen = e_no_len;
759 symptr->length = 1;
760 }
761
762 symptr->code = code;
fc241834 763 symptr->patptr = patptr;
49704364 764 return TRUE;
18529408 765 }
49704364 766 }
fc241834 767 symptr->patptr = patptr;
49704364 768 return FALSE;
18529408
IZ
769}
770
18529408 771/*
fc241834 772 There is no way to cleanly handle the case where we should process the
08ca2aa3 773 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
774 (e.g. estimates like strend-s as an upper bound for the number of
775 characters left wouldn't work). So if we foresee the need of this
776 (pattern starts with U or contains U0), we want to work on the encoded
777 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
778 themselves if they need to do a lot of unpacks like this on it
779*/
fc241834 780STATIC bool
08ca2aa3
TH
781need_utf8(const char *pat, const char *patend)
782{
783 bool first = TRUE;
7918f24d
NC
784
785 PERL_ARGS_ASSERT_NEED_UTF8;
786
08ca2aa3
TH
787 while (pat < patend) {
788 if (pat[0] == '#') {
789 pat++;
f7fe979e 790 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
791 if (!pat) return FALSE;
792 } else if (pat[0] == 'U') {
793 if (first || pat[1] == '0') return TRUE;
794 } else first = FALSE;
795 pat++;
796 }
797 return FALSE;
798}
799
800STATIC char
801first_symbol(const char *pat, const char *patend) {
7918f24d
NC
802 PERL_ARGS_ASSERT_FIRST_SYMBOL;
803
08ca2aa3
TH
804 while (pat < patend) {
805 if (pat[0] != '#') return pat[0];
806 pat++;
f7fe979e 807 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
808 if (!pat) return 0;
809 pat++;
810 }
811 return 0;
812}
813
814/*
7accc089
JH
815=for apidoc unpackstring
816
21ebfc7a
DM
817The engine implementing the unpack() Perl function.
818
819Using the template pat..patend, this function unpacks the string
820s..strend into a number of mortal SVs, which it pushes onto the perl
821argument (@_) stack (so you will need to issue a C<PUTBACK> before and
822C<SPAGAIN> after the call to this function). It returns the number of
823pushed elements.
824
825The strend and patend pointers should point to the byte following the last
826character of each string.
827
828Although this function returns its values on the perl argument stack, it
829doesn't take any parameters from that stack (and thus in particular
830there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
831example).
7accc089
JH
832
833=cut */
834
835I32
f7fe979e 836Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 837{
f7fe979e 838 tempsym_t sym;
08ca2aa3 839
7918f24d
NC
840 PERL_ARGS_ASSERT_UNPACKSTRING;
841
f337b084 842 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
843 else if (need_utf8(pat, patend)) {
844 /* We probably should try to avoid this in case a scalar context call
845 wouldn't get to the "U0" */
846 STRLEN len = strend - s;
230e1fce 847 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
848 SAVEFREEPV(s);
849 strend = s + len;
f337b084 850 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
851 }
852
f337b084
TH
853 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
854 flags |= FLAG_PARSE_UTF8;
08ca2aa3 855
f7fe979e 856 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
857
858 return unpack_rec(&sym, s, s, strend, NULL );
859}
860
4136a0f7 861STATIC I32
f7fe979e 862S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 863{
27da23d5 864 dVAR; dSP;
3297d27d 865 SV *sv = NULL;
f7fe979e 866 const I32 start_sp_offset = SP - PL_stack_base;
49704364 867 howlen_t howlen;
a6ec74c1 868 I32 checksum = 0;
92d41999 869 UV cuv = 0;
a6ec74c1 870 NV cdouble = 0.0;
f337b084 871 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 872 bool beyond = FALSE;
21c16052 873 bool explicit_length;
9e27e96a 874 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 875 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
876
877 PERL_ARGS_ASSERT_UNPACK_REC;
878
28be1210 879 symptr->strbeg = s - strbeg;
49704364 880
49704364 881 while (next_symbol(symptr)) {
a7a3cfaa 882 packprops_t props;
9e27e96a 883 I32 len;
f337b084 884 I32 datumtype = symptr->code;
a1219b5e 885 bool needs_swap;
206947d2 886 /* do first one only unless in list context
08ca2aa3 887 / is implemented by unpacking the count, then popping it from the
206947d2 888 stack, so must check that we're not in the middle of a / */
49704364 889 if ( unpack_only_one
206947d2 890 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 891 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 892 break;
49704364 893
f337b084 894 switch (howlen = symptr->howlen) {
fc241834
RGS
895 case e_star:
896 len = strend - strbeg; /* long enough */
49704364 897 break;
f337b084
TH
898 default:
899 /* e_no_len and e_number */
900 len = symptr->length;
901 break;
49704364 902 }
18529408 903
21c16052 904 explicit_length = TRUE;
a6ec74c1 905 redo_switch:
49704364 906 beyond = s >= strend;
a7a3cfaa
TH
907
908 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
909 if (props) {
910 /* props nonzero means we can process this letter. */
9e27e96a
AL
911 const long size = props & PACK_SIZE_MASK;
912 const long howmany = (strend - s) / size;
a7a3cfaa
TH
913 if (len > howmany)
914 len = howmany;
915
916 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
917 if (len && unpack_only_one) len = 1;
918 EXTEND(SP, len);
919 EXTEND_MORTAL(len);
78d46eaa
NC
920 }
921 }
a7a3cfaa 922
a1219b5e
NC
923 needs_swap = NEEDS_SWAP(datumtype);
924
1109a392 925 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 926 default:
1109a392 927 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 928
a6ec74c1 929 case '%':
49704364 930 if (howlen == e_no_len)
18529408 931 len = 16; /* len is not specified */
a6ec74c1 932 checksum = len;
92d41999 933 cuv = 0;
a6ec74c1 934 cdouble = 0;
18529408 935 continue;
a6ec74c1 936 break;
18529408
IZ
937 case '(':
938 {
49704364 939 tempsym_t savsym = *symptr;
9e27e96a 940 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 941 symptr->flags |= group_modifiers;
49704364 942 symptr->patend = savsym.grpend;
28be1210 943 symptr->previous = &savsym;
49704364 944 symptr->level++;
18529408 945 PUTBACK;
c6f750d1 946 if (len && unpack_only_one) len = 1;
18529408 947 while (len--) {
49704364 948 symptr->patptr = savsym.grpbeg;
f337b084
TH
949 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
950 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
951 unpack_rec(symptr, s, strbeg, strend, &s);
952 if (s == strend && savsym.howlen == e_star)
49704364 953 break; /* No way to continue */
18529408
IZ
954 }
955 SPAGAIN;
28be1210 956 savsym.flags = symptr->flags & ~group_modifiers;
49704364 957 *symptr = savsym;
18529408
IZ
958 break;
959 }
28be1210 960 case '.' | TYPE_IS_SHRIEKING:
28be1210 961 case '.': {
9e27e96a 962 const char *from;
28be1210 963 SV *sv;
9e27e96a 964 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
965 if (howlen == e_star) from = strbeg;
966 else if (len <= 0) from = s;
967 else {
968 tempsym_t *group = symptr;
969
970 while (--len && group) group = group->previous;
971 from = group ? strbeg + group->strbeg : strbeg;
972 }
973 sv = from <= s ?
00646304
CB
974 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 976 mXPUSHs(sv);
28be1210
TH
977 break;
978 }
28be1210 979 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 980 case '@':
28be1210 981 s = strbeg + symptr->strbeg;
28be1210 982 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 983 {
08ca2aa3
TH
984 while (len > 0) {
985 if (s >= strend)
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
987 s += UTF8SKIP(s);
988 len--;
989 }
990 if (s > strend)
991 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992 } else {
28be1210 993 if (strend-s < len)
fc241834 994 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 995 s += len;
08ca2aa3 996 }
a6ec74c1 997 break;
62f95557
IZ
998 case 'X' | TYPE_IS_SHRIEKING:
999 if (!len) /* Avoid division by 0 */
1000 len = 1;
08ca2aa3 1001 if (utf8) {
f7fe979e 1002 const char *hop, *last;
f337b084
TH
1003 I32 l = len;
1004 hop = last = strbeg;
1005 while (hop < s) {
1006 hop += UTF8SKIP(hop);
1007 if (--l == 0) {
08ca2aa3 1008 last = hop;
f337b084
TH
1009 l = len;
1010 }
fc241834 1011 }
f337b084
TH
1012 if (last > s)
1013 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1014 s = last;
1015 break;
f337b084
TH
1016 }
1017 len = (s - strbeg) % len;
62f95557 1018 /* FALL THROUGH */
a6ec74c1 1019 case 'X':
08ca2aa3
TH
1020 if (utf8) {
1021 while (len > 0) {
1022 if (s <= strbeg)
1023 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1024 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1025 if (s <= strbeg)
1026 Perl_croak(aTHX_ "'X' outside of string in unpack");
1027 }
1028 len--;
1029 }
1030 } else {
fc241834
RGS
1031 if (len > s - strbeg)
1032 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1033 s -= len;
08ca2aa3 1034 }
a6ec74c1 1035 break;
9e27e96a
AL
1036 case 'x' | TYPE_IS_SHRIEKING: {
1037 I32 ai32;
62f95557
IZ
1038 if (!len) /* Avoid division by 0 */
1039 len = 1;
230e1fce
NC
1040 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1042 if (ai32 == 0) break;
1043 len -= ai32;
9e27e96a 1044 }
62f95557 1045 /* FALL THROUGH */
a6ec74c1 1046 case 'x':
08ca2aa3
TH
1047 if (utf8) {
1048 while (len>0) {
1049 if (s >= strend)
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1051 s += UTF8SKIP(s);
1052 len--;
1053 }
1054 } else {
fc241834
RGS
1055 if (len > strend - s)
1056 Perl_croak(aTHX_ "'x' outside of string in unpack");
1057 s += len;
f337b084 1058 }
a6ec74c1
JH
1059 break;
1060 case '/':
49704364
LW
1061 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1062 break;
a6ec74c1
JH
1063 case 'A':
1064 case 'Z':
1065 case 'a':
08ca2aa3
TH
1066 if (checksum) {
1067 /* Preliminary length estimate is assumed done in 'W' */
1068 if (len > strend - s) len = strend - s;
1069 goto W_checksum;
1070 }
1071 if (utf8) {
1072 I32 l;
f7fe979e 1073 const char *hop;
08ca2aa3
TH
1074 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075 if (hop >= strend) {
1076 if (hop > strend)
1077 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1078 break;
fc241834 1079 }
a6ec74c1 1080 }
08ca2aa3
TH
1081 if (hop > strend)
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 len = hop - s;
1084 } else if (len > strend - s)
1085 len = strend - s;
1086
1087 if (datumtype == 'Z') {
1088 /* 'Z' strips stuff after first null */
f7fe979e 1089 const char *ptr, *end;
f337b084
TH
1090 end = s + len;
1091 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1092 sv = newSVpvn(s, ptr-s);
1093 if (howlen == e_star) /* exact for 'Z*' */
1094 len = ptr-s + (ptr != strend ? 1 : 0);
1095 } else if (datumtype == 'A') {
1096 /* 'A' strips both nulls and spaces */
f7fe979e 1097 const char *ptr;
18bdf90a
TH
1098 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1099 for (ptr = s+len-1; ptr >= s; ptr--)
1100 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1101 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1102 if (ptr >= s) ptr += UTF8SKIP(ptr);
1103 else ptr++;
28be1210 1104 if (ptr > s+len)
18bdf90a
TH
1105 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1106 } else {
1107 for (ptr = s+len-1; ptr >= s; ptr--)
1108 if (*ptr != 0 && !isSPACE(*ptr)) break;
1109 ptr++;
1110 }
08ca2aa3
TH
1111 sv = newSVpvn(s, ptr-s);
1112 } else sv = newSVpvn(s, len);
1113
1114 if (utf8) {
1115 SvUTF8_on(sv);
1116 /* Undo any upgrade done due to need_utf8() */
f337b084 1117 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1118 sv_utf8_downgrade(sv, 0);
a6ec74c1 1119 }
6e449a3a 1120 mXPUSHs(sv);
08ca2aa3 1121 s += len;
a6ec74c1
JH
1122 break;
1123 case 'B':
08ca2aa3
TH
1124 case 'b': {
1125 char *str;
49704364 1126 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1127 len = (strend - s) * 8;
1128 if (checksum) {
f337b084 1129 if (utf8)
08ca2aa3 1130 while (len >= 8 && s < strend) {
f337b084 1131 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1132 len -= 8;
1133 }
f337b084 1134 else
fc241834 1135 while (len >= 8) {
08ca2aa3 1136 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1137 len -= 8;
1138 }
08ca2aa3
TH
1139 if (len && s < strend) {
1140 U8 bits;
f337b084
TH
1141 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1142 if (datumtype == 'b')
a6ec74c1 1143 while (len-- > 0) {
92d41999 1144 if (bits & 1) cuv++;
a6ec74c1
JH
1145 bits >>= 1;
1146 }
f337b084 1147 else
a6ec74c1 1148 while (len-- > 0) {
08ca2aa3 1149 if (bits & 0x80) cuv++;
a6ec74c1
JH
1150 bits <<= 1;
1151 }
fc241834 1152 }
a6ec74c1
JH
1153 break;
1154 }
08ca2aa3 1155
561b68a9 1156 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1157 SvPOK_on(sv);
1158 str = SvPVX(sv);
1159 if (datumtype == 'b') {
f337b084 1160 U8 bits = 0;
f7fe979e 1161 const I32 ai32 = len;
08ca2aa3
TH
1162 for (len = 0; len < ai32; len++) {
1163 if (len & 7) bits >>= 1;
1164 else if (utf8) {
1165 if (s >= strend) break;
f337b084 1166 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1167 } else bits = *(U8 *) s++;
1168 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1169 }
08ca2aa3 1170 } else {
f337b084 1171 U8 bits = 0;
f7fe979e 1172 const I32 ai32 = len;
08ca2aa3
TH
1173 for (len = 0; len < ai32; len++) {
1174 if (len & 7) bits <<= 1;
1175 else if (utf8) {
1176 if (s >= strend) break;
f337b084 1177 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1178 } else bits = *(U8 *) s++;
1179 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1180 }
1181 }
1182 *str = '\0';
aa07b2f6 1183 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1184 XPUSHs(sv);
a6ec74c1 1185 break;
08ca2aa3 1186 }
a6ec74c1 1187 case 'H':
08ca2aa3 1188 case 'h': {
3297d27d 1189 char *str = NULL;
fc241834 1190 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1191 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1192 len = (strend - s) * 2;
858fe5e1
TC
1193 if (!checksum) {
1194 sv = sv_2mortal(newSV(len ? len : 1));
1195 SvPOK_on(sv);
1196 str = SvPVX(sv);
1197 }
a6ec74c1 1198 if (datumtype == 'h') {
f337b084 1199 U8 bits = 0;
9e27e96a 1200 I32 ai32 = len;
fc241834
RGS
1201 for (len = 0; len < ai32; len++) {
1202 if (len & 1) bits >>= 4;
1203 else if (utf8) {
1204 if (s >= strend) break;
f337b084 1205 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1206 } else bits = * (U8 *) s++;
858fe5e1
TC
1207 if (!checksum)
1208 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1209 }
08ca2aa3 1210 } else {
f337b084 1211 U8 bits = 0;
f7fe979e 1212 const I32 ai32 = len;
08ca2aa3
TH
1213 for (len = 0; len < ai32; len++) {
1214 if (len & 1) bits <<= 4;
1215 else if (utf8) {
1216 if (s >= strend) break;
f337b084 1217 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1218 } else bits = *(U8 *) s++;
858fe5e1
TC
1219 if (!checksum)
1220 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1221 }
1222 }
858fe5e1
TC
1223 if (!checksum) {
1224 *str = '\0';
1225 SvCUR_set(sv, str - SvPVX_const(sv));
1226 XPUSHs(sv);
1227 }
a6ec74c1 1228 break;
08ca2aa3 1229 }
1651fc44
ML
1230 case 'C':
1231 if (len == 0) {
1232 if (explicit_length)
1233 /* Switch to "character" mode */
1234 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1235 break;
1236 }
1237 /* FALL THROUGH */
a6ec74c1 1238 case 'c':
1651fc44
ML
1239 while (len-- > 0 && s < strend) {
1240 int aint;
1241 if (utf8)
1242 {
1243 STRLEN retlen;
1244 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 s += retlen;
1249 }
1250 else
1251 aint = *(U8 *)(s)++;
1252 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1253 aint -= 256;
08ca2aa3 1254 if (!checksum)
6e449a3a 1255 mPUSHi(aint);
73cb7263
NC
1256 else if (checksum > bits_in_uv)
1257 cdouble += (NV)aint;
1258 else
1259 cuv += aint;
a6ec74c1
JH
1260 }
1261 break;
08ca2aa3
TH
1262 case 'W':
1263 W_checksum:
1651fc44 1264 if (utf8) {
08ca2aa3 1265 while (len-- > 0 && s < strend) {
08ca2aa3 1266 STRLEN retlen;
f7fe979e 1267 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1269 if (retlen == (STRLEN) -1 || retlen == 0)
1270 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1271 s += retlen;
1272 if (!checksum)
6e449a3a 1273 mPUSHu(val);
08ca2aa3
TH
1274 else if (checksum > bits_in_uv)
1275 cdouble += (NV) val;
d6d3e8bd 1276 else
08ca2aa3 1277 cuv += val;
fc241834 1278 }
08ca2aa3 1279 } else if (!checksum)
a6ec74c1 1280 while (len-- > 0) {
f7fe979e 1281 const U8 ch = *(U8 *) s++;
6e449a3a 1282 mPUSHu(ch);
a6ec74c1 1283 }
08ca2aa3
TH
1284 else if (checksum > bits_in_uv)
1285 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1286 else
1287 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1288 break;
1289 case 'U':
35bcd338 1290 if (len == 0) {
c5333953 1291 if (explicit_length && howlen != e_star) {
08ca2aa3 1292 /* Switch to "bytes in UTF-8" mode */
f337b084 1293 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1294 else
1295 /* Should be impossible due to the need_utf8() test */
1296 Perl_croak(aTHX_ "U0 mode on a byte string");
1297 }
35bcd338
JH
1298 break;
1299 }
08ca2aa3 1300 if (len > strend - s) len = strend - s;
fc241834 1301 if (!checksum) {
08ca2aa3
TH
1302 if (len && unpack_only_one) len = 1;
1303 EXTEND(SP, len);
1304 EXTEND_MORTAL(len);
fc241834 1305 }
08ca2aa3
TH
1306 while (len-- > 0 && s < strend) {
1307 STRLEN retlen;
1308 UV auv;
1309 if (utf8) {
1310 U8 result[UTF8_MAXLEN];
f7fe979e 1311 const char *ptr = s;
08ca2aa3 1312 STRLEN len;
08ca2aa3
TH
1313 /* Bug: warns about bad utf8 even if we are short on bytes
1314 and will break out of the loop */
230e1fce
NC
1315 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1316 'U'))
08ca2aa3
TH
1317 break;
1318 len = UTF8SKIP(result);
fc241834 1319 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1320 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1321 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1322 s = ptr;
1323 } else {
cde84f2a 1324 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1325 if (retlen == (STRLEN) -1 || retlen == 0)
1326 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1327 s += retlen;
1328 }
1329 if (!checksum)
6e449a3a 1330 mPUSHu(auv);
73cb7263 1331 else if (checksum > bits_in_uv)
08ca2aa3 1332 cdouble += (NV) auv;
73cb7263 1333 else
08ca2aa3 1334 cuv += auv;
a6ec74c1
JH
1335 }
1336 break;
49704364
LW
1337 case 's' | TYPE_IS_SHRIEKING:
1338#if SHORTSIZE != SIZE16
73cb7263 1339 while (len-- > 0) {
08ca2aa3 1340 short ashort;
aaec8192 1341 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1342 if (!checksum)
6e449a3a 1343 mPUSHi(ashort);
73cb7263
NC
1344 else if (checksum > bits_in_uv)
1345 cdouble += (NV)ashort;
1346 else
1347 cuv += ashort;
49704364
LW
1348 }
1349 break;
1350#else
1351 /* Fallthrough! */
a6ec74c1 1352#endif
49704364 1353 case 's':
73cb7263 1354 while (len-- > 0) {
08ca2aa3
TH
1355 I16 ai16;
1356
1357#if U16SIZE > SIZE16
1358 ai16 = 0;
1359#endif
aaec8192 1360 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1361#if U16SIZE > SIZE16
73cb7263
NC
1362 if (ai16 > 32767)
1363 ai16 -= 65536;
a6ec74c1 1364#endif
08ca2aa3 1365 if (!checksum)
6e449a3a 1366 mPUSHi(ai16);
73cb7263
NC
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)ai16;
1369 else
1370 cuv += ai16;
a6ec74c1
JH
1371 }
1372 break;
49704364
LW
1373 case 'S' | TYPE_IS_SHRIEKING:
1374#if SHORTSIZE != SIZE16
73cb7263 1375 while (len-- > 0) {
08ca2aa3 1376 unsigned short aushort;
aaec8192
NC
1377 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1378 needs_swap);
08ca2aa3 1379 if (!checksum)
6e449a3a 1380 mPUSHu(aushort);
73cb7263
NC
1381 else if (checksum > bits_in_uv)
1382 cdouble += (NV)aushort;
1383 else
1384 cuv += aushort;
49704364
LW
1385 }
1386 break;
1387#else
486ec47a 1388 /* Fallthrough! */
49704364 1389#endif
a6ec74c1
JH
1390 case 'v':
1391 case 'n':
1392 case 'S':
73cb7263 1393 while (len-- > 0) {
08ca2aa3
TH
1394 U16 au16;
1395#if U16SIZE > SIZE16
1396 au16 = 0;
1397#endif
aaec8192 1398 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1399 if (datumtype == 'n')
1400 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1401 if (datumtype == 'v')
1402 au16 = vtohs(au16);
08ca2aa3 1403 if (!checksum)
6e449a3a 1404 mPUSHu(au16);
73cb7263 1405 else if (checksum > bits_in_uv)
f337b084 1406 cdouble += (NV) au16;
73cb7263
NC
1407 else
1408 cuv += au16;
a6ec74c1
JH
1409 }
1410 break;
068bd2e7
MHM
1411 case 'v' | TYPE_IS_SHRIEKING:
1412 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1413 while (len-- > 0) {
08ca2aa3
TH
1414 I16 ai16;
1415# if U16SIZE > SIZE16
1416 ai16 = 0;
1417# endif
aaec8192 1418 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1419 /* There should never be any byte-swapping here. */
1420 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1421 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1422 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1423 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1424 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1425 if (!checksum)
6e449a3a 1426 mPUSHi(ai16);
73cb7263 1427 else if (checksum > bits_in_uv)
08ca2aa3 1428 cdouble += (NV) ai16;
73cb7263
NC
1429 else
1430 cuv += ai16;
068bd2e7
MHM
1431 }
1432 break;
a6ec74c1 1433 case 'i':
49704364 1434 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1435 while (len-- > 0) {
08ca2aa3 1436 int aint;
aaec8192 1437 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1438 if (!checksum)
6e449a3a 1439 mPUSHi(aint);
73cb7263
NC
1440 else if (checksum > bits_in_uv)
1441 cdouble += (NV)aint;
1442 else
1443 cuv += aint;
a6ec74c1
JH
1444 }
1445 break;
1446 case 'I':
49704364 1447 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1448 while (len-- > 0) {
08ca2aa3 1449 unsigned int auint;
aaec8192 1450 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1451 if (!checksum)
6e449a3a 1452 mPUSHu(auint);
73cb7263
NC
1453 else if (checksum > bits_in_uv)
1454 cdouble += (NV)auint;
1455 else
1456 cuv += auint;
a6ec74c1
JH
1457 }
1458 break;
92d41999 1459 case 'j':
73cb7263 1460 while (len-- > 0) {
08ca2aa3 1461 IV aiv;
aaec8192 1462 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1463 if (!checksum)
6e449a3a 1464 mPUSHi(aiv);
73cb7263
NC
1465 else if (checksum > bits_in_uv)
1466 cdouble += (NV)aiv;
1467 else
1468 cuv += aiv;
92d41999
JH
1469 }
1470 break;
1471 case 'J':
73cb7263 1472 while (len-- > 0) {
08ca2aa3 1473 UV auv;
aaec8192 1474 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1475 if (!checksum)
6e449a3a 1476 mPUSHu(auv);
73cb7263
NC
1477 else if (checksum > bits_in_uv)
1478 cdouble += (NV)auv;
1479 else
1480 cuv += auv;
92d41999
JH
1481 }
1482 break;
49704364
LW
1483 case 'l' | TYPE_IS_SHRIEKING:
1484#if LONGSIZE != SIZE32
73cb7263 1485 while (len-- > 0) {
08ca2aa3 1486 long along;
aaec8192 1487 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1488 if (!checksum)
6e449a3a 1489 mPUSHi(along);
73cb7263
NC
1490 else if (checksum > bits_in_uv)
1491 cdouble += (NV)along;
1492 else
1493 cuv += along;
49704364
LW
1494 }
1495 break;
1496#else
1497 /* Fallthrough! */
a6ec74c1 1498#endif
49704364 1499 case 'l':
73cb7263 1500 while (len-- > 0) {
08ca2aa3
TH
1501 I32 ai32;
1502#if U32SIZE > SIZE32
1503 ai32 = 0;
1504#endif
aaec8192 1505 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1506#if U32SIZE > SIZE32
08ca2aa3 1507 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1508#endif
08ca2aa3 1509 if (!checksum)
6e449a3a 1510 mPUSHi(ai32);
73cb7263
NC
1511 else if (checksum > bits_in_uv)
1512 cdouble += (NV)ai32;
1513 else
1514 cuv += ai32;
a6ec74c1
JH
1515 }
1516 break;
49704364
LW
1517 case 'L' | TYPE_IS_SHRIEKING:
1518#if LONGSIZE != SIZE32
73cb7263 1519 while (len-- > 0) {
08ca2aa3 1520 unsigned long aulong;
aaec8192 1521 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1522 if (!checksum)
6e449a3a 1523 mPUSHu(aulong);
73cb7263
NC
1524 else if (checksum > bits_in_uv)
1525 cdouble += (NV)aulong;
1526 else
1527 cuv += aulong;
49704364
LW
1528 }
1529 break;
1530#else
1531 /* Fall through! */
1532#endif
a6ec74c1
JH
1533 case 'V':
1534 case 'N':
1535 case 'L':
73cb7263 1536 while (len-- > 0) {
08ca2aa3
TH
1537 U32 au32;
1538#if U32SIZE > SIZE32
1539 au32 = 0;
1540#endif
aaec8192 1541 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1542 if (datumtype == 'N')
1543 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1544 if (datumtype == 'V')
1545 au32 = vtohl(au32);
08ca2aa3 1546 if (!checksum)
6e449a3a 1547 mPUSHu(au32);
fc241834
RGS
1548 else if (checksum > bits_in_uv)
1549 cdouble += (NV)au32;
1550 else
1551 cuv += au32;
a6ec74c1
JH
1552 }
1553 break;
068bd2e7
MHM
1554 case 'V' | TYPE_IS_SHRIEKING:
1555 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1556 while (len-- > 0) {
08ca2aa3 1557 I32 ai32;
f8e5a5db 1558#if U32SIZE > SIZE32
08ca2aa3 1559 ai32 = 0;
f8e5a5db 1560#endif
aaec8192 1561 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1562 /* There should never be any byte swapping here. */
1563 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1564 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1565 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1566 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1567 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1568 if (!checksum)
6e449a3a 1569 mPUSHi(ai32);
73cb7263
NC
1570 else if (checksum > bits_in_uv)
1571 cdouble += (NV)ai32;
1572 else
1573 cuv += ai32;
068bd2e7
MHM
1574 }
1575 break;
a6ec74c1 1576 case 'p':
a6ec74c1 1577 while (len-- > 0) {
f7fe979e 1578 const char *aptr;
aaec8192 1579 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1580 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1581 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1582 }
1583 break;
1584 case 'w':
a6ec74c1
JH
1585 {
1586 UV auv = 0;
1587 U32 bytes = 0;
fc241834 1588
08ca2aa3
TH
1589 while (len > 0 && s < strend) {
1590 U8 ch;
f337b084 1591 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1592 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1593 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1594 if (ch < 0x80) {
a6ec74c1 1595 bytes = 0;
6e449a3a 1596 mPUSHu(auv);
a6ec74c1
JH
1597 len--;
1598 auv = 0;
08ca2aa3 1599 continue;
a6ec74c1 1600 }
08ca2aa3 1601 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1602 const char *t;
a6ec74c1 1603
f5992bc4 1604 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1605 while (s < strend) {
f337b084 1606 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1607 sv = mul128(sv, (U8)(ch & 0x7f));
1608 if (!(ch & 0x80)) {
a6ec74c1
JH
1609 bytes = 0;
1610 break;
1611 }
1612 }
10516c54 1613 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1614 while (*t == '0')
1615 t++;
1616 sv_chop(sv, t);
6e449a3a 1617 mPUSHs(sv);
a6ec74c1
JH
1618 len--;
1619 auv = 0;
1620 }
1621 }
1622 if ((s >= strend) && bytes)
49704364 1623 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1624 }
1625 break;
1626 case 'P':
49704364
LW
1627 if (symptr->howlen == e_star)
1628 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1629 EXTEND(SP, 1);
2d3e0934 1630 if (s + sizeof(char*) <= strend) {
08ca2aa3 1631 char *aptr;
aaec8192 1632 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1633 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1634 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1635 }
a6ec74c1
JH
1636 break;
1637#ifdef HAS_QUAD
1638 case 'q':
73cb7263 1639 while (len-- > 0) {
08ca2aa3 1640 Quad_t aquad;
aaec8192 1641 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1642 if (!checksum)
6e449a3a
MHM
1643 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1644 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1645 else if (checksum > bits_in_uv)
1646 cdouble += (NV)aquad;
1647 else
1648 cuv += aquad;
1649 }
a6ec74c1
JH
1650 break;
1651 case 'Q':
73cb7263 1652 while (len-- > 0) {
08ca2aa3 1653 Uquad_t auquad;
aaec8192 1654 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1655 if (!checksum)
6e449a3a
MHM
1656 mPUSHs(auquad <= UV_MAX ?
1657 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1658 else if (checksum > bits_in_uv)
1659 cdouble += (NV)auquad;
1660 else
1661 cuv += auquad;
a6ec74c1
JH
1662 }
1663 break;
08ca2aa3 1664#endif /* HAS_QUAD */
a6ec74c1
JH
1665 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1666 case 'f':
73cb7263 1667 while (len-- > 0) {
08ca2aa3 1668 float afloat;
aaec8192 1669 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1670 if (!checksum)
6e449a3a 1671 mPUSHn(afloat);
08ca2aa3 1672 else
73cb7263 1673 cdouble += afloat;
fc241834 1674 }
a6ec74c1
JH
1675 break;
1676 case 'd':
73cb7263 1677 while (len-- > 0) {
08ca2aa3 1678 double adouble;
aaec8192 1679 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1680 if (!checksum)
6e449a3a 1681 mPUSHn(adouble);
08ca2aa3 1682 else
73cb7263 1683 cdouble += adouble;
fc241834 1684 }
a6ec74c1 1685 break;
92d41999 1686 case 'F':
73cb7263 1687 while (len-- > 0) {
275663fa 1688 NV_bytes anv;
aaec8192
NC
1689 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1690 datumtype, needs_swap);
08ca2aa3 1691 if (!checksum)
275663fa 1692 mPUSHn(anv.nv);
08ca2aa3 1693 else
275663fa 1694 cdouble += anv.nv;
fc241834 1695 }
92d41999
JH
1696 break;
1697#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1698 case 'D':
73cb7263 1699 while (len-- > 0) {
275663fa 1700 ld_bytes aldouble;
aaec8192
NC
1701 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1702 sizeof(aldouble.bytes), datumtype, needs_swap);
08ca2aa3 1703 if (!checksum)
275663fa 1704 mPUSHn(aldouble.ld);
08ca2aa3 1705 else
275663fa 1706 cdouble += aldouble.ld;
92d41999
JH
1707 }
1708 break;
1709#endif
a6ec74c1 1710 case 'u':
858fe5e1 1711 if (!checksum) {
f7fe979e 1712 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1713 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1714 if (l) SvPOK_on(sv);
1715 }
1716 if (utf8) {
1717 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1718 I32 a, b, c, d;
db187877 1719 char hunk[3];
08ca2aa3 1720
08ca2aa3
TH
1721 while (len > 0) {
1722 next_uni_uu(aTHX_ &s, strend, &a);
1723 next_uni_uu(aTHX_ &s, strend, &b);
1724 next_uni_uu(aTHX_ &s, strend, &c);
1725 next_uni_uu(aTHX_ &s, strend, &d);
1726 hunk[0] = (char)((a << 2) | (b >> 4));
1727 hunk[1] = (char)((b << 4) | (c >> 2));
1728 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1729 if (!checksum)
1730 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1731 len -= 3;
1732 }
1733 if (s < strend) {
f7fe979e
AL
1734 if (*s == '\n') {
1735 s++;
1736 }
08ca2aa3
TH
1737 else {
1738 /* possible checksum byte */
f7fe979e
AL
1739 const char *skip = s+UTF8SKIP(s);
1740 if (skip < strend && *skip == '\n')
1741 s = skip+1;
08ca2aa3
TH
1742 }
1743 }
1744 }
1745 } else {
fc241834
RGS
1746 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1747 I32 a, b, c, d;
db187877 1748 char hunk[3];
a6ec74c1 1749
fc241834
RGS
1750 len = PL_uudmap[*(U8*)s++] & 077;
1751 while (len > 0) {
1752 if (s < strend && ISUUCHAR(*s))
1753 a = PL_uudmap[*(U8*)s++] & 077;
1754 else
1755 a = 0;
1756 if (s < strend && ISUUCHAR(*s))
1757 b = PL_uudmap[*(U8*)s++] & 077;
1758 else
1759 b = 0;
1760 if (s < strend && ISUUCHAR(*s))
1761 c = PL_uudmap[*(U8*)s++] & 077;
1762 else
1763 c = 0;
1764 if (s < strend && ISUUCHAR(*s))
1765 d = PL_uudmap[*(U8*)s++] & 077;
1766 else
1767 d = 0;
1768 hunk[0] = (char)((a << 2) | (b >> 4));
1769 hunk[1] = (char)((b << 4) | (c >> 2));
1770 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1771 if (!checksum)
1772 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1773 len -= 3;
1774 }
1775 if (*s == '\n')
1776 s++;
1777 else /* possible checksum byte */
1778 if (s + 1 < strend && s[1] == '\n')
1779 s += 2;
a6ec74c1 1780 }
08ca2aa3 1781 }
858fe5e1
TC
1782 if (!checksum)
1783 XPUSHs(sv);
a6ec74c1
JH
1784 break;
1785 }
49704364 1786
a6ec74c1 1787 if (checksum) {
1109a392 1788 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1789 (checksum > bits_in_uv &&
08ca2aa3
TH
1790 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1791 NV trouble, anv;
a6ec74c1 1792
08ca2aa3 1793 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1794 while (checksum >= 16) {
1795 checksum -= 16;
08ca2aa3 1796 anv *= 65536.0;
a6ec74c1 1797 }
a6ec74c1 1798 while (cdouble < 0.0)
08ca2aa3
TH
1799 cdouble += anv;
1800 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1801 sv = newSVnv(cdouble);
a6ec74c1
JH
1802 }
1803 else {
fa8ec7c1
NC
1804 if (checksum < bits_in_uv) {
1805 UV mask = ((UV)1 << checksum) - 1;
92d41999 1806 cuv &= mask;
a6ec74c1 1807 }
c4c5f44a 1808 sv = newSVuv(cuv);
a6ec74c1 1809 }
6e449a3a 1810 mXPUSHs(sv);
a6ec74c1
JH
1811 checksum = 0;
1812 }
fc241834 1813
49704364
LW
1814 if (symptr->flags & FLAG_SLASH){
1815 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1816 break;
49704364
LW
1817 if( next_symbol(symptr) ){
1818 if( symptr->howlen == e_number )
1819 Perl_croak(aTHX_ "Count after length/code in unpack" );
1820 if( beyond ){
1821 /* ...end of char buffer then no decent length available */
1822 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1823 } else {
1824 /* take top of stack (hope it's numeric) */
1825 len = POPi;
1826 if( len < 0 )
1827 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1828 }
1829 } else {
1830 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1831 }
1832 datumtype = symptr->code;
21c16052 1833 explicit_length = FALSE;
49704364
LW
1834 goto redo_switch;
1835 }
a6ec74c1 1836 }
49704364 1837
18529408
IZ
1838 if (new_s)
1839 *new_s = s;
1840 PUTBACK;
1841 return SP - PL_stack_base - start_sp_offset;
1842}
1843
1844PP(pp_unpack)
1845{
97aff369 1846 dVAR;
18529408 1847 dSP;
bab9c0ac 1848 dPOPPOPssrl;
18529408
IZ
1849 I32 gimme = GIMME_V;
1850 STRLEN llen;
1851 STRLEN rlen;
5c144d81
NC
1852 const char *pat = SvPV_const(left, llen);
1853 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1854 const char *strend = s + rlen;
1855 const char *patend = pat + llen;
08ca2aa3 1856 I32 cnt;
18529408
IZ
1857
1858 PUTBACK;
7accc089 1859 cnt = unpackstring(pat, patend, s, strend,
49704364 1860 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1861 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1862
18529408
IZ
1863 SPAGAIN;
1864 if ( !cnt && gimme == G_SCALAR )
1865 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1866 RETURN;
1867}
1868
f337b084 1869STATIC U8 *
f7fe979e 1870doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1871{
f337b084 1872 *h++ = PL_uuemap[len];
a6ec74c1 1873 while (len > 2) {
f337b084
TH
1874 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1875 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1876 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1877 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1878 s += 3;
1879 len -= 3;
1880 }
1881 if (len > 0) {
f7fe979e 1882 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1883 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1884 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1885 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1886 *h++ = PL_uuemap[0];
a6ec74c1 1887 }
f337b084
TH
1888 *h++ = '\n';
1889 return h;
a6ec74c1
JH
1890}
1891
1892STATIC SV *
f7fe979e 1893S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1894{
8b6e33c7
AL
1895 SV *result = newSVpvn(s, l);
1896 char *const result_c = SvPV_nolen(result); /* convenience */
1897 char *out = result_c;
1898 bool skip = 1;
1899 bool ignore = 0;
a6ec74c1 1900
7918f24d
NC
1901 PERL_ARGS_ASSERT_IS_AN_INT;
1902
a6ec74c1
JH
1903 while (*s) {
1904 switch (*s) {
1905 case ' ':
1906 break;
1907 case '+':
1908 if (!skip) {
1909 SvREFCNT_dec(result);
1910 return (NULL);
1911 }
1912 break;
1913 case '0':
1914 case '1':
1915 case '2':
1916 case '3':
1917 case '4':
1918 case '5':
1919 case '6':
1920 case '7':
1921 case '8':
1922 case '9':
1923 skip = 0;
1924 if (!ignore) {
1925 *(out++) = *s;
1926 }
1927 break;
1928 case '.':
1929 ignore = 1;
1930 break;
1931 default:
1932 SvREFCNT_dec(result);
1933 return (NULL);
1934 }
1935 s++;
1936 }
1937 *(out++) = '\0';
1938 SvCUR_set(result, out - result_c);
1939 return (result);
1940}
1941
1942/* pnum must be '\0' terminated */
1943STATIC int
1944S_div128(pTHX_ SV *pnum, bool *done)
1945{
8b6e33c7
AL
1946 STRLEN len;
1947 char * const s = SvPV(pnum, len);
1948 char *t = s;
1949 int m = 0;
1950
7918f24d
NC
1951 PERL_ARGS_ASSERT_DIV128;
1952
8b6e33c7
AL
1953 *done = 1;
1954 while (*t) {
1955 const int i = m * 10 + (*t - '0');
1956 const int r = (i >> 7); /* r < 10 */
1957 m = i & 0x7F;
1958 if (r) {
1959 *done = 0;
1960 }
1961 *(t++) = '0' + r;
a6ec74c1 1962 }
8b6e33c7
AL
1963 *(t++) = '\0';
1964 SvCUR_set(pnum, (STRLEN) (t - s));
1965 return (m);
a6ec74c1
JH
1966}
1967
18529408 1968/*
7accc089
JH
1969=for apidoc packlist
1970
1971The engine implementing pack() Perl function.
1972
bfce84ec
AL
1973=cut
1974*/
7accc089
JH
1975
1976void
5aaab254 1977Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1978{
97aff369 1979 dVAR;
aadb217d
JH
1980 tempsym_t sym;
1981
7918f24d
NC
1982 PERL_ARGS_ASSERT_PACKLIST;
1983
f7fe979e 1984 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1985
f337b084
TH
1986 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1987 Also make sure any UTF8 flag is loaded */
56eb0262 1988 SvPV_force_nolen(cat);
bfce84ec
AL
1989 if (DO_UTF8(cat))
1990 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1991
49704364
LW
1992 (void)pack_rec( cat, &sym, beglist, endlist );
1993}
1994
f337b084
TH
1995/* like sv_utf8_upgrade, but also repoint the group start markers */
1996STATIC void
1997marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1998 STRLEN len;
1999 tempsym_t *group;
f7fe979e
AL
2000 const char *from_ptr, *from_start, *from_end, **marks, **m;
2001 char *to_start, *to_ptr;
f337b084
TH
2002
2003 if (SvUTF8(sv)) return;
2004
aa07b2f6 2005 from_start = SvPVX_const(sv);
f337b084
TH
2006 from_end = from_start + SvCUR(sv);
2007 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2008 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2009 if (from_ptr == from_end) {
2010 /* Simple case: no character needs to be changed */
2011 SvUTF8_on(sv);
2012 return;
2013 }
2014
3473cf63 2015 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2016 Newx(to_start, len, char);
f337b084
TH
2017 Copy(from_start, to_start, from_ptr-from_start, char);
2018 to_ptr = to_start + (from_ptr-from_start);
2019
a02a5408 2020 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2021 for (group=sym_ptr; group; group = group->previous)
2022 marks[group->level] = from_start + group->strbeg;
2023 marks[sym_ptr->level+1] = from_end+1;
2024 for (m = marks; *m < from_ptr; m++)
2025 *m = to_start + (*m-from_start);
2026
2027 for (;from_ptr < from_end; from_ptr++) {
2028 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2029 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2030 }
2031 *to_ptr = 0;
2032
2033 while (*m == from_ptr) *m++ = to_ptr;
2034 if (m != marks + sym_ptr->level+1) {
2035 Safefree(marks);
2036 Safefree(to_start);
5637ef5b
NC
2037 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2038 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2039 }
2040 for (group=sym_ptr; group; group = group->previous)
2041 group->strbeg = marks[group->level] - to_start;
2042 Safefree(marks);
2043
2044 if (SvOOK(sv)) {
2045 if (SvIVX(sv)) {
b162af07 2046 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2047 from_start -= SvIVX(sv);
2048 SvIV_set(sv, 0);
2049 }
2050 SvFLAGS(sv) &= ~SVf_OOK;
2051 }
2052 if (SvLEN(sv) != 0)
2053 Safefree(from_start);
f880fe2f 2054 SvPV_set(sv, to_start);
b162af07
SP
2055 SvCUR_set(sv, to_ptr - to_start);
2056 SvLEN_set(sv, len);
f337b084
TH
2057 SvUTF8_on(sv);
2058}
2059
2060/* Exponential string grower. Makes string extension effectively O(n)
2061 needed says how many extra bytes we need (not counting the final '\0')
2062 Only grows the string if there is an actual lack of space
2063*/
2064STATIC char *
0bd48802 2065S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2066 const STRLEN cur = SvCUR(sv);
2067 const STRLEN len = SvLEN(sv);
f337b084 2068 STRLEN extend;
7918f24d
NC
2069
2070 PERL_ARGS_ASSERT_SV_EXP_GROW;
2071
f337b084
TH
2072 if (len - cur > needed) return SvPVX(sv);
2073 extend = needed > len ? needed : len;
2074 return SvGROW(sv, len+extend+1);
2075}
49704364
LW
2076
2077STATIC
2078SV **
f337b084 2079S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2080{
97aff369 2081 dVAR;
49704364 2082 tempsym_t lookahead;
f337b084
TH
2083 I32 items = endlist - beglist;
2084 bool found = next_symbol(symptr);
2085 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2086 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2087
7918f24d
NC
2088 PERL_ARGS_ASSERT_PACK_REC;
2089
f337b084
TH
2090 if (symptr->level == 0 && found && symptr->code == 'U') {
2091 marked_upgrade(aTHX_ cat, symptr);
2092 symptr->flags |= FLAG_DO_UTF8;
2093 utf8 = 0;
49704364 2094 }
f337b084 2095 symptr->strbeg = SvCUR(cat);
49704364
LW
2096
2097 while (found) {
f337b084
TH
2098 SV *fromstr;
2099 STRLEN fromlen;
2100 I32 len;
a0714e2c 2101 SV *lengthcode = NULL;
49704364 2102 I32 datumtype = symptr->code;
f337b084
TH
2103 howlen_t howlen = symptr->howlen;
2104 char *start = SvPVX(cat);
2105 char *cur = start + SvCUR(cat);
a1219b5e 2106 bool needs_swap;
49704364 2107
f337b084
TH
2108#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2109
2110 switch (howlen) {
fc241834 2111 case e_star:
f337b084
TH
2112 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2113 0 : items;
2114 break;
2115 default:
2116 /* e_no_len and e_number */
2117 len = symptr->length;
49704364
LW
2118 break;
2119 }
2120
f337b084 2121 if (len) {
a7a3cfaa 2122 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2123
a7a3cfaa
TH
2124 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2125 /* We can process this letter. */
2126 STRLEN size = props & PACK_SIZE_MASK;
2127 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2128 }
f337b084
TH
2129 }
2130
49704364
LW
2131 /* Look ahead for next symbol. Do we have code/code? */
2132 lookahead = *symptr;
2133 found = next_symbol(&lookahead);
246f24af
TH
2134 if (symptr->flags & FLAG_SLASH) {
2135 IV count;
f337b084 2136 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2137 if (strchr("aAZ", lookahead.code)) {
2138 if (lookahead.howlen == e_number) count = lookahead.length;
2139 else {
ce399ba6 2140 if (items > 0) {
48a5da33 2141 count = sv_len_utf8(*beglist);
ce399ba6 2142 }
246f24af
TH
2143 else count = 0;
2144 if (lookahead.code == 'Z') count++;
2145 }
2146 } else {
2147 if (lookahead.howlen == e_number && lookahead.length < items)
2148 count = lookahead.length;
2149 else count = items;
2150 }
2151 lookahead.howlen = e_number;
2152 lookahead.length = count;
2153 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2154 }
49704364 2155
a1219b5e
NC
2156 needs_swap = NEEDS_SWAP(datumtype);
2157
fc241834
RGS
2158 /* Code inside the switch must take care to properly update
2159 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2160 doesn't simply leave using break */
1109a392 2161 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2162 default:
f337b084
TH
2163 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2164 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2165 case '%':
49704364 2166 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2167 {
2168 char *from;
28be1210 2169 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2170 case '.':
2171 if (howlen == e_star) from = start;
2172 else if (len == 0) from = cur;
2173 else {
2174 tempsym_t *group = symptr;
2175
2176 while (--len && group) group = group->previous;
2177 from = group ? start + group->strbeg : start;
2178 }
2179 fromstr = NEXTFROM;
2180 len = SvIV(fromstr);
2181 goto resize;
28be1210 2182 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2183 case '@':
28be1210
TH
2184 from = start + symptr->strbeg;
2185 resize:
28be1210 2186 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2187 if (len >= 0) {
2188 while (len && from < cur) {
2189 from += UTF8SKIP(from);
2190 len--;
2191 }
2192 if (from > cur)
2193 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2194 if (len) {
2195 /* Here we know from == cur */
2196 grow:
2197 GROWING(0, cat, start, cur, len);
2198 Zero(cur, len, char);
2199 cur += len;
2200 } else if (from < cur) {
2201 len = cur - from;
2202 goto shrink;
2203 } else goto no_change;
2204 } else {
2205 cur = from;
2206 len = -len;
2207 goto utf8_shrink;
f337b084 2208 }
28be1210
TH
2209 else {
2210 len -= cur - from;
f337b084 2211 if (len > 0) goto grow;
28be1210 2212 if (len == 0) goto no_change;
fc241834 2213 len = -len;
28be1210 2214 goto shrink;
f337b084 2215 }
a6ec74c1 2216 break;
28be1210 2217 }
fc241834 2218 case '(': {
49704364 2219 tempsym_t savsym = *symptr;
66c611c5
MHM
2220 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2221 symptr->flags |= group_modifiers;
49704364
LW
2222 symptr->patend = savsym.grpend;
2223 symptr->level++;
f337b084 2224 symptr->previous = &lookahead;
18529408 2225 while (len--) {
f337b084
TH
2226 U32 was_utf8;
2227 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2228 else symptr->flags &= ~FLAG_PARSE_UTF8;
2229 was_utf8 = SvUTF8(cat);
49704364 2230 symptr->patptr = savsym.grpbeg;
f337b084
TH
2231 beglist = pack_rec(cat, symptr, beglist, endlist);
2232 if (SvUTF8(cat) != was_utf8)
2233 /* This had better be an upgrade while in utf8==0 mode */
2234 utf8 = 1;
2235
49704364 2236 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2237 break; /* No way to continue */
2238 }
ee790063 2239 items = endlist - beglist;
f337b084
TH
2240 lookahead.flags = symptr->flags & ~group_modifiers;
2241 goto no_change;
18529408 2242 }
62f95557
IZ
2243 case 'X' | TYPE_IS_SHRIEKING:
2244 if (!len) /* Avoid division by 0 */
2245 len = 1;
f337b084
TH
2246 if (utf8) {
2247 char *hop, *last;
2248 I32 l = len;
2249 hop = last = start;
2250 while (hop < cur) {
2251 hop += UTF8SKIP(hop);
2252 if (--l == 0) {
2253 last = hop;
2254 l = len;
2255 }
2256 }
2257 if (last > cur)
2258 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2259 cur = last;
2260 break;
2261 }
2262 len = (cur-start) % len;
62f95557 2263 /* FALL THROUGH */
a6ec74c1 2264 case 'X':
f337b084
TH
2265 if (utf8) {
2266 if (len < 1) goto no_change;
28be1210 2267 utf8_shrink:
f337b084
TH
2268 while (len > 0) {
2269 if (cur <= start)
28be1210
TH
2270 Perl_croak(aTHX_ "'%c' outside of string in pack",
2271 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2272 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2273 if (cur <= start)
28be1210
TH
2274 Perl_croak(aTHX_ "'%c' outside of string in pack",
2275 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2276 }
2277 len--;
2278 }
2279 } else {
fc241834 2280 shrink:
f337b084 2281 if (cur - start < len)
28be1210
TH
2282 Perl_croak(aTHX_ "'%c' outside of string in pack",
2283 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2284 cur -= len;
2285 }
2286 if (cur < start+symptr->strbeg) {
2287 /* Make sure group starts don't point into the void */
2288 tempsym_t *group;
9e27e96a 2289 const STRLEN length = cur-start;
f337b084
TH
2290 for (group = symptr;
2291 group && length < group->strbeg;
2292 group = group->previous) group->strbeg = length;
2293 lookahead.strbeg = length;
2294 }
a6ec74c1 2295 break;
fc241834
RGS
2296 case 'x' | TYPE_IS_SHRIEKING: {
2297 I32 ai32;
62f95557
IZ
2298 if (!len) /* Avoid division by 0 */
2299 len = 1;
230e1fce 2300 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2301 else ai32 = (cur - start) % len;
2302 if (ai32 == 0) goto no_change;
2303 len -= ai32;
2304 }
2305 /* FALL THROUGH */
a6ec74c1 2306 case 'x':
f337b084 2307 goto grow;
a6ec74c1
JH
2308 case 'A':
2309 case 'Z':
f337b084 2310 case 'a': {
f7fe979e 2311 const char *aptr;
f337b084 2312
a6ec74c1 2313 fromstr = NEXTFROM;
e62f0680 2314 aptr = SvPV_const(fromstr, fromlen);
f337b084 2315 if (DO_UTF8(fromstr)) {
f7fe979e 2316 const char *end, *s;
f337b084
TH
2317
2318 if (!utf8 && !SvUTF8(cat)) {
2319 marked_upgrade(aTHX_ cat, symptr);
2320 lookahead.flags |= FLAG_DO_UTF8;
2321 lookahead.strbeg = symptr->strbeg;
2322 utf8 = 1;
2323 start = SvPVX(cat);
2324 cur = start + SvCUR(cat);
2325 }
fc241834 2326 if (howlen == e_star) {
f337b084
TH
2327 if (utf8) goto string_copy;
2328 len = fromlen+1;
2329 }
2330 s = aptr;
2331 end = aptr + fromlen;
2332 fromlen = datumtype == 'Z' ? len-1 : len;
2333 while ((I32) fromlen > 0 && s < end) {
2334 s += UTF8SKIP(s);
2335 fromlen--;
2336 }
2337 if (s > end)
2338 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2339 if (utf8) {
fc241834 2340 len = fromlen;
f337b084
TH
2341 if (datumtype == 'Z') len++;
2342 fromlen = s-aptr;
2343 len += fromlen;
fc241834 2344
f337b084 2345 goto string_copy;
fc241834 2346 }
f337b084
TH
2347 fromlen = len - fromlen;
2348 if (datumtype == 'Z') fromlen--;
2349 if (howlen == e_star) {
2350 len = fromlen;
2351 if (datumtype == 'Z') len++;
fc241834 2352 }
f337b084 2353 GROWING(0, cat, start, cur, len);
fc241834 2354 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2355 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2356 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2357 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2358 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2359 cur += fromlen;
a6ec74c1 2360 len -= fromlen;
f337b084
TH
2361 } else if (utf8) {
2362 if (howlen == e_star) {
2363 len = fromlen;
2364 if (datumtype == 'Z') len++;
a6ec74c1 2365 }
f337b084
TH
2366 if (len <= (I32) fromlen) {
2367 fromlen = len;
2368 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2369 }
fc241834 2370 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2371 upgrade, so:
2372 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2373 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2374 len -= fromlen;
2375 while (fromlen > 0) {
230e1fce 2376 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2377 aptr++;
2378 fromlen--;
fc241834 2379 }
f337b084
TH
2380 } else {
2381 string_copy:
2382 if (howlen == e_star) {
2383 len = fromlen;
2384 if (datumtype == 'Z') len++;
2385 }
2386 if (len <= (I32) fromlen) {
2387 fromlen = len;
2388 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2389 }
f337b084
TH
2390 GROWING(0, cat, start, cur, len);
2391 Copy(aptr, cur, fromlen, char);
2392 cur += fromlen;
2393 len -= fromlen;
a6ec74c1 2394 }
f337b084
TH
2395 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2396 cur += len;
3c4fb04a 2397 SvTAINT(cat);
a6ec74c1 2398 break;
f337b084 2399 }
a6ec74c1 2400 case 'B':
f337b084 2401 case 'b': {
b83604b4 2402 const char *str, *end;
f337b084
TH
2403 I32 l, field_len;
2404 U8 bits;
2405 bool utf8_source;
2406 U32 utf8_flags;
a6ec74c1 2407
fc241834 2408 fromstr = NEXTFROM;
b83604b4 2409 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2410 end = str + fromlen;
2411 if (DO_UTF8(fromstr)) {
2412 utf8_source = TRUE;
041457d9 2413 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2414 } else {
2415 utf8_source = FALSE;
2416 utf8_flags = 0; /* Unused, but keep compilers happy */
2417 }
2418 if (howlen == e_star) len = fromlen;
2419 field_len = (len+7)/8;
2420 GROWING(utf8, cat, start, cur, field_len);
2421 if (len > (I32)fromlen) len = fromlen;
2422 bits = 0;
2423 l = 0;
2424 if (datumtype == 'B')
2425 while (l++ < len) {
2426 if (utf8_source) {
95b63a38 2427 UV val = 0;
f337b084
TH
2428 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2429 bits |= val & 1;
2430 } else bits |= *str++ & 1;
2431 if (l & 7) bits <<= 1;
fc241834 2432 else {
f337b084
TH
2433 PUSH_BYTE(utf8, cur, bits);
2434 bits = 0;
a6ec74c1
JH
2435 }
2436 }
f337b084
TH
2437 else
2438 /* datumtype == 'b' */
2439 while (l++ < len) {
2440 if (utf8_source) {
95b63a38 2441 UV val = 0;
f337b084
TH
2442 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2443 if (val & 1) bits |= 0x80;
2444 } else if (*str++ & 1)
2445 bits |= 0x80;
2446 if (l & 7) bits >>= 1;
fc241834 2447 else {
f337b084
TH
2448 PUSH_BYTE(utf8, cur, bits);
2449 bits = 0;
a6ec74c1
JH
2450 }
2451 }
f337b084
TH
2452 l--;
2453 if (l & 7) {
fc241834 2454 if (datumtype == 'B')
f337b084 2455 bits <<= 7 - (l & 7);
fc241834 2456 else
f337b084
TH
2457 bits >>= 7 - (l & 7);
2458 PUSH_BYTE(utf8, cur, bits);
2459 l += 7;
a6ec74c1 2460 }
f337b084
TH
2461 /* Determine how many chars are left in the requested field */
2462 l /= 8;
2463 if (howlen == e_star) field_len = 0;
2464 else field_len -= l;
2465 Zero(cur, field_len, char);
2466 cur += field_len;
a6ec74c1 2467 break;
f337b084 2468 }
a6ec74c1 2469 case 'H':
f337b084 2470 case 'h': {
10516c54 2471 const char *str, *end;
f337b084
TH
2472 I32 l, field_len;
2473 U8 bits;
2474 bool utf8_source;
2475 U32 utf8_flags;
a6ec74c1 2476
fc241834 2477 fromstr = NEXTFROM;
10516c54 2478 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2479 end = str + fromlen;
2480 if (DO_UTF8(fromstr)) {
2481 utf8_source = TRUE;
041457d9 2482 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2483 } else {
2484 utf8_source = FALSE;
2485 utf8_flags = 0; /* Unused, but keep compilers happy */
2486 }
2487 if (howlen == e_star) len = fromlen;
2488 field_len = (len+1)/2;
2489 GROWING(utf8, cat, start, cur, field_len);
2490 if (!utf8 && len > (I32)fromlen) len = fromlen;
2491 bits = 0;
2492 l = 0;
2493 if (datumtype == 'H')
2494 while (l++ < len) {
2495 if (utf8_source) {
95b63a38 2496 UV val = 0;
f337b084
TH
2497 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2498 if (val < 256 && isALPHA(val))
2499 bits |= (val + 9) & 0xf;
a6ec74c1 2500 else
f337b084
TH
2501 bits |= val & 0xf;
2502 } else if (isALPHA(*str))
2503 bits |= (*str++ + 9) & 0xf;
2504 else
2505 bits |= *str++ & 0xf;
2506 if (l & 1) bits <<= 4;
fc241834 2507 else {
f337b084
TH
2508 PUSH_BYTE(utf8, cur, bits);
2509 bits = 0;
a6ec74c1
JH
2510 }
2511 }
f337b084
TH
2512 else
2513 while (l++ < len) {
2514 if (utf8_source) {
95b63a38 2515 UV val = 0;
f337b084
TH
2516 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2517 if (val < 256 && isALPHA(val))
2518 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2519 else
f337b084
TH
2520 bits |= (val & 0xf) << 4;
2521 } else if (isALPHA(*str))
2522 bits |= ((*str++ + 9) & 0xf) << 4;
2523 else
2524 bits |= (*str++ & 0xf) << 4;
2525 if (l & 1) bits >>= 4;
fc241834 2526 else {
f337b084
TH
2527 PUSH_BYTE(utf8, cur, bits);
2528 bits = 0;
a6ec74c1 2529 }
fc241834 2530 }
f337b084
TH
2531 l--;
2532 if (l & 1) {
2533 PUSH_BYTE(utf8, cur, bits);
2534 l++;
2535 }
2536 /* Determine how many chars are left in the requested field */
2537 l /= 2;
2538 if (howlen == e_star) field_len = 0;
2539 else field_len -= l;
2540 Zero(cur, field_len, char);
2541 cur += field_len;
2542 break;
fc241834
RGS
2543 }
2544 case 'c':
f337b084
TH
2545 while (len-- > 0) {
2546 IV aiv;
2547 fromstr = NEXTFROM;
2548 aiv = SvIV(fromstr);
a2a5de95
NC
2549 if ((-128 > aiv || aiv > 127))
2550 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2551 "Character in 'c' format wrapped in pack");
585ec06d 2552 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2553 }
2554 break;
2555 case 'C':
f337b084
TH
2556 if (len == 0) {
2557 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2558 break;
2559 }
a6ec74c1 2560 while (len-- > 0) {
f337b084 2561 IV aiv;
a6ec74c1 2562 fromstr = NEXTFROM;
f337b084 2563 aiv = SvIV(fromstr);
a2a5de95
NC
2564 if ((0 > aiv || aiv > 0xff))
2565 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2566 "Character in 'C' format wrapped in pack");
1651fc44 2567 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2568 }
fc241834
RGS
2569 break;
2570 case 'W': {
2571 char *end;
670f1322 2572 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2573
2574 end = start+SvLEN(cat)-1;
2575 if (utf8) end -= UTF8_MAXLEN-1;
2576 while (len-- > 0) {
2577 UV auv;
2578 fromstr = NEXTFROM;
2579 auv = SvUV(fromstr);
2580 if (in_bytes) auv = auv % 0x100;
2581 if (utf8) {
2582 W_utf8:
2583 if (cur > end) {
2584 *cur = '\0';
b162af07 2585 SvCUR_set(cat, cur - start);
fc241834
RGS
2586
2587 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2588 end = start+SvLEN(cat)-UTF8_MAXLEN;
2589 }
230e1fce
NC
2590 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2591 NATIVE_TO_UNI(auv),
041457d9 2592 warn_utf8 ?
230e1fce 2593 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2594 } else {
2595 if (auv >= 0x100) {
2596 if (!SvUTF8(cat)) {
2597 *cur = '\0';
b162af07 2598 SvCUR_set(cat, cur - start);
fc241834
RGS
2599 marked_upgrade(aTHX_ cat, symptr);
2600 lookahead.flags |= FLAG_DO_UTF8;
2601 lookahead.strbeg = symptr->strbeg;
2602 utf8 = 1;
2603 start = SvPVX(cat);
2604 cur = start + SvCUR(cat);
2605 end = start+SvLEN(cat)-UTF8_MAXLEN;
2606 goto W_utf8;
2607 }
a2a5de95
NC
2608 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2609 "Character in 'W' format wrapped in pack");
fc241834
RGS
2610 auv &= 0xff;
2611 }
2612 if (cur >= end) {
2613 *cur = '\0';
b162af07 2614 SvCUR_set(cat, cur - start);
fc241834
RGS
2615 GROWING(0, cat, start, cur, len+1);
2616 end = start+SvLEN(cat)-1;
2617 }
fe2774ed 2618 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2619 }
2620 }
2621 break;
fc241834
RGS
2622 }
2623 case 'U': {
2624 char *end;
2625
2626 if (len == 0) {
2627 if (!(symptr->flags & FLAG_DO_UTF8)) {
2628 marked_upgrade(aTHX_ cat, symptr);
2629 lookahead.flags |= FLAG_DO_UTF8;
2630 lookahead.strbeg = symptr->strbeg;
2631 }
2632 utf8 = 0;
2633 goto no_change;
2634 }
2635
2636 end = start+SvLEN(cat);
2637 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2638 while (len-- > 0) {
fc241834 2639 UV auv;
a6ec74c1 2640 fromstr = NEXTFROM;
fc241834
RGS
2641 auv = SvUV(fromstr);
2642 if (utf8) {
230e1fce 2643 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2644 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2645 warn_utf8 ?
fc241834
RGS
2646 0 : UNICODE_ALLOW_ANY);
2647 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2648 *cur = '\0';
b162af07 2649 SvCUR_set(cat, cur - start);
fc241834
RGS
2650 GROWING(0, cat, start, cur,
2651 len+(endb-buffer)*UTF8_EXPAND);
2652 end = start+SvLEN(cat);
2653 }
64844641 2654 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2655 } else {
2656 if (cur >= end) {
2657 *cur = '\0';
b162af07 2658 SvCUR_set(cat, cur - start);
fc241834
RGS
2659 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2660 end = start+SvLEN(cat)-UTF8_MAXLEN;
2661 }
230e1fce 2662 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2663 warn_utf8 ?
230e1fce 2664 0 : UNICODE_ALLOW_ANY);
fc241834 2665 }
a6ec74c1 2666 }
a6ec74c1 2667 break;
fc241834 2668 }
a6ec74c1
JH
2669 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2670 case 'f':
a6ec74c1 2671 while (len-- > 0) {
f337b084
TH
2672 float afloat;
2673 NV anv;
a6ec74c1 2674 fromstr = NEXTFROM;
f337b084 2675 anv = SvNV(fromstr);
85bba25f 2676# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2677 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2678 * on Alpha; fake it if we don't have them.
2679 */
f337b084 2680 if (anv > FLT_MAX)
fc241834 2681 afloat = FLT_MAX;
f337b084 2682 else if (anv < -FLT_MAX)
fc241834 2683 afloat = -FLT_MAX;
f337b084 2684 else afloat = (float)anv;
baf3cf9c 2685# else
f337b084 2686 afloat = (float)anv;
baf3cf9c 2687# endif
2b4ad569 2688 DO_BO_PACK(afloat);
f337b084 2689 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2690 }
2691 break;
2692 case 'd':
a6ec74c1 2693 while (len-- > 0) {
f337b084
TH
2694 double adouble;
2695 NV anv;
a6ec74c1 2696 fromstr = NEXTFROM;
f337b084 2697 anv = SvNV(fromstr);
85bba25f 2698# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2699 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2700 * on Alpha; fake it if we don't have them.
2701 */
f337b084 2702 if (anv > DBL_MAX)
fc241834 2703 adouble = DBL_MAX;
f337b084 2704 else if (anv < -DBL_MAX)
fc241834 2705 adouble = -DBL_MAX;
f337b084 2706 else adouble = (double)anv;
baf3cf9c 2707# else
f337b084 2708 adouble = (double)anv;
baf3cf9c 2709# endif
2b4ad569 2710 DO_BO_PACK(adouble);
f337b084 2711 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2712 }
2713 break;
fc241834 2714 case 'F': {
275663fa 2715 NV_bytes anv;
1109a392 2716 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2717 while (len-- > 0) {
2718 fromstr = NEXTFROM;
cd07c537
DM
2719#ifdef __GNUC__
2720 /* to work round a gcc/x86 bug; don't use SvNV */
2721 anv.nv = sv_2nv(fromstr);
2722#else
275663fa 2723 anv.nv = SvNV(fromstr);
cd07c537 2724#endif
2b4ad569 2725 DO_BO_PACK(anv);
275663fa 2726 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2727 }
2728 break;
fc241834 2729 }
92d41999 2730#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2731 case 'D': {
275663fa 2732 ld_bytes aldouble;
1109a392
MHM
2733 /* long doubles can have unused bits, which may be nonzero */
2734 Zero(&aldouble, 1, long double);
92d41999
JH
2735 while (len-- > 0) {
2736 fromstr = NEXTFROM;
cd07c537
DM
2737# ifdef __GNUC__
2738 /* to work round a gcc/x86 bug; don't use SvNV */
2739 aldouble.ld = (long double)sv_2nv(fromstr);
2740# else
275663fa 2741 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2742# endif
2b4ad569 2743 DO_BO_PACK(aldouble);
275663fa 2744 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2745 }
2746 break;
fc241834 2747 }
92d41999 2748#endif
068bd2e7 2749 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2750 case 'n':
2751 while (len-- > 0) {
f337b084 2752 I16 ai16;
a6ec74c1 2753 fromstr = NEXTFROM;
ef108786 2754 ai16 = (I16)SvIV(fromstr);
ef108786 2755 ai16 = PerlSock_htons(ai16);
f337b084 2756 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2757 }
2758 break;
068bd2e7 2759 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2760 case 'v':
2761 while (len-- > 0) {
f337b084 2762 I16 ai16;
a6ec74c1 2763 fromstr = NEXTFROM;
ef108786 2764 ai16 = (I16)SvIV(fromstr);
ef108786 2765 ai16 = htovs(ai16);
f337b084 2766 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2767 }
2768 break;
49704364 2769 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2770#if SHORTSIZE != SIZE16
fc241834 2771 while (len-- > 0) {
f337b084 2772 unsigned short aushort;
fc241834
RGS
2773 fromstr = NEXTFROM;
2774 aushort = SvUV(fromstr);
2b4ad569 2775 DO_BO_PACK(aushort);
f337b084 2776 PUSH_VAR(utf8, cur, aushort);
fc241834 2777 }
49704364
LW
2778 break;
2779#else
2780 /* Fall through! */
a6ec74c1 2781#endif
49704364 2782 case 'S':
fc241834 2783 while (len-- > 0) {
f337b084 2784 U16 au16;
fc241834
RGS
2785 fromstr = NEXTFROM;
2786 au16 = (U16)SvUV(fromstr);
2b4ad569 2787 DO_BO_PACK(au16);
f337b084 2788 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2789 }
2790 break;
49704364 2791 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2792#if SHORTSIZE != SIZE16
fc241834 2793 while (len-- > 0) {
f337b084 2794 short ashort;
fc241834
RGS
2795 fromstr = NEXTFROM;
2796 ashort = SvIV(fromstr);
2b4ad569 2797 DO_BO_PACK(ashort);
f337b084 2798 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2799 }
49704364
LW
2800 break;
2801#else
2802 /* Fall through! */
a6ec74c1 2803#endif
49704364
LW
2804 case 's':
2805 while (len-- > 0) {
f337b084 2806 I16 ai16;
49704364 2807 fromstr = NEXTFROM;
ef108786 2808 ai16 = (I16)SvIV(fromstr);
2b4ad569 2809 DO_BO_PACK(ai16);
f337b084 2810 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2811 }
2812 break;
2813 case 'I':
49704364 2814 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2815 while (len-- > 0) {
f337b084 2816 unsigned int auint;
a6ec74c1
JH
2817 fromstr = NEXTFROM;
2818 auint = SvUV(fromstr);
2b4ad569 2819 DO_BO_PACK(auint);
f337b084 2820 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2821 }
2822 break;
92d41999
JH
2823 case 'j':
2824 while (len-- > 0) {
f337b084 2825 IV aiv;
92d41999
JH
2826 fromstr = NEXTFROM;
2827 aiv = SvIV(fromstr);
2b4ad569 2828 DO_BO_PACK(aiv);
f337b084 2829 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
2830 }
2831 break;
2832 case 'J':
2833 while (len-- > 0) {
f337b084 2834 UV auv;
92d41999
JH
2835 fromstr = NEXTFROM;
2836 auv = SvUV(fromstr);
2b4ad569 2837 DO_BO_PACK(auv);
f337b084 2838 PUSH_VAR(utf8, cur, auv);
92d41999
JH
2839 }
2840 break;
a6ec74c1
JH
2841 case 'w':
2842 while (len-- > 0) {
f337b084 2843 NV anv;
a6ec74c1 2844 fromstr = NEXTFROM;
15e9f109 2845 anv = SvNV(fromstr);
a6ec74c1 2846
f337b084
TH
2847 if (anv < 0) {
2848 *cur = '\0';
b162af07 2849 SvCUR_set(cat, cur - start);
49704364 2850 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2851 }
a6ec74c1 2852
196b62db
NC
2853 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2854 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2855 any negative IVs will have already been got by the croak()
2856 above. IOK is untrue for fractions, so we test them
2857 against UV_MAX_P1. */
f337b084
TH
2858 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2859 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2860 char *in = buf + sizeof(buf);
196b62db 2861 UV auv = SvUV(fromstr);
a6ec74c1
JH
2862
2863 do {
eb160463 2864 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2865 auv >>= 7;
2866 } while (auv);
2867 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2868 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2869 in, (buf + sizeof(buf)) - in);
2870 } else if (SvPOKp(fromstr))
2871 goto w_string;
a6ec74c1 2872 else if (SvNOKp(fromstr)) {
0258719b 2873 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2874 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2875 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2876 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2877 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2878 Some C compilers are strict about integral constant
2879 expressions so we conservatively divide by a slightly
2880 smaller integer instead of multiplying by the exact
2881 floating-point value.
0258719b
NC
2882 */
2883#ifdef NV_MAX_10_EXP
f337b084 2884 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2885 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2886#else
f337b084 2887 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2888 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2889#endif
a6ec74c1
JH
2890 char *in = buf + sizeof(buf);
2891
8b6e33c7 2892 anv = Perl_floor(anv);
a6ec74c1 2893 do {
8b6e33c7 2894 const NV next = Perl_floor(anv / 128);
a6ec74c1 2895 if (in <= buf) /* this cannot happen ;-) */
49704364 2896 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2897 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2898 anv = next;
2899 } while (anv > 0);
a6ec74c1 2900 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2901 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2902 in, (buf + sizeof(buf)) - in);
2903 } else {
8b6e33c7
AL
2904 const char *from;
2905 char *result, *in;
735b914b
JH
2906 SV *norm;
2907 STRLEN len;
2908 bool done;
2909
f337b084 2910 w_string:
735b914b 2911 /* Copy string and check for compliance */
349d4f2f 2912 from = SvPV_const(fromstr, len);
735b914b 2913 if ((norm = is_an_int(from, len)) == NULL)
49704364 2914 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2915
a02a5408 2916 Newx(result, len, char);
735b914b
JH
2917 in = result + len;
2918 done = FALSE;
f337b084 2919 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2920 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2921 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2922 in, (result + len) - in);
735b914b
JH
2923 Safefree(result);
2924 SvREFCNT_dec(norm); /* free norm */
fc241834 2925 }
a6ec74c1
JH
2926 }
2927 break;
2928 case 'i':
49704364 2929 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2930 while (len-- > 0) {
f337b084 2931 int aint;
a6ec74c1
JH
2932 fromstr = NEXTFROM;
2933 aint = SvIV(fromstr);
2b4ad569 2934 DO_BO_PACK(aint);
f337b084 2935 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
2936 }
2937 break;
068bd2e7 2938 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2939 case 'N':
2940 while (len-- > 0) {
f337b084 2941 U32 au32;
a6ec74c1 2942 fromstr = NEXTFROM;
ef108786 2943 au32 = SvUV(fromstr);
ef108786 2944 au32 = PerlSock_htonl(au32);
f337b084 2945 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2946 }
2947 break;
068bd2e7 2948 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2949 case 'V':
2950 while (len-- > 0) {
f337b084 2951 U32 au32;
a6ec74c1 2952 fromstr = NEXTFROM;
ef108786 2953 au32 = SvUV(fromstr);
ef108786 2954 au32 = htovl(au32);
f337b084 2955 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2956 }
2957 break;
49704364 2958 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2959#if LONGSIZE != SIZE32
fc241834 2960 while (len-- > 0) {
f337b084 2961 unsigned long aulong;
fc241834
RGS
2962 fromstr = NEXTFROM;
2963 aulong = SvUV(fromstr);
2b4ad569 2964 DO_BO_PACK(aulong);
f337b084 2965 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 2966 }
49704364
LW
2967 break;
2968#else
2969 /* Fall though! */
a6ec74c1 2970#endif
49704364 2971 case 'L':
fc241834 2972 while (len-- > 0) {
f337b084 2973 U32 au32;
fc241834
RGS
2974 fromstr = NEXTFROM;
2975 au32 = SvUV(fromstr);
2b4ad569 2976 DO_BO_PACK(au32);
f337b084 2977 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2978 }
2979 break;
49704364 2980 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2981#if LONGSIZE != SIZE32
fc241834 2982 while (len-- > 0) {
f337b084 2983 long along;
fc241834
RGS
2984 fromstr = NEXTFROM;
2985 along = SvIV(fromstr);
2b4ad569 2986 DO_BO_PACK(along);
f337b084 2987 PUSH_VAR(utf8, cur, along);
a6ec74c1 2988 }
49704364
LW
2989 break;
2990#else
2991 /* Fall though! */
a6ec74c1 2992#endif
49704364
LW
2993 case 'l':
2994 while (len-- > 0) {
f337b084 2995 I32 ai32;
49704364 2996 fromstr = NEXTFROM;
ef108786 2997 ai32 = SvIV(fromstr);
2b4ad569 2998 DO_BO_PACK(ai32);
f337b084 2999 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3000 }
3001 break;
3002#ifdef HAS_QUAD
3003 case 'Q':
3004 while (len-- > 0) {
f337b084 3005 Uquad_t auquad;
a6ec74c1 3006 fromstr = NEXTFROM;
f337b084 3007 auquad = (Uquad_t) SvUV(fromstr);
2b4ad569 3008 DO_BO_PACK(auquad);
f337b084 3009 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3010 }
3011 break;
3012 case 'q':
3013 while (len-- > 0) {
f337b084 3014 Quad_t aquad;
a6ec74c1
JH
3015 fromstr = NEXTFROM;
3016 aquad = (Quad_t)SvIV(fromstr);
2b4ad569 3017 DO_BO_PACK(aquad);
f337b084 3018 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3019 }
3020 break;
f337b084 3021#endif /* HAS_QUAD */
a6ec74c1
JH
3022 case 'P':
3023 len = 1; /* assume SV is correct length */
f337b084 3024 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3025 /* Fall through! */
a6ec74c1
JH
3026 case 'p':
3027 while (len-- > 0) {
83003860 3028 const char *aptr;
f337b084 3029
a6ec74c1 3030 fromstr = NEXTFROM;
28a4f200 3031 SvGETMAGIC(fromstr);
3032 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3033 else {
a6ec74c1
JH
3034 /* XXX better yet, could spirit away the string to
3035 * a safe spot and hang on to it until the result
3036 * of pack() (and all copies of the result) are
3037 * gone.
3038 */
041457d9 3039 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3040 !SvREADONLY(fromstr)))) {
3041 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3042 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3043 }
3044 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3045 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3046 else
2596d9fe 3047 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3048 }
2b4ad569 3049 DO_BO_PACK(aptr);
f337b084 3050 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3051 }
3052 break;
fc241834 3053 case 'u': {
f7fe979e 3054 const char *aptr, *aend;
fc241834 3055 bool from_utf8;
f337b084 3056
a6ec74c1 3057 fromstr = NEXTFROM;
fc241834
RGS
3058 if (len <= 2) len = 45;
3059 else len = len / 3 * 3;
3060 if (len >= 64) {
a2a5de95
NC
3061 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3062 "Field too wide in 'u' format in pack");
fc241834
RGS
3063 len = 63;
3064 }
83003860 3065 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3066 from_utf8 = DO_UTF8(fromstr);
3067 if (from_utf8) {
3068 aend = aptr + fromlen;
3f63b0e5 3069 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3070 } else aend = NULL; /* Unused, but keep compilers happy */
3071 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3072 while (fromlen > 0) {
fc241834 3073 U8 *end;
a6ec74c1 3074 I32 todo;
fc241834 3075 U8 hunk[1+63/3*4+1];
a6ec74c1 3076
eb160463 3077 if ((I32)fromlen > len)
a6ec74c1
JH
3078 todo = len;
3079 else
3080 todo = fromlen;
fc241834
RGS
3081 if (from_utf8) {
3082 char buffer[64];
3083 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3084 'u' | TYPE_IS_PACK)) {
3085 *cur = '\0';
b162af07 3086 SvCUR_set(cat, cur - start);
5637ef5b
NC
3087 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3088 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3089 aptr, aend, buffer, (long) todo);
fc241834
RGS
3090 }
3091 end = doencodes(hunk, buffer, todo);
3092 } else {
3093 end = doencodes(hunk, aptr, todo);
3094 aptr += todo;
3095 }
3096 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3097 fromlen -= todo;
3098 }
a6ec74c1
JH
3099 break;
3100 }
f337b084
TH
3101 }
3102 *cur = '\0';
b162af07 3103 SvCUR_set(cat, cur - start);
f337b084 3104 no_change:
49704364 3105 *symptr = lookahead;
a6ec74c1 3106 }
49704364 3107 return beglist;
18529408
IZ
3108}
3109#undef NEXTFROM
3110
3111
3112PP(pp_pack)
3113{
97aff369 3114 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3115 SV *cat = TARG;
18529408 3116 STRLEN fromlen;
349d4f2f 3117 SV *pat_sv = *++MARK;
eb578fdb
KW
3118 const char *pat = SvPV_const(pat_sv, fromlen);
3119 const char *patend = pat + fromlen;
18529408
IZ
3120
3121 MARK++;
76f68e9b 3122 sv_setpvs(cat, "");
f337b084 3123 SvUTF8_off(cat);
18529408 3124
7accc089 3125 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3126
a6ec74c1
JH
3127 SvSETMAGIC(cat);
3128 SP = ORIGMARK;
3129 PUSHs(cat);
3130 RETURN;
3131}
a6ec74c1 3132
73cb7263
NC
3133/*
3134 * Local variables:
3135 * c-indentation-style: bsd
3136 * c-basic-offset: 4
14d04a33 3137 * indent-tabs-mode: nil
73cb7263
NC
3138 * End:
3139 *
14d04a33 3140 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3141 */