This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update experimental to CPAN version 0.012
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
3a88beaa
NC
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 136
a1219b5e
NC
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
20aa3a38
NC
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
147 */
a1219b5e
NC
148#endif
149
08ca2aa3 150/* Only to be used inside a loop (see the break) */
aaec8192 151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 152STMT_START { \
228e69a7 153 if (UNLIKELY(utf8)) { \
f337b084 154 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 155 (char *) (buf), len, datumtype)) break; \
08ca2aa3 156 } else { \
228e69a7 157 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
275663fa 161 s += len; \
08ca2aa3 162 } \
08ca2aa3
TH
163} STMT_END
164
aaec8192
NC
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 167
aaec8192
NC
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 170
aaec8192
NC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 173
3a88beaa
NC
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 176
49704364
WL
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
219#if 'I' == 73 && 'J' == 74
220/* On an ASCII/ISO kind of system */
221#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222#else
223/*
224 Some other sort of character set - use memchr() so we don't match
225 the null byte.
226 */
227#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
228#endif
229
66c611c5 230/* type modifiers */
62f95557 231#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
232#define TYPE_IS_BIG_ENDIAN 0x200
233#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 234#define TYPE_IS_PACK 0x800
1109a392 235#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 236#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
237#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
238
7212898e
NC
239# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
241
242# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
243
78d46eaa 244#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 245#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
246#define PACK_SIZE_MASK 0x3F
247
298bc19c 248#include "packsizetables.c"
78d46eaa 249
20aa3a38
NC
250static void
251S_reverse_copy(const char *src, char *dest, STRLEN len)
252{
253 dest += len;
254 while (len--)
255 *--dest = *src++;
256}
257
08ca2aa3 258STATIC U8
f7fe979e 259uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 260{
08ca2aa3 261 STRLEN retlen;
0bcc34c2 262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 264 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 270 if (val >= 0x100) {
a2a5de95
NC
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
274 val &= 0xff;
275 }
276 *s += retlen;
fe2774ed 277 return (U8)val;
08ca2aa3
TH
278}
279
f337b084
TH
280#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
282 *(U8 *)(s)++)
283
284STATIC bool
f7fe979e 285uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
286{
287 UV val;
288 STRLEN retlen;
f7fe979e 289 const char *from = *s;
08ca2aa3 290 int bad = 0;
f7fe979e 291 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
293 const bool needs_swap = NEEDS_SWAP(datumtype);
294
228e69a7 295 if (UNLIKELY(needs_swap))
20aa3a38
NC
296 buf += buf_len;
297
08ca2aa3
TH
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
f337b084 300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
303 bad |= 1;
304 } else from += retlen;
305 if (val >= 0x100) {
306 bad |= 2;
307 val &= 0xff;
308 }
228e69a7 309 if (UNLIKELY(needs_swap))
20aa3a38
NC
310 *(U8 *)--buf = (U8)val;
311 else
312 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
313 }
314 /* We have enough characters for the buffer. Did we have problems ? */
315 if (bad) {
316 if (bad & 1) {
317 /* Rewalk the string fragment while warning */
f7fe979e 318 const char *ptr;
9e27e96a 319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
c80e42f3 322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
323 }
324 if (from > end) from = end;
325 }
a2a5de95
NC
326 if ((bad & 2))
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 328 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
332 }
333 *s = from;
334 return TRUE;
335}
336
337STATIC bool
f7fe979e 338next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 339{
08ca2aa3 340 STRLEN retlen;
0bcc34c2 341 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
342 if (val >= 0x100 || !ISUUCHAR(val) ||
343 retlen == (STRLEN) -1 || retlen == 0) {
344 *out = 0;
345 return FALSE;
346 }
347 *out = PL_uudmap[val] & 077;
f337b084 348 *s += retlen;
08ca2aa3
TH
349 return TRUE;
350}
78d46eaa 351
64844641 352STATIC char *
3a88beaa 353S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
7918f24d
NC
354 PERL_ARGS_ASSERT_BYTES_TO_UNI;
355
228e69a7 356 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
357 const U8 *p = start + len;
358 while (p-- > start) {
55d09dc8 359 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
360 }
361 } else {
362 const U8 * const end = start + len;
363 while (start < end) {
55d09dc8 364 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
365 start++;
366 }
f337b084 367 }
64844641 368 return dest;
f337b084
TH
369}
370
3a88beaa 371#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 372STMT_START { \
228e69a7 373 if (UNLIKELY(utf8)) \
3a88beaa 374 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
230e1fce 375 else { \
228e69a7 376 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
377 S_reverse_copy((char *)(buf), cur, len); \
378 else \
379 Copy(buf, cur, len, char); \
230e1fce
NC
380 (cur) += (len); \
381 } \
f337b084
TH
382} STMT_END
383
384#define GROWING(utf8, cat, start, cur, in_len) \
385STMT_START { \
386 STRLEN glen = (in_len); \
3473cf63 387 if (utf8) glen *= UTF8_EXPAND; \
f337b084 388 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 389 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
390 (cur) = (start) + SvCUR(cat); \
391 } \
392} STMT_END
393
394#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
395STMT_START { \
f7fe979e 396 const STRLEN glen = (in_len); \
f337b084 397 STRLEN gl = glen; \
3473cf63 398 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
399 if ((cur) + gl >= (start) + SvLEN(cat)) { \
400 *cur = '\0'; \
b162af07 401 SvCUR_set((cat), (cur) - (start)); \
0bd48802 402 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
403 (cur) = (start) + SvCUR(cat); \
404 } \
3a88beaa 405 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
406} STMT_END
407
408#define PUSH_BYTE(utf8, s, byte) \
409STMT_START { \
410 if (utf8) { \
f7fe979e 411 const U8 au8 = (byte); \
3a88beaa 412 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
f337b084
TH
413 } else *(U8 *)(s)++ = (byte); \
414} STMT_END
415
416/* Only to be used inside a loop (see the break) */
417#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
418STMT_START { \
419 STRLEN retlen; \
420 if (str >= end) break; \
421 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
422 if (retlen == (STRLEN) -1 || retlen == 0) { \
423 *cur = '\0'; \
424 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
425 } \
426 str += retlen; \
427} STMT_END
428
f7fe979e
AL
429static const char *_action( const tempsym_t* symptr )
430{
10edeb5d 431 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
432}
433
206947d2 434/* Returns the sizeof() struct described by pat */
028d1f6d 435STATIC I32
f337b084 436S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 437{
f337b084 438 I32 total = 0;
206947d2 439
7918f24d
NC
440 PERL_ARGS_ASSERT_MEASURE_STRUCT;
441
49704364 442 while (next_symbol(symptr)) {
f337b084 443 I32 len;
f7fe979e 444 int size;
f337b084
TH
445
446 switch (symptr->howlen) {
fc241834 447 case e_star:
49704364 448 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 449 _action( symptr ) );
81d52ecd 450
f337b084
TH
451 default:
452 /* e_no_len and e_number */
453 len = symptr->length;
454 break;
49704364
WL
455 }
456
a7a3cfaa 457 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 458 if (!size) {
f7fe979e 459 int star;
80a13697
NC
460 /* endianness doesn't influence the size of a type */
461 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
462 default:
463 Perl_croak(aTHX_ "Invalid type '%c' in %s",
464 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 465 _action( symptr ) );
28be1210
TH
466 case '.' | TYPE_IS_SHRIEKING:
467 case '@' | TYPE_IS_SHRIEKING:
80a13697 468 case '@':
28be1210 469 case '.':
80a13697
NC
470 case '/':
471 case 'U': /* XXXX Is it correct? */
472 case 'w':
473 case 'u':
474 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 475 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 476 _action( symptr ) );
80a13697
NC
477 case '%':
478 size = 0;
479 break;
480 case '(':
fc241834
RGS
481 {
482 tempsym_t savsym = *symptr;
483 symptr->patptr = savsym.grpbeg;
484 symptr->patend = savsym.grpend;
485 /* XXXX Theoretically, we need to measure many times at
486 different positions, since the subexpression may contain
487 alignment commands, but be not of aligned length.
488 Need to detect this and croak(). */
489 size = measure_struct(symptr);
490 *symptr = savsym;
491 break;
492 }
80a13697
NC
493 case 'X' | TYPE_IS_SHRIEKING:
494 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
495 */
496 if (!len) /* Avoid division by 0 */
497 len = 1;
498 len = total % len; /* Assumed: the start is aligned. */
924ba076 499 /* FALLTHROUGH */
80a13697
NC
500 case 'X':
501 size = -1;
502 if (total < len)
f7fe979e 503 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
504 break;
505 case 'x' | TYPE_IS_SHRIEKING:
506 if (!len) /* Avoid division by 0 */
507 len = 1;
508 star = total % len; /* Assumed: the start is aligned. */
509 if (star) /* Other portable ways? */
510 len = len - star;
511 else
512 len = 0;
924ba076 513 /* FALLTHROUGH */
80a13697
NC
514 case 'x':
515 case 'A':
516 case 'Z':
517 case 'a':
80a13697
NC
518 size = 1;
519 break;
520 case 'B':
521 case 'b':
522 len = (len + 7)/8;
523 size = 1;
524 break;
525 case 'H':
526 case 'h':
527 len = (len + 1)/2;
528 size = 1;
529 break;
78d46eaa 530
80a13697
NC
531 case 'P':
532 len = 1;
533 size = sizeof(char*);
78d46eaa
NC
534 break;
535 }
206947d2
IZ
536 }
537 total += len * size;
538 }
539 return total;
540}
541
49704364
WL
542
543/* locate matching closing parenthesis or bracket
544 * returns char pointer to char after match, or NULL
545 */
f7fe979e 546STATIC const char *
5aaab254 547S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 548{
7918f24d
NC
549 PERL_ARGS_ASSERT_GROUP_END;
550
49704364 551 while (patptr < patend) {
f7fe979e 552 const char c = *patptr++;
49704364
WL
553
554 if (isSPACE(c))
555 continue;
556 else if (c == ender)
557 return patptr-1;
558 else if (c == '#') {
559 while (patptr < patend && *patptr != '\n')
560 patptr++;
561 continue;
562 } else if (c == '(')
563 patptr = group_end(patptr, patend, ')') + 1;
564 else if (c == '[')
565 patptr = group_end(patptr, patend, ']') + 1;
18529408 566 }
49704364
WL
567 Perl_croak(aTHX_ "No group ending character '%c' found in template",
568 ender);
a25b5927 569 NOT_REACHED; /* NOTREACHED */
18529408
IZ
570}
571
49704364
WL
572
573/* Convert unsigned decimal number to binary.
574 * Expects a pointer to the first digit and address of length variable
575 * Advances char pointer to 1st non-digit char and returns number
fc241834 576 */
f7fe979e 577STATIC const char *
5aaab254 578S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
579{
580 I32 len = *patptr++ - '0';
7918f24d
NC
581
582 PERL_ARGS_ASSERT_GET_NUM;
583
49704364
WL
584 while (isDIGIT(*patptr)) {
585 if (len >= 0x7FFFFFFF/10)
586 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
587 len = (len * 10) + (*patptr++ - '0');
588 }
589 *lenptr = len;
590 return patptr;
591}
592
593/* The marvellous template parsing routine: Using state stored in *symptr,
594 * locates next template code and count
595 */
596STATIC bool
f337b084 597S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 598{
f7fe979e 599 const char* patptr = symptr->patptr;
0bcc34c2 600 const char* const patend = symptr->patend;
49704364 601
7918f24d
NC
602 PERL_ARGS_ASSERT_NEXT_SYMBOL;
603
49704364
WL
604 symptr->flags &= ~FLAG_SLASH;
605
606 while (patptr < patend) {
607 if (isSPACE(*patptr))
608 patptr++;
609 else if (*patptr == '#') {
610 patptr++;
611 while (patptr < patend && *patptr != '\n')
612 patptr++;
613 if (patptr < patend)
614 patptr++;
615 } else {
fc241834 616 /* We should have found a template code */
49704364 617 I32 code = *patptr++ & 0xFF;
66c611c5 618 U32 inherited_modifiers = 0;
49704364
WL
619
620 if (code == ','){ /* grandfather in commas but with a warning */
621 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
622 symptr->flags |= FLAG_COMMA;
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 624 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
625 }
626 continue;
627 }
fc241834 628
49704364 629 /* for '(', skip to ')' */
fc241834 630 if (code == '(') {
49704364
WL
631 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
632 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 633 _action( symptr ) );
49704364
WL
634 symptr->grpbeg = patptr;
635 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
636 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
637 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 638 _action( symptr ) );
49704364
WL
639 }
640
66c611c5
MHM
641 /* look for group modifiers to inherit */
642 if (TYPE_ENDIANNESS(symptr->flags)) {
643 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
644 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
645 }
646
1109a392
MHM
647 /* look for modifiers */
648 while (patptr < patend) {
b7787f18
AL
649 const char *allowed;
650 I32 modifier;
1109a392
MHM
651 switch (*patptr) {
652 case '!':
653 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 654 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
655 break;
656 case '>':
657 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 658 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
659 break;
660 case '<':
661 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 662 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
663 break;
664 default:
b7787f18
AL
665 allowed = "";
666 modifier = 0;
1109a392
MHM
667 break;
668 }
66c611c5 669
1109a392
MHM
670 if (modifier == 0)
671 break;
66c611c5 672
1109a392
MHM
673 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
674 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 675 allowed, _action( symptr ) );
66c611c5
MHM
676
677 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 678 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 679 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
680 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
681 TYPE_ENDIANNESS_MASK)
682 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 683 *patptr, _action( symptr ) );
66c611c5 684
a2a5de95
NC
685 if ((code & modifier)) {
686 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
687 "Duplicate modifier '%c' after '%c' in %s",
688 *patptr, (int) TYPE_NO_MODIFIERS(code),
689 _action( symptr ) );
1109a392 690 }
66c611c5 691
1109a392
MHM
692 code |= modifier;
693 patptr++;
49704364
WL
694 }
695
66c611c5
MHM
696 /* inherit modifiers */
697 code |= inherited_modifiers;
698
fc241834 699 /* look for count and/or / */
49704364
WL
700 if (patptr < patend) {
701 if (isDIGIT(*patptr)) {
702 patptr = get_num( patptr, &symptr->length );
703 symptr->howlen = e_number;
704
705 } else if (*patptr == '*') {
706 patptr++;
707 symptr->howlen = e_star;
708
709 } else if (*patptr == '[') {
f7fe979e 710 const char* lenptr = ++patptr;
49704364
WL
711 symptr->howlen = e_number;
712 patptr = group_end( patptr, patend, ']' ) + 1;
713 /* what kind of [] is it? */
714 if (isDIGIT(*lenptr)) {
715 lenptr = get_num( lenptr, &symptr->length );
716 if( *lenptr != ']' )
717 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 718 _action( symptr ) );
49704364
WL
719 } else {
720 tempsym_t savsym = *symptr;
721 symptr->patend = patptr-1;
722 symptr->patptr = lenptr;
723 savsym.length = measure_struct(symptr);
724 *symptr = savsym;
725 }
726 } else {
727 symptr->howlen = e_no_len;
728 symptr->length = 1;
729 }
730
731 /* try to find / */
732 while (patptr < patend) {
733 if (isSPACE(*patptr))
734 patptr++;
735 else if (*patptr == '#') {
736 patptr++;
737 while (patptr < patend && *patptr != '\n')
738 patptr++;
739 if (patptr < patend)
740 patptr++;
741 } else {
66c611c5 742 if (*patptr == '/') {
49704364
WL
743 symptr->flags |= FLAG_SLASH;
744 patptr++;
66c611c5
MHM
745 if (patptr < patend &&
746 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 747 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 748 _action( symptr ) );
49704364
WL
749 }
750 break;
751 }
18529408 752 }
49704364
WL
753 } else {
754 /* at end - no count, no / */
755 symptr->howlen = e_no_len;
756 symptr->length = 1;
757 }
758
759 symptr->code = code;
fc241834 760 symptr->patptr = patptr;
49704364 761 return TRUE;
18529408 762 }
49704364 763 }
fc241834 764 symptr->patptr = patptr;
49704364 765 return FALSE;
18529408
IZ
766}
767
18529408 768/*
fc241834 769 There is no way to cleanly handle the case where we should process the
08ca2aa3 770 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
771 (e.g. estimates like strend-s as an upper bound for the number of
772 characters left wouldn't work). So if we foresee the need of this
773 (pattern starts with U or contains U0), we want to work on the encoded
774 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
775 themselves if they need to do a lot of unpacks like this on it
776*/
fc241834 777STATIC bool
08ca2aa3
TH
778need_utf8(const char *pat, const char *patend)
779{
780 bool first = TRUE;
7918f24d
NC
781
782 PERL_ARGS_ASSERT_NEED_UTF8;
783
08ca2aa3
TH
784 while (pat < patend) {
785 if (pat[0] == '#') {
786 pat++;
f7fe979e 787 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
788 if (!pat) return FALSE;
789 } else if (pat[0] == 'U') {
790 if (first || pat[1] == '0') return TRUE;
791 } else first = FALSE;
792 pat++;
793 }
794 return FALSE;
795}
796
797STATIC char
798first_symbol(const char *pat, const char *patend) {
7918f24d
NC
799 PERL_ARGS_ASSERT_FIRST_SYMBOL;
800
08ca2aa3
TH
801 while (pat < patend) {
802 if (pat[0] != '#') return pat[0];
803 pat++;
f7fe979e 804 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
805 if (!pat) return 0;
806 pat++;
807 }
808 return 0;
809}
810
811/*
dcccc8ff
KW
812
813=head1 Pack and Unpack
814
7accc089
JH
815=for apidoc unpackstring
816
21ebfc7a
DM
817The engine implementing the unpack() Perl function.
818
819Using the template pat..patend, this function unpacks the string
820s..strend into a number of mortal SVs, which it pushes onto the perl
821argument (@_) stack (so you will need to issue a C<PUTBACK> before and
72d33970 822C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
823pushed elements.
824
825The strend and patend pointers should point to the byte following the last
826character of each string.
827
828Although this function returns its values on the perl argument stack, it
829doesn't take any parameters from that stack (and thus in particular
830there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
831example).
7accc089
JH
832
833=cut */
834
835I32
f7fe979e 836Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 837{
f7fe979e 838 tempsym_t sym;
08ca2aa3 839
7918f24d
NC
840 PERL_ARGS_ASSERT_UNPACKSTRING;
841
f337b084 842 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
843 else if (need_utf8(pat, patend)) {
844 /* We probably should try to avoid this in case a scalar context call
845 wouldn't get to the "U0" */
846 STRLEN len = strend - s;
230e1fce 847 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
848 SAVEFREEPV(s);
849 strend = s + len;
f337b084 850 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
851 }
852
f337b084
TH
853 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
854 flags |= FLAG_PARSE_UTF8;
08ca2aa3 855
f7fe979e 856 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
857
858 return unpack_rec(&sym, s, s, strend, NULL );
859}
860
4136a0f7 861STATIC I32
f7fe979e 862S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 863{
20b7effb 864 dSP;
3297d27d 865 SV *sv = NULL;
f7fe979e 866 const I32 start_sp_offset = SP - PL_stack_base;
49704364 867 howlen_t howlen;
a6ec74c1 868 I32 checksum = 0;
92d41999 869 UV cuv = 0;
a6ec74c1 870 NV cdouble = 0.0;
f337b084 871 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 872 bool beyond = FALSE;
21c16052 873 bool explicit_length;
9e27e96a 874 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 875 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
876
877 PERL_ARGS_ASSERT_UNPACK_REC;
878
28be1210 879 symptr->strbeg = s - strbeg;
49704364 880
49704364 881 while (next_symbol(symptr)) {
a7a3cfaa 882 packprops_t props;
9e27e96a 883 I32 len;
f337b084 884 I32 datumtype = symptr->code;
a1219b5e 885 bool needs_swap;
206947d2 886 /* do first one only unless in list context
08ca2aa3 887 / is implemented by unpacking the count, then popping it from the
206947d2 888 stack, so must check that we're not in the middle of a / */
49704364 889 if ( unpack_only_one
206947d2 890 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 891 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 892 break;
49704364 893
f337b084 894 switch (howlen = symptr->howlen) {
fc241834
RGS
895 case e_star:
896 len = strend - strbeg; /* long enough */
49704364 897 break;
f337b084
TH
898 default:
899 /* e_no_len and e_number */
900 len = symptr->length;
901 break;
49704364 902 }
18529408 903
21c16052 904 explicit_length = TRUE;
a6ec74c1 905 redo_switch:
49704364 906 beyond = s >= strend;
a7a3cfaa
TH
907
908 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
909 if (props) {
910 /* props nonzero means we can process this letter. */
9e27e96a
AL
911 const long size = props & PACK_SIZE_MASK;
912 const long howmany = (strend - s) / size;
a7a3cfaa
TH
913 if (len > howmany)
914 len = howmany;
915
916 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
917 if (len && unpack_only_one) len = 1;
918 EXTEND(SP, len);
919 EXTEND_MORTAL(len);
78d46eaa
NC
920 }
921 }
a7a3cfaa 922
a1219b5e
NC
923 needs_swap = NEEDS_SWAP(datumtype);
924
1109a392 925 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 926 default:
1109a392 927 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 928
a6ec74c1 929 case '%':
49704364 930 if (howlen == e_no_len)
18529408 931 len = 16; /* len is not specified */
a6ec74c1 932 checksum = len;
92d41999 933 cuv = 0;
a6ec74c1 934 cdouble = 0;
18529408 935 continue;
81d52ecd 936
18529408
IZ
937 case '(':
938 {
49704364 939 tempsym_t savsym = *symptr;
9e27e96a 940 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 941 symptr->flags |= group_modifiers;
49704364 942 symptr->patend = savsym.grpend;
28be1210 943 symptr->previous = &savsym;
49704364 944 symptr->level++;
18529408 945 PUTBACK;
c6f750d1 946 if (len && unpack_only_one) len = 1;
18529408 947 while (len--) {
49704364 948 symptr->patptr = savsym.grpbeg;
f337b084
TH
949 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
950 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
951 unpack_rec(symptr, s, strbeg, strend, &s);
952 if (s == strend && savsym.howlen == e_star)
49704364 953 break; /* No way to continue */
18529408
IZ
954 }
955 SPAGAIN;
28be1210 956 savsym.flags = symptr->flags & ~group_modifiers;
49704364 957 *symptr = savsym;
18529408
IZ
958 break;
959 }
28be1210 960 case '.' | TYPE_IS_SHRIEKING:
28be1210 961 case '.': {
9e27e96a 962 const char *from;
28be1210 963 SV *sv;
9e27e96a 964 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
965 if (howlen == e_star) from = strbeg;
966 else if (len <= 0) from = s;
967 else {
968 tempsym_t *group = symptr;
969
970 while (--len && group) group = group->previous;
971 from = group ? strbeg + group->strbeg : strbeg;
972 }
973 sv = from <= s ?
00646304
CB
974 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 976 mXPUSHs(sv);
28be1210
TH
977 break;
978 }
28be1210 979 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 980 case '@':
28be1210 981 s = strbeg + symptr->strbeg;
28be1210 982 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 983 {
08ca2aa3
TH
984 while (len > 0) {
985 if (s >= strend)
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
987 s += UTF8SKIP(s);
988 len--;
989 }
990 if (s > strend)
991 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992 } else {
28be1210 993 if (strend-s < len)
fc241834 994 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 995 s += len;
08ca2aa3 996 }
a6ec74c1 997 break;
62f95557
IZ
998 case 'X' | TYPE_IS_SHRIEKING:
999 if (!len) /* Avoid division by 0 */
1000 len = 1;
08ca2aa3 1001 if (utf8) {
f7fe979e 1002 const char *hop, *last;
f337b084
TH
1003 I32 l = len;
1004 hop = last = strbeg;
1005 while (hop < s) {
1006 hop += UTF8SKIP(hop);
1007 if (--l == 0) {
08ca2aa3 1008 last = hop;
f337b084
TH
1009 l = len;
1010 }
fc241834 1011 }
f337b084
TH
1012 if (last > s)
1013 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1014 s = last;
1015 break;
f337b084
TH
1016 }
1017 len = (s - strbeg) % len;
924ba076 1018 /* FALLTHROUGH */
a6ec74c1 1019 case 'X':
08ca2aa3
TH
1020 if (utf8) {
1021 while (len > 0) {
1022 if (s <= strbeg)
1023 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1024 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1025 if (s <= strbeg)
1026 Perl_croak(aTHX_ "'X' outside of string in unpack");
1027 }
1028 len--;
1029 }
1030 } else {
fc241834
RGS
1031 if (len > s - strbeg)
1032 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1033 s -= len;
08ca2aa3 1034 }
a6ec74c1 1035 break;
9e27e96a
AL
1036 case 'x' | TYPE_IS_SHRIEKING: {
1037 I32 ai32;
62f95557
IZ
1038 if (!len) /* Avoid division by 0 */
1039 len = 1;
230e1fce
NC
1040 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1042 if (ai32 == 0) break;
1043 len -= ai32;
9e27e96a 1044 }
924ba076 1045 /* FALLTHROUGH */
a6ec74c1 1046 case 'x':
08ca2aa3
TH
1047 if (utf8) {
1048 while (len>0) {
1049 if (s >= strend)
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1051 s += UTF8SKIP(s);
1052 len--;
1053 }
1054 } else {
fc241834
RGS
1055 if (len > strend - s)
1056 Perl_croak(aTHX_ "'x' outside of string in unpack");
1057 s += len;
f337b084 1058 }
a6ec74c1
JH
1059 break;
1060 case '/':
49704364 1061 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1062
a6ec74c1
JH
1063 case 'A':
1064 case 'Z':
1065 case 'a':
08ca2aa3
TH
1066 if (checksum) {
1067 /* Preliminary length estimate is assumed done in 'W' */
1068 if (len > strend - s) len = strend - s;
1069 goto W_checksum;
1070 }
1071 if (utf8) {
1072 I32 l;
f7fe979e 1073 const char *hop;
08ca2aa3
TH
1074 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075 if (hop >= strend) {
1076 if (hop > strend)
1077 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1078 break;
fc241834 1079 }
a6ec74c1 1080 }
08ca2aa3
TH
1081 if (hop > strend)
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 len = hop - s;
1084 } else if (len > strend - s)
1085 len = strend - s;
1086
1087 if (datumtype == 'Z') {
1088 /* 'Z' strips stuff after first null */
f7fe979e 1089 const char *ptr, *end;
f337b084
TH
1090 end = s + len;
1091 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1092 sv = newSVpvn(s, ptr-s);
1093 if (howlen == e_star) /* exact for 'Z*' */
1094 len = ptr-s + (ptr != strend ? 1 : 0);
1095 } else if (datumtype == 'A') {
1096 /* 'A' strips both nulls and spaces */
f7fe979e 1097 const char *ptr;
18bdf90a
TH
1098 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1099 for (ptr = s+len-1; ptr >= s; ptr--)
1100 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1101 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1102 if (ptr >= s) ptr += UTF8SKIP(ptr);
1103 else ptr++;
28be1210 1104 if (ptr > s+len)
18bdf90a
TH
1105 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1106 } else {
1107 for (ptr = s+len-1; ptr >= s; ptr--)
1108 if (*ptr != 0 && !isSPACE(*ptr)) break;
1109 ptr++;
1110 }
08ca2aa3
TH
1111 sv = newSVpvn(s, ptr-s);
1112 } else sv = newSVpvn(s, len);
1113
1114 if (utf8) {
1115 SvUTF8_on(sv);
1116 /* Undo any upgrade done due to need_utf8() */
f337b084 1117 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1118 sv_utf8_downgrade(sv, 0);
a6ec74c1 1119 }
6e449a3a 1120 mXPUSHs(sv);
08ca2aa3 1121 s += len;
a6ec74c1
JH
1122 break;
1123 case 'B':
08ca2aa3
TH
1124 case 'b': {
1125 char *str;
49704364 1126 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1127 len = (strend - s) * 8;
1128 if (checksum) {
f337b084 1129 if (utf8)
08ca2aa3 1130 while (len >= 8 && s < strend) {
f337b084 1131 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1132 len -= 8;
1133 }
f337b084 1134 else
fc241834 1135 while (len >= 8) {
08ca2aa3 1136 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1137 len -= 8;
1138 }
08ca2aa3
TH
1139 if (len && s < strend) {
1140 U8 bits;
f337b084
TH
1141 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1142 if (datumtype == 'b')
a6ec74c1 1143 while (len-- > 0) {
92d41999 1144 if (bits & 1) cuv++;
a6ec74c1
JH
1145 bits >>= 1;
1146 }
f337b084 1147 else
a6ec74c1 1148 while (len-- > 0) {
08ca2aa3 1149 if (bits & 0x80) cuv++;
a6ec74c1
JH
1150 bits <<= 1;
1151 }
fc241834 1152 }
a6ec74c1
JH
1153 break;
1154 }
08ca2aa3 1155
561b68a9 1156 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1157 SvPOK_on(sv);
1158 str = SvPVX(sv);
1159 if (datumtype == 'b') {
f337b084 1160 U8 bits = 0;
f7fe979e 1161 const I32 ai32 = len;
08ca2aa3
TH
1162 for (len = 0; len < ai32; len++) {
1163 if (len & 7) bits >>= 1;
1164 else if (utf8) {
1165 if (s >= strend) break;
f337b084 1166 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1167 } else bits = *(U8 *) s++;
1168 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1169 }
08ca2aa3 1170 } else {
f337b084 1171 U8 bits = 0;
f7fe979e 1172 const I32 ai32 = len;
08ca2aa3
TH
1173 for (len = 0; len < ai32; len++) {
1174 if (len & 7) bits <<= 1;
1175 else if (utf8) {
1176 if (s >= strend) break;
f337b084 1177 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1178 } else bits = *(U8 *) s++;
1179 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1180 }
1181 }
1182 *str = '\0';
aa07b2f6 1183 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1184 XPUSHs(sv);
a6ec74c1 1185 break;
08ca2aa3 1186 }
a6ec74c1 1187 case 'H':
08ca2aa3 1188 case 'h': {
3297d27d 1189 char *str = NULL;
fc241834 1190 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1191 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1192 len = (strend - s) * 2;
858fe5e1
TC
1193 if (!checksum) {
1194 sv = sv_2mortal(newSV(len ? len : 1));
1195 SvPOK_on(sv);
1196 str = SvPVX(sv);
1197 }
a6ec74c1 1198 if (datumtype == 'h') {
f337b084 1199 U8 bits = 0;
9e27e96a 1200 I32 ai32 = len;
fc241834
RGS
1201 for (len = 0; len < ai32; len++) {
1202 if (len & 1) bits >>= 4;
1203 else if (utf8) {
1204 if (s >= strend) break;
f337b084 1205 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1206 } else bits = * (U8 *) s++;
858fe5e1
TC
1207 if (!checksum)
1208 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1209 }
08ca2aa3 1210 } else {
f337b084 1211 U8 bits = 0;
f7fe979e 1212 const I32 ai32 = len;
08ca2aa3
TH
1213 for (len = 0; len < ai32; len++) {
1214 if (len & 1) bits <<= 4;
1215 else if (utf8) {
1216 if (s >= strend) break;
f337b084 1217 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1218 } else bits = *(U8 *) s++;
858fe5e1
TC
1219 if (!checksum)
1220 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1221 }
1222 }
858fe5e1
TC
1223 if (!checksum) {
1224 *str = '\0';
1225 SvCUR_set(sv, str - SvPVX_const(sv));
1226 XPUSHs(sv);
1227 }
a6ec74c1 1228 break;
08ca2aa3 1229 }
1651fc44
ML
1230 case 'C':
1231 if (len == 0) {
1232 if (explicit_length)
1233 /* Switch to "character" mode */
1234 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1235 break;
1236 }
924ba076 1237 /* FALLTHROUGH */
a6ec74c1 1238 case 'c':
1651fc44
ML
1239 while (len-- > 0 && s < strend) {
1240 int aint;
1241 if (utf8)
1242 {
1243 STRLEN retlen;
1244 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 s += retlen;
1249 }
1250 else
1251 aint = *(U8 *)(s)++;
1252 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1253 aint -= 256;
08ca2aa3 1254 if (!checksum)
6e449a3a 1255 mPUSHi(aint);
73cb7263
NC
1256 else if (checksum > bits_in_uv)
1257 cdouble += (NV)aint;
1258 else
1259 cuv += aint;
a6ec74c1
JH
1260 }
1261 break;
08ca2aa3
TH
1262 case 'W':
1263 W_checksum:
1651fc44 1264 if (utf8) {
08ca2aa3 1265 while (len-- > 0 && s < strend) {
08ca2aa3 1266 STRLEN retlen;
f7fe979e 1267 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1269 if (retlen == (STRLEN) -1 || retlen == 0)
1270 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1271 s += retlen;
1272 if (!checksum)
6e449a3a 1273 mPUSHu(val);
08ca2aa3
TH
1274 else if (checksum > bits_in_uv)
1275 cdouble += (NV) val;
d6d3e8bd 1276 else
08ca2aa3 1277 cuv += val;
fc241834 1278 }
08ca2aa3 1279 } else if (!checksum)
a6ec74c1 1280 while (len-- > 0) {
f7fe979e 1281 const U8 ch = *(U8 *) s++;
6e449a3a 1282 mPUSHu(ch);
a6ec74c1 1283 }
08ca2aa3
TH
1284 else if (checksum > bits_in_uv)
1285 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1286 else
1287 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1288 break;
1289 case 'U':
35bcd338 1290 if (len == 0) {
c5333953 1291 if (explicit_length && howlen != e_star) {
08ca2aa3 1292 /* Switch to "bytes in UTF-8" mode */
f337b084 1293 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1294 else
1295 /* Should be impossible due to the need_utf8() test */
1296 Perl_croak(aTHX_ "U0 mode on a byte string");
1297 }
35bcd338
JH
1298 break;
1299 }
08ca2aa3 1300 if (len > strend - s) len = strend - s;
fc241834 1301 if (!checksum) {
08ca2aa3
TH
1302 if (len && unpack_only_one) len = 1;
1303 EXTEND(SP, len);
1304 EXTEND_MORTAL(len);
fc241834 1305 }
08ca2aa3
TH
1306 while (len-- > 0 && s < strend) {
1307 STRLEN retlen;
1308 UV auv;
1309 if (utf8) {
1310 U8 result[UTF8_MAXLEN];
f7fe979e 1311 const char *ptr = s;
08ca2aa3 1312 STRLEN len;
08ca2aa3
TH
1313 /* Bug: warns about bad utf8 even if we are short on bytes
1314 and will break out of the loop */
230e1fce
NC
1315 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1316 'U'))
08ca2aa3
TH
1317 break;
1318 len = UTF8SKIP(result);
fc241834 1319 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1320 (char *) &result[1], len-1, 'U')) break;
c80e42f3 1321 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1322 s = ptr;
1323 } else {
c80e42f3 1324 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1325 if (retlen == (STRLEN) -1 || retlen == 0)
1326 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1327 s += retlen;
1328 }
1329 if (!checksum)
6e449a3a 1330 mPUSHu(auv);
73cb7263 1331 else if (checksum > bits_in_uv)
08ca2aa3 1332 cdouble += (NV) auv;
73cb7263 1333 else
08ca2aa3 1334 cuv += auv;
a6ec74c1
JH
1335 }
1336 break;
49704364
WL
1337 case 's' | TYPE_IS_SHRIEKING:
1338#if SHORTSIZE != SIZE16
73cb7263 1339 while (len-- > 0) {
08ca2aa3 1340 short ashort;
aaec8192 1341 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1342 if (!checksum)
6e449a3a 1343 mPUSHi(ashort);
73cb7263
NC
1344 else if (checksum > bits_in_uv)
1345 cdouble += (NV)ashort;
1346 else
1347 cuv += ashort;
49704364
WL
1348 }
1349 break;
1350#else
924ba076 1351 /* FALLTHROUGH */
a6ec74c1 1352#endif
49704364 1353 case 's':
73cb7263 1354 while (len-- > 0) {
08ca2aa3
TH
1355 I16 ai16;
1356
1357#if U16SIZE > SIZE16
1358 ai16 = 0;
1359#endif
aaec8192 1360 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1361#if U16SIZE > SIZE16
73cb7263
NC
1362 if (ai16 > 32767)
1363 ai16 -= 65536;
a6ec74c1 1364#endif
08ca2aa3 1365 if (!checksum)
6e449a3a 1366 mPUSHi(ai16);
73cb7263
NC
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)ai16;
1369 else
1370 cuv += ai16;
a6ec74c1
JH
1371 }
1372 break;
49704364
WL
1373 case 'S' | TYPE_IS_SHRIEKING:
1374#if SHORTSIZE != SIZE16
73cb7263 1375 while (len-- > 0) {
08ca2aa3 1376 unsigned short aushort;
aaec8192
NC
1377 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1378 needs_swap);
08ca2aa3 1379 if (!checksum)
6e449a3a 1380 mPUSHu(aushort);
73cb7263
NC
1381 else if (checksum > bits_in_uv)
1382 cdouble += (NV)aushort;
1383 else
1384 cuv += aushort;
49704364
WL
1385 }
1386 break;
1387#else
924ba076 1388 /* FALLTHROUGH */
49704364 1389#endif
a6ec74c1
JH
1390 case 'v':
1391 case 'n':
1392 case 'S':
73cb7263 1393 while (len-- > 0) {
08ca2aa3
TH
1394 U16 au16;
1395#if U16SIZE > SIZE16
1396 au16 = 0;
1397#endif
aaec8192 1398 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1399 if (datumtype == 'n')
1400 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1401 if (datumtype == 'v')
1402 au16 = vtohs(au16);
08ca2aa3 1403 if (!checksum)
6e449a3a 1404 mPUSHu(au16);
73cb7263 1405 else if (checksum > bits_in_uv)
f337b084 1406 cdouble += (NV) au16;
73cb7263
NC
1407 else
1408 cuv += au16;
a6ec74c1
JH
1409 }
1410 break;
068bd2e7
MHM
1411 case 'v' | TYPE_IS_SHRIEKING:
1412 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1413 while (len-- > 0) {
08ca2aa3
TH
1414 I16 ai16;
1415# if U16SIZE > SIZE16
1416 ai16 = 0;
1417# endif
aaec8192 1418 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1419 /* There should never be any byte-swapping here. */
1420 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1421 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1422 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1423 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1424 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1425 if (!checksum)
6e449a3a 1426 mPUSHi(ai16);
73cb7263 1427 else if (checksum > bits_in_uv)
08ca2aa3 1428 cdouble += (NV) ai16;
73cb7263
NC
1429 else
1430 cuv += ai16;
068bd2e7
MHM
1431 }
1432 break;
a6ec74c1 1433 case 'i':
49704364 1434 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1435 while (len-- > 0) {
08ca2aa3 1436 int aint;
aaec8192 1437 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1438 if (!checksum)
6e449a3a 1439 mPUSHi(aint);
73cb7263
NC
1440 else if (checksum > bits_in_uv)
1441 cdouble += (NV)aint;
1442 else
1443 cuv += aint;
a6ec74c1
JH
1444 }
1445 break;
1446 case 'I':
49704364 1447 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1448 while (len-- > 0) {
08ca2aa3 1449 unsigned int auint;
aaec8192 1450 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1451 if (!checksum)
6e449a3a 1452 mPUSHu(auint);
73cb7263
NC
1453 else if (checksum > bits_in_uv)
1454 cdouble += (NV)auint;
1455 else
1456 cuv += auint;
a6ec74c1
JH
1457 }
1458 break;
92d41999 1459 case 'j':
73cb7263 1460 while (len-- > 0) {
08ca2aa3 1461 IV aiv;
aaec8192 1462 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1463 if (!checksum)
6e449a3a 1464 mPUSHi(aiv);
73cb7263
NC
1465 else if (checksum > bits_in_uv)
1466 cdouble += (NV)aiv;
1467 else
1468 cuv += aiv;
92d41999
JH
1469 }
1470 break;
1471 case 'J':
73cb7263 1472 while (len-- > 0) {
08ca2aa3 1473 UV auv;
aaec8192 1474 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1475 if (!checksum)
6e449a3a 1476 mPUSHu(auv);
73cb7263
NC
1477 else if (checksum > bits_in_uv)
1478 cdouble += (NV)auv;
1479 else
1480 cuv += auv;
92d41999
JH
1481 }
1482 break;
49704364
WL
1483 case 'l' | TYPE_IS_SHRIEKING:
1484#if LONGSIZE != SIZE32
73cb7263 1485 while (len-- > 0) {
08ca2aa3 1486 long along;
aaec8192 1487 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1488 if (!checksum)
6e449a3a 1489 mPUSHi(along);
73cb7263
NC
1490 else if (checksum > bits_in_uv)
1491 cdouble += (NV)along;
1492 else
1493 cuv += along;
49704364
WL
1494 }
1495 break;
1496#else
924ba076 1497 /* FALLTHROUGH */
a6ec74c1 1498#endif
49704364 1499 case 'l':
73cb7263 1500 while (len-- > 0) {
08ca2aa3
TH
1501 I32 ai32;
1502#if U32SIZE > SIZE32
1503 ai32 = 0;
1504#endif
aaec8192 1505 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1506#if U32SIZE > SIZE32
08ca2aa3 1507 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1508#endif
08ca2aa3 1509 if (!checksum)
6e449a3a 1510 mPUSHi(ai32);
73cb7263
NC
1511 else if (checksum > bits_in_uv)
1512 cdouble += (NV)ai32;
1513 else
1514 cuv += ai32;
a6ec74c1
JH
1515 }
1516 break;
49704364
WL
1517 case 'L' | TYPE_IS_SHRIEKING:
1518#if LONGSIZE != SIZE32
73cb7263 1519 while (len-- > 0) {
08ca2aa3 1520 unsigned long aulong;
aaec8192 1521 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1522 if (!checksum)
6e449a3a 1523 mPUSHu(aulong);
73cb7263
NC
1524 else if (checksum > bits_in_uv)
1525 cdouble += (NV)aulong;
1526 else
1527 cuv += aulong;
49704364
WL
1528 }
1529 break;
1530#else
924ba076 1531 /* FALLTHROUGH */
49704364 1532#endif
a6ec74c1
JH
1533 case 'V':
1534 case 'N':
1535 case 'L':
73cb7263 1536 while (len-- > 0) {
08ca2aa3
TH
1537 U32 au32;
1538#if U32SIZE > SIZE32
1539 au32 = 0;
1540#endif
aaec8192 1541 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1542 if (datumtype == 'N')
1543 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1544 if (datumtype == 'V')
1545 au32 = vtohl(au32);
08ca2aa3 1546 if (!checksum)
6e449a3a 1547 mPUSHu(au32);
fc241834
RGS
1548 else if (checksum > bits_in_uv)
1549 cdouble += (NV)au32;
1550 else
1551 cuv += au32;
a6ec74c1
JH
1552 }
1553 break;
068bd2e7
MHM
1554 case 'V' | TYPE_IS_SHRIEKING:
1555 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1556 while (len-- > 0) {
08ca2aa3 1557 I32 ai32;
f8e5a5db 1558#if U32SIZE > SIZE32
08ca2aa3 1559 ai32 = 0;
f8e5a5db 1560#endif
aaec8192 1561 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1562 /* There should never be any byte swapping here. */
1563 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1564 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1565 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1566 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1567 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1568 if (!checksum)
6e449a3a 1569 mPUSHi(ai32);
73cb7263
NC
1570 else if (checksum > bits_in_uv)
1571 cdouble += (NV)ai32;
1572 else
1573 cuv += ai32;
068bd2e7
MHM
1574 }
1575 break;
a6ec74c1 1576 case 'p':
a6ec74c1 1577 while (len-- > 0) {
f7fe979e 1578 const char *aptr;
aaec8192 1579 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1580 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1581 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1582 }
1583 break;
1584 case 'w':
a6ec74c1
JH
1585 {
1586 UV auv = 0;
1587 U32 bytes = 0;
fc241834 1588
08ca2aa3
TH
1589 while (len > 0 && s < strend) {
1590 U8 ch;
f337b084 1591 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1592 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1593 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1594 if (ch < 0x80) {
a6ec74c1 1595 bytes = 0;
6e449a3a 1596 mPUSHu(auv);
a6ec74c1
JH
1597 len--;
1598 auv = 0;
08ca2aa3 1599 continue;
a6ec74c1 1600 }
08ca2aa3 1601 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1602 const char *t;
a6ec74c1 1603
f5992bc4 1604 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1605 while (s < strend) {
f337b084 1606 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1607 sv = mul128(sv, (U8)(ch & 0x7f));
1608 if (!(ch & 0x80)) {
a6ec74c1
JH
1609 bytes = 0;
1610 break;
1611 }
1612 }
10516c54 1613 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1614 while (*t == '0')
1615 t++;
1616 sv_chop(sv, t);
6e449a3a 1617 mPUSHs(sv);
a6ec74c1
JH
1618 len--;
1619 auv = 0;
1620 }
1621 }
1622 if ((s >= strend) && bytes)
49704364 1623 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1624 }
1625 break;
1626 case 'P':
49704364
WL
1627 if (symptr->howlen == e_star)
1628 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1629 EXTEND(SP, 1);
2d3e0934 1630 if (s + sizeof(char*) <= strend) {
08ca2aa3 1631 char *aptr;
aaec8192 1632 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1633 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1634 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1635 }
a6ec74c1 1636 break;
c174bf3b 1637#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1638 case 'q':
73cb7263 1639 while (len-- > 0) {
08ca2aa3 1640 Quad_t aquad;
aaec8192 1641 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1642 if (!checksum)
c174bf3b 1643 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1644 else if (checksum > bits_in_uv)
1645 cdouble += (NV)aquad;
1646 else
1647 cuv += aquad;
1648 }
a6ec74c1
JH
1649 break;
1650 case 'Q':
73cb7263 1651 while (len-- > 0) {
08ca2aa3 1652 Uquad_t auquad;
aaec8192 1653 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1654 if (!checksum)
c174bf3b 1655 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1656 else if (checksum > bits_in_uv)
1657 cdouble += (NV)auquad;
1658 else
1659 cuv += auquad;
a6ec74c1
JH
1660 }
1661 break;
1640b983 1662#endif
a6ec74c1
JH
1663 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1664 case 'f':
73cb7263 1665 while (len-- > 0) {
08ca2aa3 1666 float afloat;
aaec8192 1667 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1668 if (!checksum)
6e449a3a 1669 mPUSHn(afloat);
08ca2aa3 1670 else
73cb7263 1671 cdouble += afloat;
fc241834 1672 }
a6ec74c1
JH
1673 break;
1674 case 'd':
73cb7263 1675 while (len-- > 0) {
08ca2aa3 1676 double adouble;
aaec8192 1677 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1678 if (!checksum)
6e449a3a 1679 mPUSHn(adouble);
08ca2aa3 1680 else
73cb7263 1681 cdouble += adouble;
fc241834 1682 }
a6ec74c1 1683 break;
92d41999 1684 case 'F':
73cb7263 1685 while (len-- > 0) {
275663fa 1686 NV_bytes anv;
aaec8192
NC
1687 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1688 datumtype, needs_swap);
08ca2aa3 1689 if (!checksum)
275663fa 1690 mPUSHn(anv.nv);
08ca2aa3 1691 else
275663fa 1692 cdouble += anv.nv;
fc241834 1693 }
92d41999
JH
1694 break;
1695#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1696 case 'D':
73cb7263 1697 while (len-- > 0) {
275663fa 1698 ld_bytes aldouble;
aaec8192
NC
1699 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1700 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1701 /* The most common long double format, the x86 80-bit
1702 * extended precision, has either 2 or 6 unused bytes,
1703 * which may contain garbage, which may contain
1704 * unintentional data. While we do zero the bytes of
1705 * the long double data in pack(), here in unpack() we
1706 * don't, because it's really hard to envision that
1707 * reading the long double off aldouble would be
e075ae47 1708 * affected by the unused bytes.
42262fd3
JH
1709 *
1710 * Note that trying to unpack 'long doubles' of 'long
1711 * doubles' packed in another system is in the general
1712 * case doomed without having more detail. */
08ca2aa3 1713 if (!checksum)
275663fa 1714 mPUSHn(aldouble.ld);
08ca2aa3 1715 else
275663fa 1716 cdouble += aldouble.ld;
92d41999
JH
1717 }
1718 break;
1719#endif
a6ec74c1 1720 case 'u':
858fe5e1 1721 if (!checksum) {
f7fe979e 1722 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1723 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1724 if (l) SvPOK_on(sv);
1725 }
1726 if (utf8) {
1727 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1728 I32 a, b, c, d;
db187877 1729 char hunk[3];
08ca2aa3 1730
08ca2aa3
TH
1731 while (len > 0) {
1732 next_uni_uu(aTHX_ &s, strend, &a);
1733 next_uni_uu(aTHX_ &s, strend, &b);
1734 next_uni_uu(aTHX_ &s, strend, &c);
1735 next_uni_uu(aTHX_ &s, strend, &d);
1736 hunk[0] = (char)((a << 2) | (b >> 4));
1737 hunk[1] = (char)((b << 4) | (c >> 2));
1738 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1739 if (!checksum)
1740 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1741 len -= 3;
1742 }
1743 if (s < strend) {
f7fe979e
AL
1744 if (*s == '\n') {
1745 s++;
1746 }
08ca2aa3
TH
1747 else {
1748 /* possible checksum byte */
f7fe979e
AL
1749 const char *skip = s+UTF8SKIP(s);
1750 if (skip < strend && *skip == '\n')
1751 s = skip+1;
08ca2aa3
TH
1752 }
1753 }
1754 }
1755 } else {
fc241834
RGS
1756 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1757 I32 a, b, c, d;
db187877 1758 char hunk[3];
a6ec74c1 1759
fc241834
RGS
1760 len = PL_uudmap[*(U8*)s++] & 077;
1761 while (len > 0) {
1762 if (s < strend && ISUUCHAR(*s))
1763 a = PL_uudmap[*(U8*)s++] & 077;
1764 else
1765 a = 0;
1766 if (s < strend && ISUUCHAR(*s))
1767 b = PL_uudmap[*(U8*)s++] & 077;
1768 else
1769 b = 0;
1770 if (s < strend && ISUUCHAR(*s))
1771 c = PL_uudmap[*(U8*)s++] & 077;
1772 else
1773 c = 0;
1774 if (s < strend && ISUUCHAR(*s))
1775 d = PL_uudmap[*(U8*)s++] & 077;
1776 else
1777 d = 0;
1778 hunk[0] = (char)((a << 2) | (b >> 4));
1779 hunk[1] = (char)((b << 4) | (c >> 2));
1780 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1781 if (!checksum)
1782 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1783 len -= 3;
1784 }
1785 if (*s == '\n')
1786 s++;
1787 else /* possible checksum byte */
1788 if (s + 1 < strend && s[1] == '\n')
1789 s += 2;
a6ec74c1 1790 }
08ca2aa3 1791 }
858fe5e1
TC
1792 if (!checksum)
1793 XPUSHs(sv);
a6ec74c1
JH
1794 break;
1795 }
49704364 1796
a6ec74c1 1797 if (checksum) {
1109a392 1798 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1799 (checksum > bits_in_uv &&
08ca2aa3
TH
1800 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1801 NV trouble, anv;
a6ec74c1 1802
08ca2aa3 1803 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1804 while (checksum >= 16) {
1805 checksum -= 16;
08ca2aa3 1806 anv *= 65536.0;
a6ec74c1 1807 }
a6ec74c1 1808 while (cdouble < 0.0)
08ca2aa3
TH
1809 cdouble += anv;
1810 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1811 sv = newSVnv(cdouble);
a6ec74c1
JH
1812 }
1813 else {
fa8ec7c1
NC
1814 if (checksum < bits_in_uv) {
1815 UV mask = ((UV)1 << checksum) - 1;
92d41999 1816 cuv &= mask;
a6ec74c1 1817 }
c4c5f44a 1818 sv = newSVuv(cuv);
a6ec74c1 1819 }
6e449a3a 1820 mXPUSHs(sv);
a6ec74c1
JH
1821 checksum = 0;
1822 }
fc241834 1823
49704364
WL
1824 if (symptr->flags & FLAG_SLASH){
1825 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1826 break;
49704364
WL
1827 if( next_symbol(symptr) ){
1828 if( symptr->howlen == e_number )
1829 Perl_croak(aTHX_ "Count after length/code in unpack" );
1830 if( beyond ){
1831 /* ...end of char buffer then no decent length available */
1832 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1833 } else {
1834 /* take top of stack (hope it's numeric) */
1835 len = POPi;
1836 if( len < 0 )
1837 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1838 }
1839 } else {
1840 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1841 }
1842 datumtype = symptr->code;
21c16052 1843 explicit_length = FALSE;
49704364
WL
1844 goto redo_switch;
1845 }
a6ec74c1 1846 }
49704364 1847
18529408
IZ
1848 if (new_s)
1849 *new_s = s;
1850 PUTBACK;
1851 return SP - PL_stack_base - start_sp_offset;
1852}
1853
1854PP(pp_unpack)
1855{
1856 dSP;
bab9c0ac 1857 dPOPPOPssrl;
18529408
IZ
1858 I32 gimme = GIMME_V;
1859 STRLEN llen;
1860 STRLEN rlen;
5c144d81
NC
1861 const char *pat = SvPV_const(left, llen);
1862 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1863 const char *strend = s + rlen;
1864 const char *patend = pat + llen;
08ca2aa3 1865 I32 cnt;
18529408
IZ
1866
1867 PUTBACK;
7accc089 1868 cnt = unpackstring(pat, patend, s, strend,
49704364 1869 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1870 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1871
18529408
IZ
1872 SPAGAIN;
1873 if ( !cnt && gimme == G_SCALAR )
1874 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1875 RETURN;
1876}
1877
f337b084 1878STATIC U8 *
f7fe979e 1879doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1880{
f337b084 1881 *h++ = PL_uuemap[len];
a6ec74c1 1882 while (len > 2) {
f337b084
TH
1883 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1884 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1885 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1886 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1887 s += 3;
1888 len -= 3;
1889 }
1890 if (len > 0) {
f7fe979e 1891 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1892 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1893 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1894 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1895 *h++ = PL_uuemap[0];
a6ec74c1 1896 }
f337b084
TH
1897 *h++ = '\n';
1898 return h;
a6ec74c1
JH
1899}
1900
1901STATIC SV *
f7fe979e 1902S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1903{
8b6e33c7
AL
1904 SV *result = newSVpvn(s, l);
1905 char *const result_c = SvPV_nolen(result); /* convenience */
1906 char *out = result_c;
1907 bool skip = 1;
1908 bool ignore = 0;
a6ec74c1 1909
7918f24d
NC
1910 PERL_ARGS_ASSERT_IS_AN_INT;
1911
a6ec74c1
JH
1912 while (*s) {
1913 switch (*s) {
1914 case ' ':
1915 break;
1916 case '+':
1917 if (!skip) {
1918 SvREFCNT_dec(result);
1919 return (NULL);
1920 }
1921 break;
1922 case '0':
1923 case '1':
1924 case '2':
1925 case '3':
1926 case '4':
1927 case '5':
1928 case '6':
1929 case '7':
1930 case '8':
1931 case '9':
1932 skip = 0;
1933 if (!ignore) {
1934 *(out++) = *s;
1935 }
1936 break;
1937 case '.':
1938 ignore = 1;
1939 break;
1940 default:
1941 SvREFCNT_dec(result);
1942 return (NULL);
1943 }
1944 s++;
1945 }
1946 *(out++) = '\0';
1947 SvCUR_set(result, out - result_c);
1948 return (result);
1949}
1950
1951/* pnum must be '\0' terminated */
1952STATIC int
1953S_div128(pTHX_ SV *pnum, bool *done)
1954{
8b6e33c7
AL
1955 STRLEN len;
1956 char * const s = SvPV(pnum, len);
1957 char *t = s;
1958 int m = 0;
1959
7918f24d
NC
1960 PERL_ARGS_ASSERT_DIV128;
1961
8b6e33c7
AL
1962 *done = 1;
1963 while (*t) {
1964 const int i = m * 10 + (*t - '0');
1965 const int r = (i >> 7); /* r < 10 */
1966 m = i & 0x7F;
1967 if (r) {
1968 *done = 0;
1969 }
1970 *(t++) = '0' + r;
a6ec74c1 1971 }
8b6e33c7
AL
1972 *(t++) = '\0';
1973 SvCUR_set(pnum, (STRLEN) (t - s));
1974 return (m);
a6ec74c1
JH
1975}
1976
18529408 1977/*
7accc089
JH
1978=for apidoc packlist
1979
1980The engine implementing pack() Perl function.
1981
bfce84ec
AL
1982=cut
1983*/
7accc089
JH
1984
1985void
5aaab254 1986Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1987{
aadb217d
JH
1988 tempsym_t sym;
1989
7918f24d
NC
1990 PERL_ARGS_ASSERT_PACKLIST;
1991
f7fe979e 1992 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1993
f337b084
TH
1994 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1995 Also make sure any UTF8 flag is loaded */
56eb0262 1996 SvPV_force_nolen(cat);
bfce84ec
AL
1997 if (DO_UTF8(cat))
1998 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1999
49704364
WL
2000 (void)pack_rec( cat, &sym, beglist, endlist );
2001}
2002
f337b084
TH
2003/* like sv_utf8_upgrade, but also repoint the group start markers */
2004STATIC void
2005marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2006 STRLEN len;
2007 tempsym_t *group;
f7fe979e
AL
2008 const char *from_ptr, *from_start, *from_end, **marks, **m;
2009 char *to_start, *to_ptr;
f337b084
TH
2010
2011 if (SvUTF8(sv)) return;
2012
aa07b2f6 2013 from_start = SvPVX_const(sv);
f337b084
TH
2014 from_end = from_start + SvCUR(sv);
2015 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 2016 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2017 if (from_ptr == from_end) {
2018 /* Simple case: no character needs to be changed */
2019 SvUTF8_on(sv);
2020 return;
2021 }
2022
3473cf63 2023 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2024 Newx(to_start, len, char);
f337b084
TH
2025 Copy(from_start, to_start, from_ptr-from_start, char);
2026 to_ptr = to_start + (from_ptr-from_start);
2027
a02a5408 2028 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2029 for (group=sym_ptr; group; group = group->previous)
2030 marks[group->level] = from_start + group->strbeg;
2031 marks[sym_ptr->level+1] = from_end+1;
2032 for (m = marks; *m < from_ptr; m++)
2033 *m = to_start + (*m-from_start);
2034
2035 for (;from_ptr < from_end; from_ptr++) {
2036 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2037 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2038 }
2039 *to_ptr = 0;
2040
2041 while (*m == from_ptr) *m++ = to_ptr;
2042 if (m != marks + sym_ptr->level+1) {
2043 Safefree(marks);
2044 Safefree(to_start);
5637ef5b
NC
2045 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2046 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2047 }
2048 for (group=sym_ptr; group; group = group->previous)
2049 group->strbeg = marks[group->level] - to_start;
2050 Safefree(marks);
2051
2052 if (SvOOK(sv)) {
2053 if (SvIVX(sv)) {
b162af07 2054 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2055 from_start -= SvIVX(sv);
2056 SvIV_set(sv, 0);
2057 }
2058 SvFLAGS(sv) &= ~SVf_OOK;
2059 }
2060 if (SvLEN(sv) != 0)
2061 Safefree(from_start);
f880fe2f 2062 SvPV_set(sv, to_start);
b162af07
SP
2063 SvCUR_set(sv, to_ptr - to_start);
2064 SvLEN_set(sv, len);
f337b084
TH
2065 SvUTF8_on(sv);
2066}
2067
2068/* Exponential string grower. Makes string extension effectively O(n)
2069 needed says how many extra bytes we need (not counting the final '\0')
2070 Only grows the string if there is an actual lack of space
2071*/
2072STATIC char *
0bd48802 2073S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2074 const STRLEN cur = SvCUR(sv);
2075 const STRLEN len = SvLEN(sv);
f337b084 2076 STRLEN extend;
7918f24d
NC
2077
2078 PERL_ARGS_ASSERT_SV_EXP_GROW;
2079
f337b084
TH
2080 if (len - cur > needed) return SvPVX(sv);
2081 extend = needed > len ? needed : len;
2082 return SvGROW(sv, len+extend+1);
2083}
49704364 2084
354b74ae 2085static void
b197e565 2086S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2087{
2088 SvGETMAGIC(sv);
99f450cc 2089 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2090 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2091 const NV nv = SvNV_nomg(sv);
2092 if (c == 'w')
2093 Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
2094 else
2095 Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
2096 }
2097}
2098
b197e565
FC
2099#define SvIV_no_inf(sv,d) (S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2100#define SvUV_no_inf(sv,d) (S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2101
49704364
WL
2102STATIC
2103SV **
f337b084 2104S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2105{
49704364 2106 tempsym_t lookahead;
f337b084
TH
2107 I32 items = endlist - beglist;
2108 bool found = next_symbol(symptr);
2109 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2110 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2111 char* from;
f337b084 2112
7918f24d
NC
2113 PERL_ARGS_ASSERT_PACK_REC;
2114
f337b084
TH
2115 if (symptr->level == 0 && found && symptr->code == 'U') {
2116 marked_upgrade(aTHX_ cat, symptr);
2117 symptr->flags |= FLAG_DO_UTF8;
2118 utf8 = 0;
49704364 2119 }
f337b084 2120 symptr->strbeg = SvCUR(cat);
49704364
WL
2121
2122 while (found) {
f337b084
TH
2123 SV *fromstr;
2124 STRLEN fromlen;
2125 I32 len;
a0714e2c 2126 SV *lengthcode = NULL;
49704364 2127 I32 datumtype = symptr->code;
f337b084
TH
2128 howlen_t howlen = symptr->howlen;
2129 char *start = SvPVX(cat);
2130 char *cur = start + SvCUR(cat);
a1219b5e 2131 bool needs_swap;
49704364 2132
f337b084 2133#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
0c7df902 2134#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2135
2136 switch (howlen) {
fc241834 2137 case e_star:
f337b084
TH
2138 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2139 0 : items;
2140 break;
2141 default:
2142 /* e_no_len and e_number */
2143 len = symptr->length;
49704364
WL
2144 break;
2145 }
2146
f337b084 2147 if (len) {
a7a3cfaa 2148 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2149
a7a3cfaa
TH
2150 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2151 /* We can process this letter. */
2152 STRLEN size = props & PACK_SIZE_MASK;
2153 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2154 }
f337b084
TH
2155 }
2156
49704364
WL
2157 /* Look ahead for next symbol. Do we have code/code? */
2158 lookahead = *symptr;
2159 found = next_symbol(&lookahead);
246f24af
TH
2160 if (symptr->flags & FLAG_SLASH) {
2161 IV count;
f337b084 2162 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2163 if (strchr("aAZ", lookahead.code)) {
2164 if (lookahead.howlen == e_number) count = lookahead.length;
2165 else {
ce399ba6 2166 if (items > 0) {
48a5da33 2167 count = sv_len_utf8(*beglist);
ce399ba6 2168 }
246f24af
TH
2169 else count = 0;
2170 if (lookahead.code == 'Z') count++;
2171 }
2172 } else {
2173 if (lookahead.howlen == e_number && lookahead.length < items)
2174 count = lookahead.length;
2175 else count = items;
2176 }
2177 lookahead.howlen = e_number;
2178 lookahead.length = count;
2179 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2180 }
49704364 2181
a1219b5e
NC
2182 needs_swap = NEEDS_SWAP(datumtype);
2183
fc241834
RGS
2184 /* Code inside the switch must take care to properly update
2185 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2186 doesn't simply leave using break */
0c7df902 2187 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2188 default:
f337b084
TH
2189 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2190 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2191 case '%':
49704364 2192 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2193
28be1210 2194 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2195 case '.':
2196 if (howlen == e_star) from = start;
2197 else if (len == 0) from = cur;
2198 else {
2199 tempsym_t *group = symptr;
2200
2201 while (--len && group) group = group->previous;
2202 from = group ? start + group->strbeg : start;
2203 }
2204 fromstr = NEXTFROM;
354b74ae 2205 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2206 goto resize;
28be1210 2207 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2208 case '@':
28be1210
TH
2209 from = start + symptr->strbeg;
2210 resize:
28be1210 2211 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2212 if (len >= 0) {
2213 while (len && from < cur) {
2214 from += UTF8SKIP(from);
2215 len--;
2216 }
2217 if (from > cur)
2218 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2219 if (len) {
2220 /* Here we know from == cur */
2221 grow:
2222 GROWING(0, cat, start, cur, len);
2223 Zero(cur, len, char);
2224 cur += len;
2225 } else if (from < cur) {
2226 len = cur - from;
2227 goto shrink;
2228 } else goto no_change;
2229 } else {
2230 cur = from;
2231 len = -len;
2232 goto utf8_shrink;
f337b084 2233 }
28be1210
TH
2234 else {
2235 len -= cur - from;
f337b084 2236 if (len > 0) goto grow;
28be1210 2237 if (len == 0) goto no_change;
fc241834 2238 len = -len;
28be1210 2239 goto shrink;
f337b084 2240 }
a6ec74c1 2241 break;
81d52ecd 2242
fc241834 2243 case '(': {
49704364 2244 tempsym_t savsym = *symptr;
66c611c5
MHM
2245 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2246 symptr->flags |= group_modifiers;
49704364
WL
2247 symptr->patend = savsym.grpend;
2248 symptr->level++;
f337b084 2249 symptr->previous = &lookahead;
18529408 2250 while (len--) {
f337b084
TH
2251 U32 was_utf8;
2252 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2253 else symptr->flags &= ~FLAG_PARSE_UTF8;
2254 was_utf8 = SvUTF8(cat);
49704364 2255 symptr->patptr = savsym.grpbeg;
f337b084
TH
2256 beglist = pack_rec(cat, symptr, beglist, endlist);
2257 if (SvUTF8(cat) != was_utf8)
2258 /* This had better be an upgrade while in utf8==0 mode */
2259 utf8 = 1;
2260
49704364 2261 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2262 break; /* No way to continue */
2263 }
ee790063 2264 items = endlist - beglist;
f337b084
TH
2265 lookahead.flags = symptr->flags & ~group_modifiers;
2266 goto no_change;
18529408 2267 }
62f95557
IZ
2268 case 'X' | TYPE_IS_SHRIEKING:
2269 if (!len) /* Avoid division by 0 */
2270 len = 1;
f337b084
TH
2271 if (utf8) {
2272 char *hop, *last;
2273 I32 l = len;
2274 hop = last = start;
2275 while (hop < cur) {
2276 hop += UTF8SKIP(hop);
2277 if (--l == 0) {
2278 last = hop;
2279 l = len;
2280 }
2281 }
2282 if (last > cur)
2283 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2284 cur = last;
2285 break;
2286 }
2287 len = (cur-start) % len;
924ba076 2288 /* FALLTHROUGH */
a6ec74c1 2289 case 'X':
f337b084
TH
2290 if (utf8) {
2291 if (len < 1) goto no_change;
28be1210 2292 utf8_shrink:
f337b084
TH
2293 while (len > 0) {
2294 if (cur <= start)
28be1210
TH
2295 Perl_croak(aTHX_ "'%c' outside of string in pack",
2296 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2297 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2298 if (cur <= start)
28be1210
TH
2299 Perl_croak(aTHX_ "'%c' outside of string in pack",
2300 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2301 }
2302 len--;
2303 }
2304 } else {
fc241834 2305 shrink:
f337b084 2306 if (cur - start < len)
28be1210
TH
2307 Perl_croak(aTHX_ "'%c' outside of string in pack",
2308 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2309 cur -= len;
2310 }
2311 if (cur < start+symptr->strbeg) {
2312 /* Make sure group starts don't point into the void */
2313 tempsym_t *group;
9e27e96a 2314 const STRLEN length = cur-start;
f337b084
TH
2315 for (group = symptr;
2316 group && length < group->strbeg;
2317 group = group->previous) group->strbeg = length;
2318 lookahead.strbeg = length;
2319 }
a6ec74c1 2320 break;
fc241834
RGS
2321 case 'x' | TYPE_IS_SHRIEKING: {
2322 I32 ai32;
62f95557
IZ
2323 if (!len) /* Avoid division by 0 */
2324 len = 1;
230e1fce 2325 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2326 else ai32 = (cur - start) % len;
2327 if (ai32 == 0) goto no_change;
2328 len -= ai32;
2329 }
924ba076 2330 /* FALLTHROUGH */
a6ec74c1 2331 case 'x':
f337b084 2332 goto grow;
a6ec74c1
JH
2333 case 'A':
2334 case 'Z':
f337b084 2335 case 'a': {
f7fe979e 2336 const char *aptr;
f337b084 2337
a6ec74c1 2338 fromstr = NEXTFROM;
e62f0680 2339 aptr = SvPV_const(fromstr, fromlen);
f337b084 2340 if (DO_UTF8(fromstr)) {
f7fe979e 2341 const char *end, *s;
f337b084
TH
2342
2343 if (!utf8 && !SvUTF8(cat)) {
2344 marked_upgrade(aTHX_ cat, symptr);
2345 lookahead.flags |= FLAG_DO_UTF8;
2346 lookahead.strbeg = symptr->strbeg;
2347 utf8 = 1;
2348 start = SvPVX(cat);
2349 cur = start + SvCUR(cat);
2350 }
fc241834 2351 if (howlen == e_star) {
f337b084
TH
2352 if (utf8) goto string_copy;
2353 len = fromlen+1;
2354 }
2355 s = aptr;
2356 end = aptr + fromlen;
2357 fromlen = datumtype == 'Z' ? len-1 : len;
2358 while ((I32) fromlen > 0 && s < end) {
2359 s += UTF8SKIP(s);
2360 fromlen--;
2361 }
2362 if (s > end)
2363 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2364 if (utf8) {
fc241834 2365 len = fromlen;
f337b084
TH
2366 if (datumtype == 'Z') len++;
2367 fromlen = s-aptr;
2368 len += fromlen;
fc241834 2369
f337b084 2370 goto string_copy;
fc241834 2371 }
f337b084
TH
2372 fromlen = len - fromlen;
2373 if (datumtype == 'Z') fromlen--;
2374 if (howlen == e_star) {
2375 len = fromlen;
2376 if (datumtype == 'Z') len++;
fc241834 2377 }
f337b084 2378 GROWING(0, cat, start, cur, len);
fc241834 2379 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2380 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2381 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2382 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2383 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2384 cur += fromlen;
a6ec74c1 2385 len -= fromlen;
f337b084
TH
2386 } else if (utf8) {
2387 if (howlen == e_star) {
2388 len = fromlen;
2389 if (datumtype == 'Z') len++;
a6ec74c1 2390 }
f337b084
TH
2391 if (len <= (I32) fromlen) {
2392 fromlen = len;
2393 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2394 }
fc241834 2395 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2396 upgrade, so:
2397 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2398 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2399 len -= fromlen;
2400 while (fromlen > 0) {
230e1fce 2401 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2402 aptr++;
2403 fromlen--;
fc241834 2404 }
f337b084
TH
2405 } else {
2406 string_copy:
2407 if (howlen == e_star) {
2408 len = fromlen;
2409 if (datumtype == 'Z') len++;
2410 }
2411 if (len <= (I32) fromlen) {
2412 fromlen = len;
2413 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2414 }
f337b084
TH
2415 GROWING(0, cat, start, cur, len);
2416 Copy(aptr, cur, fromlen, char);
2417 cur += fromlen;
2418 len -= fromlen;
a6ec74c1 2419 }
f337b084
TH
2420 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2421 cur += len;
3c4fb04a 2422 SvTAINT(cat);
a6ec74c1 2423 break;
f337b084 2424 }
a6ec74c1 2425 case 'B':
f337b084 2426 case 'b': {
b83604b4 2427 const char *str, *end;
f337b084
TH
2428 I32 l, field_len;
2429 U8 bits;
2430 bool utf8_source;
2431 U32 utf8_flags;
a6ec74c1 2432
fc241834 2433 fromstr = NEXTFROM;
b83604b4 2434 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2435 end = str + fromlen;
2436 if (DO_UTF8(fromstr)) {
2437 utf8_source = TRUE;
041457d9 2438 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2439 } else {
2440 utf8_source = FALSE;
2441 utf8_flags = 0; /* Unused, but keep compilers happy */
2442 }
2443 if (howlen == e_star) len = fromlen;
2444 field_len = (len+7)/8;
2445 GROWING(utf8, cat, start, cur, field_len);
2446 if (len > (I32)fromlen) len = fromlen;
2447 bits = 0;
2448 l = 0;
2449 if (datumtype == 'B')
2450 while (l++ < len) {
2451 if (utf8_source) {
95b63a38 2452 UV val = 0;
f337b084
TH
2453 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2454 bits |= val & 1;
2455 } else bits |= *str++ & 1;
2456 if (l & 7) bits <<= 1;
fc241834 2457 else {
f337b084
TH
2458 PUSH_BYTE(utf8, cur, bits);
2459 bits = 0;
a6ec74c1
JH
2460 }
2461 }
f337b084
TH
2462 else
2463 /* datumtype == 'b' */
2464 while (l++ < len) {
2465 if (utf8_source) {
95b63a38 2466 UV val = 0;
f337b084
TH
2467 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2468 if (val & 1) bits |= 0x80;
2469 } else if (*str++ & 1)
2470 bits |= 0x80;
2471 if (l & 7) bits >>= 1;
fc241834 2472 else {
f337b084
TH
2473 PUSH_BYTE(utf8, cur, bits);
2474 bits = 0;
a6ec74c1
JH
2475 }
2476 }
f337b084
TH
2477 l--;
2478 if (l & 7) {
fc241834 2479 if (datumtype == 'B')
f337b084 2480 bits <<= 7 - (l & 7);
fc241834 2481 else
f337b084
TH
2482 bits >>= 7 - (l & 7);
2483 PUSH_BYTE(utf8, cur, bits);
2484 l += 7;
a6ec74c1 2485 }
f337b084
TH
2486 /* Determine how many chars are left in the requested field */
2487 l /= 8;
2488 if (howlen == e_star) field_len = 0;
2489 else field_len -= l;
2490 Zero(cur, field_len, char);
2491 cur += field_len;
a6ec74c1 2492 break;
f337b084 2493 }
a6ec74c1 2494 case 'H':
f337b084 2495 case 'h': {
10516c54 2496 const char *str, *end;
f337b084
TH
2497 I32 l, field_len;
2498 U8 bits;
2499 bool utf8_source;
2500 U32 utf8_flags;
a6ec74c1 2501
fc241834 2502 fromstr = NEXTFROM;
10516c54 2503 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2504 end = str + fromlen;
2505 if (DO_UTF8(fromstr)) {
2506 utf8_source = TRUE;
041457d9 2507 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2508 } else {
2509 utf8_source = FALSE;
2510 utf8_flags = 0; /* Unused, but keep compilers happy */
2511 }
2512 if (howlen == e_star) len = fromlen;
2513 field_len = (len+1)/2;
2514 GROWING(utf8, cat, start, cur, field_len);
2515 if (!utf8 && len > (I32)fromlen) len = fromlen;
2516 bits = 0;
2517 l = 0;
2518 if (datumtype == 'H')
2519 while (l++ < len) {
2520 if (utf8_source) {
95b63a38 2521 UV val = 0;
f337b084
TH
2522 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2523 if (val < 256 && isALPHA(val))
2524 bits |= (val + 9) & 0xf;
a6ec74c1 2525 else
f337b084
TH
2526 bits |= val & 0xf;
2527 } else if (isALPHA(*str))
2528 bits |= (*str++ + 9) & 0xf;
2529 else
2530 bits |= *str++ & 0xf;
2531 if (l & 1) bits <<= 4;
fc241834 2532 else {
f337b084
TH
2533 PUSH_BYTE(utf8, cur, bits);
2534 bits = 0;
a6ec74c1
JH
2535 }
2536 }
f337b084
TH
2537 else
2538 while (l++ < len) {
2539 if (utf8_source) {
95b63a38 2540 UV val = 0;
f337b084
TH
2541 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2542 if (val < 256 && isALPHA(val))
2543 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2544 else
f337b084
TH
2545 bits |= (val & 0xf) << 4;
2546 } else if (isALPHA(*str))
2547 bits |= ((*str++ + 9) & 0xf) << 4;
2548 else
2549 bits |= (*str++ & 0xf) << 4;
2550 if (l & 1) bits >>= 4;
fc241834 2551 else {
f337b084
TH
2552 PUSH_BYTE(utf8, cur, bits);
2553 bits = 0;
a6ec74c1 2554 }
fc241834 2555 }
f337b084
TH
2556 l--;
2557 if (l & 1) {
2558 PUSH_BYTE(utf8, cur, bits);
2559 l++;
2560 }
2561 /* Determine how many chars are left in the requested field */
2562 l /= 2;
2563 if (howlen == e_star) field_len = 0;
2564 else field_len -= l;
2565 Zero(cur, field_len, char);
2566 cur += field_len;
2567 break;
fc241834
RGS
2568 }
2569 case 'c':
f337b084
TH
2570 while (len-- > 0) {
2571 IV aiv;
2572 fromstr = NEXTFROM;
354b74ae 2573 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2574 if ((-128 > aiv || aiv > 127))
2575 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2576 "Character in 'c' format wrapped in pack");
585ec06d 2577 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2578 }
2579 break;
2580 case 'C':
f337b084
TH
2581 if (len == 0) {
2582 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2583 break;
2584 }
a6ec74c1 2585 while (len-- > 0) {
f337b084 2586 IV aiv;
a6ec74c1 2587 fromstr = NEXTFROM;
354b74ae 2588 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2589 if ((0 > aiv || aiv > 0xff))
2590 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2591 "Character in 'C' format wrapped in pack");
1651fc44 2592 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2593 }
fc241834
RGS
2594 break;
2595 case 'W': {
2596 char *end;
670f1322 2597 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2598
2599 end = start+SvLEN(cat)-1;
2600 if (utf8) end -= UTF8_MAXLEN-1;
2601 while (len-- > 0) {
2602 UV auv;
2603 fromstr = NEXTFROM;
354b74ae 2604 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2605 if (in_bytes) auv = auv % 0x100;
2606 if (utf8) {
2607 W_utf8:
2608 if (cur > end) {
2609 *cur = '\0';
b162af07 2610 SvCUR_set(cat, cur - start);
fc241834
RGS
2611
2612 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2613 end = start+SvLEN(cat)-UTF8_MAXLEN;
2614 }
c80e42f3
KW
2615 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2616 auv,
041457d9 2617 warn_utf8 ?
230e1fce 2618 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2619 } else {
2620 if (auv >= 0x100) {
2621 if (!SvUTF8(cat)) {
2622 *cur = '\0';
b162af07 2623 SvCUR_set(cat, cur - start);
fc241834
RGS
2624 marked_upgrade(aTHX_ cat, symptr);
2625 lookahead.flags |= FLAG_DO_UTF8;
2626 lookahead.strbeg = symptr->strbeg;
2627 utf8 = 1;
2628 start = SvPVX(cat);
2629 cur = start + SvCUR(cat);
2630 end = start+SvLEN(cat)-UTF8_MAXLEN;
2631 goto W_utf8;
2632 }
a2a5de95
NC
2633 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2634 "Character in 'W' format wrapped in pack");
fc241834
RGS
2635 auv &= 0xff;
2636 }
2637 if (cur >= end) {
2638 *cur = '\0';
b162af07 2639 SvCUR_set(cat, cur - start);
fc241834
RGS
2640 GROWING(0, cat, start, cur, len+1);
2641 end = start+SvLEN(cat)-1;
2642 }
fe2774ed 2643 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2644 }
2645 }
2646 break;
fc241834
RGS
2647 }
2648 case 'U': {
2649 char *end;
2650
2651 if (len == 0) {
2652 if (!(symptr->flags & FLAG_DO_UTF8)) {
2653 marked_upgrade(aTHX_ cat, symptr);
2654 lookahead.flags |= FLAG_DO_UTF8;
2655 lookahead.strbeg = symptr->strbeg;
2656 }
2657 utf8 = 0;
2658 goto no_change;
2659 }
2660
2661 end = start+SvLEN(cat);
2662 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2663 while (len-- > 0) {
fc241834 2664 UV auv;
a6ec74c1 2665 fromstr = NEXTFROM;
354b74ae 2666 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2667 if (utf8) {
230e1fce 2668 U8 buffer[UTF8_MAXLEN], *endb;
c80e42f3 2669 endb = uvchr_to_utf8_flags(buffer, auv,
041457d9 2670 warn_utf8 ?
fc241834
RGS
2671 0 : UNICODE_ALLOW_ANY);
2672 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2673 *cur = '\0';
b162af07 2674 SvCUR_set(cat, cur - start);
fc241834
RGS
2675 GROWING(0, cat, start, cur,
2676 len+(endb-buffer)*UTF8_EXPAND);
2677 end = start+SvLEN(cat);
2678 }
3a88beaa 2679 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
fc241834
RGS
2680 } else {
2681 if (cur >= end) {
2682 *cur = '\0';
b162af07 2683 SvCUR_set(cat, cur - start);
fc241834
RGS
2684 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2685 end = start+SvLEN(cat)-UTF8_MAXLEN;
2686 }
c80e42f3 2687 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
041457d9 2688 warn_utf8 ?
230e1fce 2689 0 : UNICODE_ALLOW_ANY);
fc241834 2690 }
a6ec74c1 2691 }
a6ec74c1 2692 break;
fc241834 2693 }
a6ec74c1
JH
2694 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2695 case 'f':
a6ec74c1 2696 while (len-- > 0) {
f337b084
TH
2697 float afloat;
2698 NV anv;
a6ec74c1 2699 fromstr = NEXTFROM;
f337b084 2700 anv = SvNV(fromstr);
85bba25f 2701# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2702 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2703 * on Alpha; fake it if we don't have them.
2704 */
f337b084 2705 if (anv > FLT_MAX)
fc241834 2706 afloat = FLT_MAX;
f337b084 2707 else if (anv < -FLT_MAX)
fc241834 2708 afloat = -FLT_MAX;
f337b084 2709 else afloat = (float)anv;
baf3cf9c 2710# else
f337b084 2711 afloat = (float)anv;
baf3cf9c 2712# endif
3a88beaa 2713 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2714 }
2715 break;
2716 case 'd':
a6ec74c1 2717 while (len-- > 0) {
f337b084
TH
2718 double adouble;
2719 NV anv;
a6ec74c1 2720 fromstr = NEXTFROM;
f337b084 2721 anv = SvNV(fromstr);
85bba25f 2722# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2723 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2724 * on Alpha; fake it if we don't have them.
2725 */
f337b084 2726 if (anv > DBL_MAX)
fc241834 2727 adouble = DBL_MAX;
f337b084 2728 else if (anv < -DBL_MAX)
fc241834 2729 adouble = -DBL_MAX;
f337b084 2730 else adouble = (double)anv;
baf3cf9c 2731# else
f337b084 2732 adouble = (double)anv;
baf3cf9c 2733# endif
3a88beaa 2734 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2735 }
2736 break;
fc241834 2737 case 'F': {
275663fa 2738 NV_bytes anv;
1109a392 2739 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2740 while (len-- > 0) {
2741 fromstr = NEXTFROM;
cd07c537
DM
2742#ifdef __GNUC__
2743 /* to work round a gcc/x86 bug; don't use SvNV */
2744 anv.nv = sv_2nv(fromstr);
2745#else
275663fa 2746 anv.nv = SvNV(fromstr);
cd07c537 2747#endif
3a88beaa 2748 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2749 }
2750 break;
fc241834 2751 }
92d41999 2752#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2753 case 'D': {
275663fa 2754 ld_bytes aldouble;
1109a392
MHM
2755 /* long doubles can have unused bits, which may be nonzero */
2756 Zero(&aldouble, 1, long double);
92d41999
JH
2757 while (len-- > 0) {
2758 fromstr = NEXTFROM;
cd07c537
DM
2759# ifdef __GNUC__
2760 /* to work round a gcc/x86 bug; don't use SvNV */
2761 aldouble.ld = (long double)sv_2nv(fromstr);
2762# else
275663fa 2763 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2764# endif
3a88beaa
NC
2765 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2766 needs_swap);
92d41999
JH
2767 }
2768 break;
fc241834 2769 }
92d41999 2770#endif
068bd2e7 2771 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2772 case 'n':
2773 while (len-- > 0) {
f337b084 2774 I16 ai16;
a6ec74c1 2775 fromstr = NEXTFROM;
354b74ae 2776 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2777 ai16 = PerlSock_htons(ai16);
3a88beaa 2778 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2779 }
2780 break;
068bd2e7 2781 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2782 case 'v':
2783 while (len-- > 0) {
f337b084 2784 I16 ai16;
a6ec74c1 2785 fromstr = NEXTFROM;
354b74ae 2786 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2787 ai16 = htovs(ai16);
3a88beaa 2788 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2789 }
2790 break;
49704364 2791 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2792#if SHORTSIZE != SIZE16
fc241834 2793 while (len-- > 0) {
f337b084 2794 unsigned short aushort;
fc241834 2795 fromstr = NEXTFROM;
354b74ae 2796 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2797 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2798 }
49704364
WL
2799 break;
2800#else
924ba076 2801 /* FALLTHROUGH */
a6ec74c1 2802#endif
49704364 2803 case 'S':
fc241834 2804 while (len-- > 0) {
f337b084 2805 U16 au16;
fc241834 2806 fromstr = NEXTFROM;
354b74ae 2807 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2808 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2809 }
2810 break;
49704364 2811 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2812#if SHORTSIZE != SIZE16
fc241834 2813 while (len-- > 0) {
f337b084 2814 short ashort;
fc241834 2815 fromstr = NEXTFROM;
354b74ae 2816 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2817 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2818 }
49704364
WL
2819 break;
2820#else
924ba076 2821 /* FALLTHROUGH */
a6ec74c1 2822#endif
49704364
WL
2823 case 's':
2824 while (len-- > 0) {
f337b084 2825 I16 ai16;
49704364 2826 fromstr = NEXTFROM;
354b74ae 2827 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2828 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2829 }
2830 break;
2831 case 'I':
49704364 2832 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2833 while (len-- > 0) {
f337b084 2834 unsigned int auint;
a6ec74c1 2835 fromstr = NEXTFROM;
354b74ae 2836 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2837 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2838 }
2839 break;
92d41999
JH
2840 case 'j':
2841 while (len-- > 0) {
f337b084 2842 IV aiv;
92d41999 2843 fromstr = NEXTFROM;
354b74ae 2844 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2845 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2846 }
2847 break;
2848 case 'J':
2849 while (len-- > 0) {
f337b084 2850 UV auv;
92d41999 2851 fromstr = NEXTFROM;
354b74ae 2852 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2853 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2854 }
2855 break;
a6ec74c1
JH
2856 case 'w':
2857 while (len-- > 0) {
f337b084 2858 NV anv;
a6ec74c1 2859 fromstr = NEXTFROM;
83388ae2 2860 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2861 anv = SvNV_nomg(fromstr);
a6ec74c1 2862
f337b084
TH
2863 if (anv < 0) {
2864 *cur = '\0';
b162af07 2865 SvCUR_set(cat, cur - start);
49704364 2866 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2867 }
a6ec74c1 2868
196b62db
NC
2869 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2870 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2871 any negative IVs will have already been got by the croak()
2872 above. IOK is untrue for fractions, so we test them
2873 against UV_MAX_P1. */
f337b084
TH
2874 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2875 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2876 char *in = buf + sizeof(buf);
1e9a122e 2877 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2878
2879 do {
eb160463 2880 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2881 auv >>= 7;
2882 } while (auv);
2883 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2884 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2885 in, (buf + sizeof(buf)) - in);
2886 } else if (SvPOKp(fromstr))
2887 goto w_string;
a6ec74c1 2888 else if (SvNOKp(fromstr)) {
0258719b 2889 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2890 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2891 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2892 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2893 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2894 Some C compilers are strict about integral constant
2895 expressions so we conservatively divide by a slightly
2896 smaller integer instead of multiplying by the exact
2897 floating-point value.
0258719b
NC
2898 */
2899#ifdef NV_MAX_10_EXP
f337b084 2900 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2901 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2902#else
f337b084 2903 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2904 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2905#endif
a6ec74c1
JH
2906 char *in = buf + sizeof(buf);
2907
8b6e33c7 2908 anv = Perl_floor(anv);
a6ec74c1 2909 do {
8b6e33c7 2910 const NV next = Perl_floor(anv / 128);
a6ec74c1 2911 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2912 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2913 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2914 anv = next;
2915 } while (anv > 0);
a6ec74c1 2916 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2917 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2918 in, (buf + sizeof(buf)) - in);
2919 } else {
8b6e33c7
AL
2920 const char *from;
2921 char *result, *in;
735b914b
JH
2922 SV *norm;
2923 STRLEN len;
2924 bool done;
2925
f337b084 2926 w_string:
735b914b 2927 /* Copy string and check for compliance */
1e9a122e 2928 from = SvPV_nomg_const(fromstr, len);
735b914b 2929 if ((norm = is_an_int(from, len)) == NULL)
49704364 2930 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2931
a02a5408 2932 Newx(result, len, char);
735b914b
JH
2933 in = result + len;
2934 done = FALSE;
f337b084 2935 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2936 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2937 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2938 in, (result + len) - in);
735b914b
JH
2939 Safefree(result);
2940 SvREFCNT_dec(norm); /* free norm */
fc241834 2941 }
a6ec74c1
JH
2942 }
2943 break;
2944 case 'i':
49704364 2945 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2946 while (len-- > 0) {
f337b084 2947 int aint;
a6ec74c1 2948 fromstr = NEXTFROM;
354b74ae 2949 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2950 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2951 }
2952 break;
068bd2e7 2953 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2954 case 'N':
2955 while (len-- > 0) {
f337b084 2956 U32 au32;
a6ec74c1 2957 fromstr = NEXTFROM;
354b74ae 2958 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2959 au32 = PerlSock_htonl(au32);
3a88beaa 2960 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2961 }
2962 break;
068bd2e7 2963 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2964 case 'V':
2965 while (len-- > 0) {
f337b084 2966 U32 au32;
a6ec74c1 2967 fromstr = NEXTFROM;
354b74ae 2968 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2969 au32 = htovl(au32);
3a88beaa 2970 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2971 }
2972 break;
49704364 2973 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2974#if LONGSIZE != SIZE32
fc241834 2975 while (len-- > 0) {
f337b084 2976 unsigned long aulong;
fc241834 2977 fromstr = NEXTFROM;
354b74ae 2978 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2979 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2980 }
49704364
WL
2981 break;
2982#else
2983 /* Fall though! */
a6ec74c1 2984#endif
49704364 2985 case 'L':
fc241834 2986 while (len-- > 0) {
f337b084 2987 U32 au32;
fc241834 2988 fromstr = NEXTFROM;
354b74ae 2989 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2990 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
2991 }
2992 break;
49704364 2993 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2994#if LONGSIZE != SIZE32
fc241834 2995 while (len-- > 0) {
f337b084 2996 long along;
fc241834 2997 fromstr = NEXTFROM;
354b74ae 2998 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2999 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 3000 }
49704364
WL
3001 break;
3002#else
3003 /* Fall though! */
a6ec74c1 3004#endif
49704364
WL
3005 case 'l':
3006 while (len-- > 0) {
f337b084 3007 I32 ai32;
49704364 3008 fromstr = NEXTFROM;
354b74ae 3009 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3010 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3011 }
3012 break;
c174bf3b 3013#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3014 case 'Q':
3015 while (len-- > 0) {
f337b084 3016 Uquad_t auquad;
a6ec74c1 3017 fromstr = NEXTFROM;
354b74ae 3018 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3019 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3020 }
3021 break;
3022 case 'q':
3023 while (len-- > 0) {
f337b084 3024 Quad_t aquad;
a6ec74c1 3025 fromstr = NEXTFROM;
354b74ae 3026 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3027 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3028 }
3029 break;
1640b983 3030#endif
a6ec74c1
JH
3031 case 'P':
3032 len = 1; /* assume SV is correct length */
f337b084 3033 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3034 /* FALLTHROUGH */
a6ec74c1
JH
3035 case 'p':
3036 while (len-- > 0) {
83003860 3037 const char *aptr;
f337b084 3038
a6ec74c1 3039 fromstr = NEXTFROM;
28a4f200
TH
3040 SvGETMAGIC(fromstr);
3041 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3042 else {
a6ec74c1
JH
3043 /* XXX better yet, could spirit away the string to
3044 * a safe spot and hang on to it until the result
3045 * of pack() (and all copies of the result) are
3046 * gone.
3047 */
041457d9 3048 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3049 !SvREADONLY(fromstr)))) {
3050 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3051 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3052 }
3053 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3054 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3055 else
2596d9fe 3056 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3057 }
3a88beaa 3058 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3059 }
3060 break;
fc241834 3061 case 'u': {
f7fe979e 3062 const char *aptr, *aend;
fc241834 3063 bool from_utf8;
f337b084 3064
a6ec74c1 3065 fromstr = NEXTFROM;
fc241834
RGS
3066 if (len <= 2) len = 45;
3067 else len = len / 3 * 3;
3068 if (len >= 64) {
a2a5de95
NC
3069 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3070 "Field too wide in 'u' format in pack");
fc241834
RGS
3071 len = 63;
3072 }
83003860 3073 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3074 from_utf8 = DO_UTF8(fromstr);
3075 if (from_utf8) {
3076 aend = aptr + fromlen;
3f63b0e5 3077 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3078 } else aend = NULL; /* Unused, but keep compilers happy */
3079 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3080 while (fromlen > 0) {
fc241834 3081 U8 *end;
a6ec74c1 3082 I32 todo;
fc241834 3083 U8 hunk[1+63/3*4+1];
a6ec74c1 3084
eb160463 3085 if ((I32)fromlen > len)
a6ec74c1
JH
3086 todo = len;
3087 else
3088 todo = fromlen;
fc241834
RGS
3089 if (from_utf8) {
3090 char buffer[64];
3091 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3092 'u' | TYPE_IS_PACK)) {
3093 *cur = '\0';
b162af07 3094 SvCUR_set(cat, cur - start);
5637ef5b
NC
3095 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3096 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3097 aptr, aend, buffer, (long) todo);
fc241834
RGS
3098 }
3099 end = doencodes(hunk, buffer, todo);
3100 } else {
3101 end = doencodes(hunk, aptr, todo);
3102 aptr += todo;
3103 }
3a88beaa 3104 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3105 fromlen -= todo;
3106 }
a6ec74c1
JH
3107 break;
3108 }
f337b084
TH
3109 }
3110 *cur = '\0';
b162af07 3111 SvCUR_set(cat, cur - start);
f337b084 3112 no_change:
49704364 3113 *symptr = lookahead;
a6ec74c1 3114 }
49704364 3115 return beglist;
18529408
IZ
3116}
3117#undef NEXTFROM
3118
3119
3120PP(pp_pack)
3121{
20b7effb 3122 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3123 SV *cat = TARG;
18529408 3124 STRLEN fromlen;
349d4f2f 3125 SV *pat_sv = *++MARK;
eb578fdb
KW
3126 const char *pat = SvPV_const(pat_sv, fromlen);
3127 const char *patend = pat + fromlen;
18529408
IZ
3128
3129 MARK++;
76f68e9b 3130 sv_setpvs(cat, "");
f337b084 3131 SvUTF8_off(cat);
18529408 3132
7accc089 3133 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3134
a6ec74c1
JH
3135 SvSETMAGIC(cat);
3136 SP = ORIGMARK;
3137 PUSHs(cat);
3138 RETURN;
3139}
a6ec74c1 3140
73cb7263
NC
3141/*
3142 * Local variables:
3143 * c-indentation-style: bsd
3144 * c-basic-offset: 4
14d04a33 3145 * indent-tabs-mode: nil
73cb7263
NC
3146 * End:
3147 *
14d04a33 3148 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3149 */