This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
locale.c: Rmv duplicate strlen()
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
f7fe979e
AL
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
e1b825c1
Z
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
f7fe979e
AL
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
1604cfb0
MS
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
f7fe979e
AL
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
77a2054e 78#if defined(HAS_LONG_DOUBLE)
275663fa
TC
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
41f35ffd 115#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
08ca2aa3
TH
116# define OFF16(p) ((char *) (p))
117# define OFF32(p) ((char *) (p))
41f35ffd
AC
118#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119# define OFF16(p) ((char*)(p))
120# define OFF32(p) ((char*)(p))
121#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124#else
125# error "bad cray byte order"
a6ec74c1
JH
126#endif
127
3a88beaa
NC
128#define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130#define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 132
a1219b5e
NC
133#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137#else
138# error "Unsupported byteorder"
20aa3a38
NC
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
142 logic adding to deal with any mixed-endian transformations needed.
143 */
a1219b5e
NC
144#endif
145
08ca2aa3 146/* Only to be used inside a loop (see the break) */
aaec8192 147#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 148STMT_START { \
228e69a7 149 if (UNLIKELY(utf8)) { \
9df874cd 150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
1604cfb0 151 (char *) (buf), len, datumtype)) break; \
08ca2aa3 152 } else { \
228e69a7 153 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
154 S_reverse_copy(s, (char *) (buf), len); \
155 else \
156 Copy(s, (char *) (buf), len, char); \
275663fa 157 s += len; \
08ca2aa3 158 } \
08ca2aa3
TH
159} STMT_END
160
aaec8192
NC
161#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 163
aaec8192
NC
164#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 166
aaec8192
NC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 169
3a88beaa
NC
170#define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 172
49704364
WL
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
0f12654f 194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
0860e090
KW
215#define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \
216 NATIVE_TO_LATIN1(' '), \
217 NATIVE_TO_LATIN1('a') - 1)
a6ec74c1 218
66c611c5 219/* type modifiers */
62f95557 220#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
221#define TYPE_IS_BIG_ENDIAN 0x200
222#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 223#define TYPE_IS_PACK 0x800
1109a392 224#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 225#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
a5d875a7 226#define TYPE_NO_MODIFIERS(t) ((U8) (t))
1109a392 227
7212898e
NC
228# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
229# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
230
231# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
232
78d46eaa 233#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 234#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
235#define PACK_SIZE_MASK 0x3F
236
4ad176ef 237#include "packsizetables.inc"
78d46eaa 238
20aa3a38
NC
239static void
240S_reverse_copy(const char *src, char *dest, STRLEN len)
241{
242 dest += len;
243 while (len--)
244 *--dest = *src++;
245}
246
08ca2aa3 247STATIC U8
9df874cd 248utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 249{
08ca2aa3 250 STRLEN retlen;
df337714
KW
251 UV val;
252
253 if (*s >= end) {
1604cfb0 254 goto croak;
df337714
KW
255 }
256 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
1604cfb0 257 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714
KW
258 if (retlen == (STRLEN) -1)
259 croak:
1604cfb0
MS
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 262 if (val >= 0x100) {
1604cfb0
MS
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
a5d875a7 266 val = (U8) val;
08ca2aa3
TH
267 }
268 *s += retlen;
fe2774ed 269 return (U8)val;
08ca2aa3
TH
270}
271
f337b084 272#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
1604cfb0
MS
273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
274 *(U8 *)(s)++)
08ca2aa3
TH
275
276STATIC bool
e1b825c1 277S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
08ca2aa3
TH
278{
279 UV val;
280 STRLEN retlen;
f7fe979e 281 const char *from = *s;
08ca2aa3 282 int bad = 0;
f7fe979e 283 const U32 flags = ckWARN(WARN_UTF8) ?
1604cfb0 284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
285 const bool needs_swap = NEEDS_SWAP(datumtype);
286
228e69a7 287 if (UNLIKELY(needs_swap))
20aa3a38
NC
288 buf += buf_len;
289
08ca2aa3 290 for (;buf_len > 0; buf_len--) {
1604cfb0
MS
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1) {
294 from += UTF8_SAFE_SKIP(from, end);
295 bad |= 1;
296 } else from += retlen;
297 if (val >= 0x100) {
298 bad |= 2;
a5d875a7 299 val = (U8) val;
1604cfb0 300 }
228e69a7 301 if (UNLIKELY(needs_swap))
20aa3a38
NC
302 *(U8 *)--buf = (U8)val;
303 else
304 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
305 }
306 /* We have enough characters for the buffer. Did we have problems ? */
307 if (bad) {
1604cfb0
MS
308 if (bad & 1) {
309 /* Rewalk the string fragment while warning */
310 const char *ptr;
311 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
315 }
316 if (from > end) from = end;
317 }
318 if ((bad & 2))
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 WARN_PACK : WARN_UNPACK),
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
324 }
325 *s = from;
326 return TRUE;
327}
328
64844641 329STATIC char *
d21d7215
KW
330S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 332
228e69a7 333 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
334 const U8 *p = start + len;
335 while (p-- > start) {
55d09dc8 336 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
337 }
338 } else {
339 const U8 * const end = start + len;
340 while (start < end) {
55d09dc8 341 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
342 start++;
343 }
f337b084 344 }
64844641 345 return dest;
f337b084
TH
346}
347
3a88beaa 348#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 349STMT_START { \
228e69a7 350 if (UNLIKELY(utf8)) \
1604cfb0 351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 352 else { \
228e69a7 353 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
354 S_reverse_copy((char *)(buf), cur, len); \
355 else \
356 Copy(buf, cur, len, char); \
1604cfb0 357 (cur) += (len); \
230e1fce 358 } \
f337b084
TH
359} STMT_END
360
f17fed50
TC
361#define SAFE_UTF8_EXPAND(var) \
362STMT_START { \
363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365 (var) = (var) * UTF8_EXPAND; \
366} STMT_END
367
368#define GROWING2(utf8, cat, start, cur, item_size, item_count) \
369STMT_START { \
370 if (SSize_t_MAX / (item_size) < (item_count)) \
371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
373} STMT_END
374
f337b084
TH
375#define GROWING(utf8, cat, start, cur, in_len) \
376STMT_START { \
377 STRLEN glen = (in_len); \
f17fed50
TC
378 STRLEN catcur = (STRLEN)((cur) - (start)); \
379 if (utf8) SAFE_UTF8_EXPAND(glen); \
380 if (SSize_t_MAX - glen < catcur) \
381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382 if (catcur + glen >= SvLEN(cat)) { \
1604cfb0
MS
383 (start) = sv_exp_grow(cat, glen); \
384 (cur) = (start) + SvCUR(cat); \
f337b084
TH
385 } \
386} STMT_END
387
388#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
389STMT_START { \
f7fe979e 390 const STRLEN glen = (in_len); \
f337b084 391 STRLEN gl = glen; \
f17fed50 392 if (utf8) SAFE_UTF8_EXPAND(gl); \
f337b084
TH
393 if ((cur) + gl >= (start) + SvLEN(cat)) { \
394 *cur = '\0'; \
b162af07 395 SvCUR_set((cat), (cur) - (start)); \
1604cfb0
MS
396 (start) = sv_exp_grow(cat, gl); \
397 (cur) = (start) + SvCUR(cat); \
f337b084 398 } \
3a88beaa 399 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
400} STMT_END
401
402#define PUSH_BYTE(utf8, s, byte) \
403STMT_START { \
404 if (utf8) { \
1604cfb0
MS
405 const U8 au8 = (byte); \
406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
407 } else *(U8 *)(s)++ = (byte); \
408} STMT_END
409
410/* Only to be used inside a loop (see the break) */
411#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
412STMT_START { \
413 STRLEN retlen; \
414 if (str >= end) break; \
415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
df337714 416 if (retlen == (STRLEN) -1) { \
1604cfb0
MS
417 *cur = '\0'; \
418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
f337b084
TH
419 } \
420 str += retlen; \
421} STMT_END
422
f7fe979e
AL
423static const char *_action( const tempsym_t* symptr )
424{
10edeb5d 425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
426}
427
206947d2 428/* Returns the sizeof() struct described by pat */
e1b825c1 429STATIC SSize_t
f337b084 430S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 431{
e1b825c1 432 SSize_t total = 0;
206947d2 433
7918f24d
NC
434 PERL_ARGS_ASSERT_MEASURE_STRUCT;
435
49704364 436 while (next_symbol(symptr)) {
1604cfb0 437 SSize_t len, size;
f337b084
TH
438
439 switch (symptr->howlen) {
1604cfb0
MS
440 case e_star:
441 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 442 _action( symptr ) );
81d52ecd 443
1604cfb0
MS
444 default:
445 /* e_no_len and e_number */
446 len = symptr->length;
447 break;
49704364
WL
448 }
449
1604cfb0
MS
450 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
451 if (!size) {
e1b825c1 452 SSize_t star;
1604cfb0
MS
453 /* endianness doesn't influence the size of a type */
454 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
455 default:
d3cdb721 456 /* diag_listed_as: Invalid type '%s' in %s */
1604cfb0
MS
457 Perl_croak(aTHX_ "Invalid type '%c' in %s",
458 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 459 _action( symptr ) );
1604cfb0
MS
460 case '.' | TYPE_IS_SHRIEKING:
461 case '@' | TYPE_IS_SHRIEKING:
462 case '@':
463 case '.':
464 case '/':
465 case 'U': /* XXXX Is it correct? */
466 case 'w':
467 case 'u':
468 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
469 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 470 _action( symptr ) );
1604cfb0
MS
471 case '%':
472 size = 0;
473 break;
474 case '(':
475 {
476 tempsym_t savsym = *symptr;
477 symptr->patptr = savsym.grpbeg;
478 symptr->patend = savsym.grpend;
479 /* XXXX Theoretically, we need to measure many times at
480 different positions, since the subexpression may contain
481 alignment commands, but be not of aligned length.
482 Need to detect this and croak(). */
483 size = measure_struct(symptr);
484 *symptr = savsym;
485 break;
486 }
487 case 'X' | TYPE_IS_SHRIEKING:
488 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
489 */
490 if (!len) /* Avoid division by 0 */
491 len = 1;
492 len = total % len; /* Assumed: the start is aligned. */
493 /* FALLTHROUGH */
494 case 'X':
495 size = -1;
496 if (total < len)
f7fe979e 497 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
1604cfb0
MS
498 break;
499 case 'x' | TYPE_IS_SHRIEKING:
500 if (!len) /* Avoid division by 0 */
501 len = 1;
502 star = total % len; /* Assumed: the start is aligned. */
503 if (star) /* Other portable ways? */
504 len = len - star;
505 else
506 len = 0;
507 /* FALLTHROUGH */
508 case 'x':
509 case 'A':
510 case 'Z':
511 case 'a':
512 size = 1;
513 break;
514 case 'B':
515 case 'b':
516 len = (len + 7)/8;
517 size = 1;
518 break;
519 case 'H':
520 case 'h':
521 len = (len + 1)/2;
522 size = 1;
523 break;
524
525 case 'P':
526 len = 1;
527 size = sizeof(char*);
528 break;
529 }
530 }
531 total += len * size;
206947d2
IZ
532 }
533 return total;
534}
535
49704364
WL
536
537/* locate matching closing parenthesis or bracket
538 * returns char pointer to char after match, or NULL
539 */
f7fe979e 540STATIC const char *
5aaab254 541S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 542{
7918f24d 543 PERL_ARGS_ASSERT_GROUP_END;
f24623d6 544 Size_t opened = 0; /* number of pending opened brackets */
7918f24d 545
49704364 546 while (patptr < patend) {
1604cfb0
MS
547 const char c = *patptr++;
548
f24623d6 549 if (opened == 0 && c == ender)
1604cfb0
MS
550 return patptr-1;
551 else if (c == '#') {
552 while (patptr < patend && *patptr != '\n')
553 patptr++;
554 continue;
f24623d6
HS
555 } else if (c == '(' || c == '[')
556 ++opened;
557 else if (c == ')' || c == ']') {
558 if (opened == 0)
559 Perl_croak(aTHX_ "Mismatched brackets in template");
560 --opened;
561 }
18529408 562 }
49704364
WL
563 Perl_croak(aTHX_ "No group ending character '%c' found in template",
564 ender);
a25b5927 565 NOT_REACHED; /* NOTREACHED */
18529408
IZ
566}
567
49704364
WL
568
569/* Convert unsigned decimal number to binary.
570 * Expects a pointer to the first digit and address of length variable
571 * Advances char pointer to 1st non-digit char and returns number
fc241834 572 */
f7fe979e 573STATIC const char *
e1b825c1 574S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
49704364 575{
e1b825c1 576 SSize_t len = *patptr++ - '0';
7918f24d
NC
577
578 PERL_ARGS_ASSERT_GET_NUM;
579
49704364 580 while (isDIGIT(*patptr)) {
e1b825c1
Z
581 SSize_t nlen = (len * 10) + (*patptr++ - '0');
582 if (nlen < 0 || nlen/10 != len)
49704364 583 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
e1b825c1 584 len = nlen;
49704364
WL
585 }
586 *lenptr = len;
587 return patptr;
588}
589
590/* The marvellous template parsing routine: Using state stored in *symptr,
591 * locates next template code and count
592 */
593STATIC bool
f337b084 594S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 595{
f7fe979e 596 const char* patptr = symptr->patptr;
0bcc34c2 597 const char* const patend = symptr->patend;
49704364 598
7918f24d
NC
599 PERL_ARGS_ASSERT_NEXT_SYMBOL;
600
49704364
WL
601 symptr->flags &= ~FLAG_SLASH;
602
603 while (patptr < patend) {
604 if (isSPACE(*patptr))
605 patptr++;
606 else if (*patptr == '#') {
607 patptr++;
608 while (patptr < patend && *patptr != '\n')
1604cfb0 609 patptr++;
49704364 610 if (patptr < patend)
1604cfb0 611 patptr++;
49704364 612 } else {
fc241834 613 /* We should have found a template code */
a5d875a7 614 I32 code = (U8) *patptr++;
66c611c5 615 U32 inherited_modifiers = 0;
49704364 616
6920b5ce
NC
617 /* unrecognised characters in pack/unpack formats were made fatal in
618 * 5.004, with an exception added in 5.004_04 for ',' to "just" warn: */
619 if (code == ','){
1604cfb0 620 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
49704364 621 symptr->flags |= FLAG_COMMA;
d3cdb721 622 /* diag_listed_as: Invalid type '%s' in %s */
1604cfb0
MS
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Invalid type ',' in %s", _action( symptr ) );
49704364 625 }
1604cfb0 626 continue;
49704364 627 }
fc241834 628
49704364 629 /* for '(', skip to ')' */
fc241834 630 if (code == '(') {
49704364
WL
631 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
632 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 633 _action( symptr ) );
49704364
WL
634 symptr->grpbeg = patptr;
635 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
636 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1604cfb0 637 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 638 _action( symptr ) );
49704364
WL
639 }
640
66c611c5
MHM
641 /* look for group modifiers to inherit */
642 if (TYPE_ENDIANNESS(symptr->flags)) {
643 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
644 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
645 }
646
1109a392
MHM
647 /* look for modifiers */
648 while (patptr < patend) {
b7787f18
AL
649 const char *allowed;
650 I32 modifier;
1109a392
MHM
651 switch (*patptr) {
652 case '!':
653 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 654 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
655 break;
656 case '>':
657 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 658 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
659 break;
660 case '<':
661 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 662 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
663 break;
664 default:
b7787f18
AL
665 allowed = "";
666 modifier = 0;
1109a392
MHM
667 break;
668 }
66c611c5 669
1109a392
MHM
670 if (modifier == 0)
671 break;
66c611c5 672
1109a392
MHM
673 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
674 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 675 allowed, _action( symptr ) );
66c611c5
MHM
676
677 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 678 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 679 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
680 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
681 TYPE_ENDIANNESS_MASK)
682 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 683 *patptr, _action( symptr ) );
66c611c5 684
a2a5de95 685 if ((code & modifier)) {
1604cfb0
MS
686 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
687 "Duplicate modifier '%c' after '%c' in %s",
688 *patptr, (int) TYPE_NO_MODIFIERS(code),
689 _action( symptr ) );
1109a392 690 }
66c611c5 691
1109a392
MHM
692 code |= modifier;
693 patptr++;
49704364
WL
694 }
695
66c611c5
MHM
696 /* inherit modifiers */
697 code |= inherited_modifiers;
698
fc241834 699 /* look for count and/or / */
49704364 700 if (patptr < patend) {
1604cfb0
MS
701 if (isDIGIT(*patptr)) {
702 patptr = get_num( patptr, &symptr->length );
49704364
WL
703 symptr->howlen = e_number;
704
705 } else if (*patptr == '*') {
706 patptr++;
707 symptr->howlen = e_star;
708
709 } else if (*patptr == '[') {
f7fe979e 710 const char* lenptr = ++patptr;
49704364
WL
711 symptr->howlen = e_number;
712 patptr = group_end( patptr, patend, ']' ) + 1;
713 /* what kind of [] is it? */
714 if (isDIGIT(*lenptr)) {
715 lenptr = get_num( lenptr, &symptr->length );
716 if( *lenptr != ']' )
717 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 718 _action( symptr ) );
49704364
WL
719 } else {
720 tempsym_t savsym = *symptr;
721 symptr->patend = patptr-1;
722 symptr->patptr = lenptr;
723 savsym.length = measure_struct(symptr);
724 *symptr = savsym;
725 }
726 } else {
727 symptr->howlen = e_no_len;
728 symptr->length = 1;
729 }
730
731 /* try to find / */
732 while (patptr < patend) {
733 if (isSPACE(*patptr))
734 patptr++;
735 else if (*patptr == '#') {
736 patptr++;
737 while (patptr < patend && *patptr != '\n')
1604cfb0 738 patptr++;
49704364 739 if (patptr < patend)
1604cfb0 740 patptr++;
49704364 741 } else {
66c611c5 742 if (*patptr == '/') {
49704364
WL
743 symptr->flags |= FLAG_SLASH;
744 patptr++;
66c611c5
MHM
745 if (patptr < patend &&
746 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 747 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 748 _action( symptr ) );
49704364
WL
749 }
750 break;
1604cfb0
MS
751 }
752 }
49704364
WL
753 } else {
754 /* at end - no count, no / */
755 symptr->howlen = e_no_len;
756 symptr->length = 1;
757 }
758
759 symptr->code = code;
fc241834 760 symptr->patptr = patptr;
49704364 761 return TRUE;
18529408 762 }
49704364 763 }
fc241834 764 symptr->patptr = patptr;
49704364 765 return FALSE;
18529408
IZ
766}
767
18529408 768/*
fc241834 769 There is no way to cleanly handle the case where we should process the
08ca2aa3 770 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
771 (e.g. estimates like strend-s as an upper bound for the number of
772 characters left wouldn't work). So if we foresee the need of this
773 (pattern starts with U or contains U0), we want to work on the encoded
774 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
775 themselves if they need to do a lot of unpacks like this on it
776*/
fc241834 777STATIC bool
08ca2aa3
TH
778need_utf8(const char *pat, const char *patend)
779{
780 bool first = TRUE;
7918f24d
NC
781
782 PERL_ARGS_ASSERT_NEED_UTF8;
783
08ca2aa3 784 while (pat < patend) {
1604cfb0
MS
785 if (pat[0] == '#') {
786 pat++;
787 pat = (const char *) memchr(pat, '\n', patend-pat);
788 if (!pat) return FALSE;
789 } else if (pat[0] == 'U') {
790 if (first || pat[1] == '0') return TRUE;
791 } else first = FALSE;
792 pat++;
08ca2aa3
TH
793 }
794 return FALSE;
795}
796
797STATIC char
798first_symbol(const char *pat, const char *patend) {
7918f24d
NC
799 PERL_ARGS_ASSERT_FIRST_SYMBOL;
800
08ca2aa3 801 while (pat < patend) {
1604cfb0
MS
802 if (pat[0] != '#') return pat[0];
803 pat++;
804 pat = (const char *) memchr(pat, '\n', patend-pat);
805 if (!pat) return 0;
806 pat++;
08ca2aa3
TH
807 }
808 return 0;
809}
810
811/*
dcccc8ff 812
7accc089
JH
813=for apidoc unpackstring
814
796b6530 815The engine implementing the C<unpack()> Perl function.
21ebfc7a 816
796b6530
KW
817Using the template C<pat..patend>, this function unpacks the string
818C<s..strend> into a number of mortal SVs, which it pushes onto the perl
819argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 820C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
821pushed elements.
822
796b6530
KW
823The C<strend> and C<patend> pointers should point to the byte following the
824last character of each string.
21ebfc7a
DM
825
826Although this function returns its values on the perl argument stack, it
827doesn't take any parameters from that stack (and thus in particular
796b6530 828there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 829example).
7accc089
JH
830
831=cut */
832
e1b825c1 833SSize_t
f7fe979e 834Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 835{
f7fe979e 836 tempsym_t sym;
08ca2aa3 837
7918f24d
NC
838 PERL_ARGS_ASSERT_UNPACKSTRING;
839
f337b084 840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3 841 else if (need_utf8(pat, patend)) {
1604cfb0
MS
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
845 s = (char *) bytes_to_utf8((U8 *) s, &len);
846 SAVEFREEPV(s);
847 strend = s + len;
848 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
849 }
850
f337b084 851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1604cfb0 852 flags |= FLAG_PARSE_UTF8;
08ca2aa3 853
f7fe979e 854 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
855
856 return unpack_rec(&sym, s, s, strend, NULL );
857}
858
e1b825c1 859STATIC SSize_t
f7fe979e 860S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 861{
20b7effb 862 dSP;
3297d27d 863 SV *sv = NULL;
e1b825c1 864 const SSize_t start_sp_offset = SP - PL_stack_base;
49704364 865 howlen_t howlen;
e1b825c1 866 SSize_t checksum = 0;
92d41999 867 UV cuv = 0;
a6ec74c1 868 NV cdouble = 0.0;
e1b825c1 869 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 870 bool beyond = FALSE;
21c16052 871 bool explicit_length;
9e27e96a 872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
874
875 PERL_ARGS_ASSERT_UNPACK_REC;
876
28be1210 877 symptr->strbeg = s - strbeg;
49704364 878
49704364 879 while (next_symbol(symptr)) {
1604cfb0
MS
880 packprops_t props;
881 SSize_t len;
f337b084 882 I32 datumtype = symptr->code;
a1219b5e 883 bool needs_swap;
1604cfb0
MS
884 /* do first one only unless in list context
885 / is implemented by unpacking the count, then popping it from the
886 stack, so must check that we're not in the middle of a / */
49704364 887 if ( unpack_only_one
1604cfb0
MS
888 && (SP - PL_stack_base == start_sp_offset + 1)
889 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 890 break;
49704364 891
f337b084 892 switch (howlen = symptr->howlen) {
1604cfb0
MS
893 case e_star:
894 len = strend - strbeg; /* long enough */
895 break;
896 default:
897 /* e_no_len and e_number */
898 len = symptr->length;
899 break;
49704364 900 }
18529408 901
21c16052 902 explicit_length = TRUE;
a6ec74c1 903 redo_switch:
49704364 904 beyond = s >= strend;
a7a3cfaa 905
1604cfb0
MS
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
907 if (props) {
908 /* props nonzero means we can process this letter. */
e1b825c1
Z
909 const SSize_t size = props & PACK_SIZE_MASK;
910 const SSize_t howmany = (strend - s) / size;
1604cfb0
MS
911 if (len > howmany)
912 len = howmany;
a7a3cfaa 913
1604cfb0
MS
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
916 EXTEND(SP, len);
917 EXTEND_MORTAL(len);
918 }
919 }
a7a3cfaa 920
a1219b5e
NC
921 needs_swap = NEEDS_SWAP(datumtype);
922
1604cfb0
MS
923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
924 default:
d3cdb721 925 /* diag_listed_as: Invalid type '%s' in %s */
1604cfb0 926 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 927
1604cfb0
MS
928 case '%':
929 if (howlen == e_no_len)
930 len = 16; /* len is not specified */
931 checksum = len;
932 cuv = 0;
933 cdouble = 0;
934 continue;
81d52ecd 935
1604cfb0
MS
936 case '(':
937 {
49704364 938 tempsym_t savsym = *symptr;
9e27e96a 939 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1604cfb0 940 symptr->flags |= group_modifiers;
49704364 941 symptr->patend = savsym.grpend;
c0668a30 942 /* cppcheck-suppress autoVariables */
1604cfb0 943 symptr->previous = &savsym;
49704364 944 symptr->level++;
1604cfb0
MS
945 PUTBACK;
946 if (len && unpack_only_one) len = 1;
947 while (len--) {
948 symptr->patptr = savsym.grpbeg;
949 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
950 else symptr->flags &= ~FLAG_PARSE_UTF8;
951 unpack_rec(symptr, s, strbeg, strend, &s);
08ca2aa3 952 if (s == strend && savsym.howlen == e_star)
1604cfb0
MS
953 break; /* No way to continue */
954 }
955 SPAGAIN;
28be1210 956 savsym.flags = symptr->flags & ~group_modifiers;
49704364 957 *symptr = savsym;
1604cfb0
MS
958 break;
959 }
960 case '.' | TYPE_IS_SHRIEKING:
961 case '.': {
962 const char *from;
963 SV *sv;
964 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
965 if (howlen == e_star) from = strbeg;
966 else if (len <= 0) from = s;
967 else {
968 tempsym_t *group = symptr;
969
970 while (--len && group) group = group->previous;
971 from = group ? strbeg + group->strbeg : strbeg;
972 }
973 sv = from <= s ?
974 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
976 mXPUSHs(sv);
977 break;
978 }
979 case '@' | TYPE_IS_SHRIEKING:
980 case '@':
981 s = strbeg + symptr->strbeg;
982 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
983 {
984 while (len > 0) {
985 if (s >= strend)
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
987 s += UTF8SKIP(s);
988 len--;
989 }
990 if (s > strend)
991 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992 } else {
993 if (strend-s < len)
994 Perl_croak(aTHX_ "'@' outside of string in unpack");
995 s += len;
996 }
997 break;
998 case 'X' | TYPE_IS_SHRIEKING:
999 if (!len) /* Avoid division by 0 */
1000 len = 1;
1001 if (utf8) {
1002 const char *hop, *last;
1003 SSize_t l = len;
1004 hop = last = strbeg;
1005 while (hop < s) {
1006 hop += UTF8SKIP(hop);
1007 if (--l == 0) {
1008 last = hop;
1009 l = len;
1010 }
1011 }
1012 if (last > s)
1013 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1014 s = last;
1015 break;
1016 }
1017 len = (s - strbeg) % len;
1018 /* FALLTHROUGH */
1019 case 'X':
1020 if (utf8) {
1021 while (len > 0) {
1022 if (s <= strbeg)
1023 Perl_croak(aTHX_ "'X' outside of string in unpack");
1024 while (--s, UTF8_IS_CONTINUATION(*s)) {
1025 if (s <= strbeg)
1026 Perl_croak(aTHX_ "'X' outside of string in unpack");
1027 }
1028 len--;
1029 }
1030 } else {
1031 if (len > s - strbeg)
1032 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1033 s -= len;
1034 }
1035 break;
1036 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 1037 SSize_t ai32;
1604cfb0
MS
1038 if (!len) /* Avoid division by 0 */
1039 len = 1;
1040 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041 else ai32 = (s - strbeg) % len;
1042 if (ai32 == 0) break;
1043 len -= ai32;
1044 }
1045 /* FALLTHROUGH */
1046 case 'x':
1047 if (utf8) {
1048 while (len>0) {
1049 if (s >= strend)
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1051 s += UTF8SKIP(s);
1052 len--;
1053 }
1054 } else {
1055 if (len > strend - s)
1056 Perl_croak(aTHX_ "'x' outside of string in unpack");
1057 s += len;
1058 }
1059 break;
1060 case '/':
1061 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1062
1063 case 'A':
1064 case 'Z':
1065 case 'a':
1066 if (checksum) {
1067 /* Preliminary length estimate is assumed done in 'W' */
1068 if (len > strend - s) len = strend - s;
1069 goto W_checksum;
1070 }
1071 if (utf8) {
1072 SSize_t l;
1073 const char *hop;
1074 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075 if (hop >= strend) {
1076 if (hop > strend)
1077 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1078 break;
1079 }
1080 }
1081 if (hop > strend)
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 len = hop - s;
1084 } else if (len > strend - s)
1085 len = strend - s;
1086
1087 if (datumtype == 'Z') {
1088 /* 'Z' strips stuff after first null */
1089 const char *ptr, *end;
1090 end = s + len;
1091 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1092 sv = newSVpvn(s, ptr-s);
1093 if (howlen == e_star) /* exact for 'Z*' */
1094 len = ptr-s + (ptr != strend ? 1 : 0);
1095 } else if (datumtype == 'A') {
1096 /* 'A' strips both nulls and spaces */
1097 const char *ptr;
1098 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1099 for (ptr = s+len-1; ptr >= s; ptr--) {
1100 if ( *ptr != 0
1101 && !UTF8_IS_CONTINUATION(*ptr)
1102 && !isSPACE_utf8_safe(ptr, strend))
1103 {
1104 break;
1105 }
1106 }
1604cfb0
MS
1107 if (ptr >= s) ptr += UTF8SKIP(ptr);
1108 else ptr++;
1109 if (ptr > s+len)
1110 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1111 } else {
1112 for (ptr = s+len-1; ptr >= s; ptr--)
1113 if (*ptr != 0 && !isSPACE(*ptr)) break;
1114 ptr++;
1115 }
1116 sv = newSVpvn(s, ptr-s);
1117 } else sv = newSVpvn(s, len);
1118
1119 if (utf8) {
1120 SvUTF8_on(sv);
1121 /* Undo any upgrade done due to need_utf8() */
1122 if (!(symptr->flags & FLAG_WAS_UTF8))
1123 sv_utf8_downgrade(sv, 0);
1124 }
1125 mXPUSHs(sv);
1126 s += len;
1127 break;
1128 case 'B':
1129 case 'b': {
1130 char *str;
1131 if (howlen == e_star || len > (strend - s) * 8)
1132 len = (strend - s) * 8;
1133 if (checksum) {
1134 if (utf8)
1135 while (len >= 8 && s < strend) {
1136 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1137 len -= 8;
1138 }
1139 else
1140 while (len >= 8) {
1141 cuv += PL_bitcount[*(U8 *)s++];
1142 len -= 8;
1143 }
1144 if (len && s < strend) {
1145 U8 bits;
1146 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1147 if (datumtype == 'b')
1148 while (len-- > 0) {
1149 if (bits & 1) cuv++;
1150 bits >>= 1;
1151 }
1152 else
1153 while (len-- > 0) {
1154 if (bits & 0x80) cuv++;
1155 bits <<= 1;
1156 }
1157 }
1158 break;
1159 }
1160
1161 sv = sv_2mortal(newSV(len ? len : 1));
1162 SvPOK_on(sv);
1163 str = SvPVX(sv);
1164 if (datumtype == 'b') {
1165 U8 bits = 0;
1166 const SSize_t ai32 = len;
1167 for (len = 0; len < ai32; len++) {
1168 if (len & 7) bits >>= 1;
1169 else if (utf8) {
1170 if (s >= strend) break;
1171 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1172 } else bits = *(U8 *) s++;
1173 *str++ = bits & 1 ? '1' : '0';
1174 }
1175 } else {
1176 U8 bits = 0;
1177 const SSize_t ai32 = len;
1178 for (len = 0; len < ai32; len++) {
1179 if (len & 7) bits <<= 1;
1180 else if (utf8) {
1181 if (s >= strend) break;
1182 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1183 } else bits = *(U8 *) s++;
1184 *str++ = bits & 0x80 ? '1' : '0';
1185 }
1186 }
1187 *str = '\0';
1188 SvCUR_set(sv, str - SvPVX_const(sv));
1189 XPUSHs(sv);
1190 break;
1191 }
1192 case 'H':
1193 case 'h': {
1194 char *str = NULL;
1195 /* Preliminary length estimate, acceptable for utf8 too */
1196 if (howlen == e_star || len > (strend - s) * 2)
1197 len = (strend - s) * 2;
1198 if (!checksum) {
1199 sv = sv_2mortal(newSV(len ? len : 1));
1200 SvPOK_on(sv);
1201 str = SvPVX(sv);
1202 }
1203 if (datumtype == 'h') {
1204 U8 bits = 0;
1205 SSize_t ai32 = len;
1206 for (len = 0; len < ai32; len++) {
1207 if (len & 1) bits >>= 4;
1208 else if (utf8) {
1209 if (s >= strend) break;
1210 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1211 } else bits = * (U8 *) s++;
1212 if (!checksum)
1213 *str++ = PL_hexdigit[bits & 15];
1214 }
1215 } else {
1216 U8 bits = 0;
1217 const SSize_t ai32 = len;
1218 for (len = 0; len < ai32; len++) {
1219 if (len & 1) bits <<= 4;
1220 else if (utf8) {
1221 if (s >= strend) break;
1222 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1223 } else bits = *(U8 *) s++;
1224 if (!checksum)
1225 *str++ = PL_hexdigit[(bits >> 4) & 15];
1226 }
1227 }
1228 if (!checksum) {
1229 *str = '\0';
1230 SvCUR_set(sv, str - SvPVX_const(sv));
1231 XPUSHs(sv);
1232 }
1233 break;
1234 }
1235 case 'C':
1651fc44
ML
1236 if (len == 0) {
1237 if (explicit_length)
1604cfb0
MS
1238 /* Switch to "character" mode */
1239 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1240 break;
1241 }
1242 /* FALLTHROUGH */
1243 case 'c':
1244 while (len-- > 0 && s < strend) {
1245 int aint;
1246 if (utf8)
1247 {
1248 STRLEN retlen;
1249 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1250 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1251 if (retlen == (STRLEN) -1)
1252 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1253 s += retlen;
1254 }
1255 else
1256 aint = *(U8 *)(s)++;
1257 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1258 aint -= 256;
1259 if (!checksum)
1260 mPUSHi(aint);
1261 else if (checksum > bits_in_uv)
1262 cdouble += (NV)aint;
1263 else
1264 cuv += aint;
1265 }
1266 break;
1267 case 'W':
1268 W_checksum:
1269 if (utf8) {
1270 while (len-- > 0 && s < strend) {
1271 STRLEN retlen;
1272 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1273 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1274 if (retlen == (STRLEN) -1)
1275 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1276 s += retlen;
1277 if (!checksum)
1278 mPUSHu(val);
1279 else if (checksum > bits_in_uv)
1280 cdouble += (NV) val;
1281 else
1282 cuv += val;
1283 }
1284 } else if (!checksum)
1285 while (len-- > 0) {
1286 const U8 ch = *(U8 *) s++;
1287 mPUSHu(ch);
1288 }
1289 else if (checksum > bits_in_uv)
1290 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1291 else
1292 while (len-- > 0) cuv += *(U8 *) s++;
1293 break;
1294 case 'U':
1295 if (len == 0) {
c5333953 1296 if (explicit_length && howlen != e_star) {
1604cfb0
MS
1297 /* Switch to "bytes in UTF-8" mode */
1298 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1299 else
1300 /* Should be impossible due to the need_utf8() test */
1301 Perl_croak(aTHX_ "U0 mode on a byte string");
1302 }
1303 break;
1304 }
1305 if (len > strend - s) len = strend - s;
1306 if (!checksum) {
1307 if (len && unpack_only_one) len = 1;
1308 EXTEND(SP, len);
1309 EXTEND_MORTAL(len);
1310 }
1311 while (len-- > 0 && s < strend) {
1312 STRLEN retlen;
1313 UV auv;
1314 if (utf8) {
1315 U8 result[UTF8_MAXLEN+1];
1316 const char *ptr = s;
1317 STRLEN len;
1318 /* Bug: warns about bad utf8 even if we are short on bytes
1319 and will break out of the loop */
1320 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1321 'U'))
1322 break;
1323 len = UTF8SKIP(result);
1324 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1325 (char *) &result[1], len-1, 'U')) break;
3c2b2fcb
KW
1326 auv = utf8n_to_uvchr(result, len, &retlen,
1327 UTF8_ALLOW_DEFAULT);
1604cfb0
MS
1328 s = ptr;
1329 } else {
3c2b2fcb
KW
1330 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen,
1331 UTF8_ALLOW_DEFAULT);
1604cfb0
MS
1332 if (retlen == (STRLEN) -1)
1333 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1334 s += retlen;
1335 }
1336 if (!checksum)
1337 mPUSHu(auv);
1338 else if (checksum > bits_in_uv)
1339 cdouble += (NV) auv;
1340 else
1341 cuv += auv;
1342 }
1343 break;
1344 case 's' | TYPE_IS_SHRIEKING:
49704364 1345#if SHORTSIZE != SIZE16
1604cfb0
MS
1346 while (len-- > 0) {
1347 short ashort;
aaec8192 1348 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1604cfb0
MS
1349 if (!checksum)
1350 mPUSHi(ashort);
1351 else if (checksum > bits_in_uv)
1352 cdouble += (NV)ashort;
1353 else
1354 cuv += ashort;
1355 }
1356 break;
49704364 1357#else
1604cfb0 1358 /* FALLTHROUGH */
a6ec74c1 1359#endif
1604cfb0
MS
1360 case 's':
1361 while (len-- > 0) {
1362 I16 ai16;
08ca2aa3
TH
1363
1364#if U16SIZE > SIZE16
1604cfb0 1365 ai16 = 0;
08ca2aa3 1366#endif
aaec8192 1367 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1368#if U16SIZE > SIZE16
1604cfb0
MS
1369 if (ai16 > 32767)
1370 ai16 -= 65536;
a6ec74c1 1371#endif
1604cfb0
MS
1372 if (!checksum)
1373 mPUSHi(ai16);
1374 else if (checksum > bits_in_uv)
1375 cdouble += (NV)ai16;
1376 else
1377 cuv += ai16;
1378 }
1379 break;
1380 case 'S' | TYPE_IS_SHRIEKING:
49704364 1381#if SHORTSIZE != SIZE16
1604cfb0
MS
1382 while (len-- > 0) {
1383 unsigned short aushort;
1384 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1385 if (!checksum)
1386 mPUSHu(aushort);
1387 else if (checksum > bits_in_uv)
1388 cdouble += (NV)aushort;
1389 else
1390 cuv += aushort;
1391 }
1392 break;
49704364 1393#else
924ba076 1394 /* FALLTHROUGH */
49704364 1395#endif
1604cfb0
MS
1396 case 'v':
1397 case 'n':
1398 case 'S':
1399 while (len-- > 0) {
1400 U16 au16;
08ca2aa3 1401#if U16SIZE > SIZE16
1604cfb0 1402 au16 = 0;
08ca2aa3 1403#endif
aaec8192 1404 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1604cfb0
MS
1405 if (datumtype == 'n')
1406 au16 = PerlSock_ntohs(au16);
1407 if (datumtype == 'v')
1408 au16 = vtohs(au16);
1409 if (!checksum)
1410 mPUSHu(au16);
1411 else if (checksum > bits_in_uv)
1412 cdouble += (NV) au16;
1413 else
1414 cuv += au16;
1415 }
1416 break;
1417 case 'v' | TYPE_IS_SHRIEKING:
1418 case 'n' | TYPE_IS_SHRIEKING:
1419 while (len-- > 0) {
1420 I16 ai16;
08ca2aa3 1421# if U16SIZE > SIZE16
1604cfb0 1422 ai16 = 0;
08ca2aa3 1423# endif
aaec8192 1424 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1425 /* There should never be any byte-swapping here. */
1426 assert(!TYPE_ENDIANNESS(datumtype));
1604cfb0
MS
1427 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1428 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1429 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1430 ai16 = (I16) vtohs((U16) ai16);
1431 if (!checksum)
1432 mPUSHi(ai16);
1433 else if (checksum > bits_in_uv)
1434 cdouble += (NV) ai16;
1435 else
1436 cuv += ai16;
1437 }
1438 break;
1439 case 'i':
1440 case 'i' | TYPE_IS_SHRIEKING:
1441 while (len-- > 0) {
1442 int aint;
aaec8192 1443 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1604cfb0
MS
1444 if (!checksum)
1445 mPUSHi(aint);
1446 else if (checksum > bits_in_uv)
1447 cdouble += (NV)aint;
1448 else
1449 cuv += aint;
1450 }
1451 break;
1452 case 'I':
1453 case 'I' | TYPE_IS_SHRIEKING:
1454 while (len-- > 0) {
1455 unsigned int auint;
aaec8192 1456 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1604cfb0
MS
1457 if (!checksum)
1458 mPUSHu(auint);
1459 else if (checksum > bits_in_uv)
1460 cdouble += (NV)auint;
1461 else
1462 cuv += auint;
1463 }
1464 break;
1465 case 'j':
1466 while (len-- > 0) {
1467 IV aiv;
aaec8192 1468 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1604cfb0
MS
1469 if (!checksum)
1470 mPUSHi(aiv);
1471 else if (checksum > bits_in_uv)
1472 cdouble += (NV)aiv;
1473 else
1474 cuv += aiv;
1475 }
1476 break;
1477 case 'J':
1478 while (len-- > 0) {
1479 UV auv;
aaec8192 1480 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1604cfb0
MS
1481 if (!checksum)
1482 mPUSHu(auv);
1483 else if (checksum > bits_in_uv)
1484 cdouble += (NV)auv;
1485 else
1486 cuv += auv;
1487 }
1488 break;
1489 case 'l' | TYPE_IS_SHRIEKING:
49704364 1490#if LONGSIZE != SIZE32
1604cfb0
MS
1491 while (len-- > 0) {
1492 long along;
aaec8192 1493 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1604cfb0
MS
1494 if (!checksum)
1495 mPUSHi(along);
1496 else if (checksum > bits_in_uv)
1497 cdouble += (NV)along;
1498 else
1499 cuv += along;
1500 }
1501 break;
49704364 1502#else
1604cfb0 1503 /* FALLTHROUGH */
a6ec74c1 1504#endif
1604cfb0
MS
1505 case 'l':
1506 while (len-- > 0) {
1507 I32 ai32;
08ca2aa3 1508#if U32SIZE > SIZE32
1604cfb0 1509 ai32 = 0;
08ca2aa3 1510#endif
aaec8192 1511 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1512#if U32SIZE > SIZE32
1604cfb0 1513 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1514#endif
1604cfb0
MS
1515 if (!checksum)
1516 mPUSHi(ai32);
1517 else if (checksum > bits_in_uv)
1518 cdouble += (NV)ai32;
1519 else
1520 cuv += ai32;
1521 }
1522 break;
1523 case 'L' | TYPE_IS_SHRIEKING:
49704364 1524#if LONGSIZE != SIZE32
1604cfb0
MS
1525 while (len-- > 0) {
1526 unsigned long aulong;
aaec8192 1527 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1604cfb0
MS
1528 if (!checksum)
1529 mPUSHu(aulong);
1530 else if (checksum > bits_in_uv)
1531 cdouble += (NV)aulong;
1532 else
1533 cuv += aulong;
1534 }
1535 break;
49704364 1536#else
924ba076 1537 /* FALLTHROUGH */
49704364 1538#endif
1604cfb0
MS
1539 case 'V':
1540 case 'N':
1541 case 'L':
1542 while (len-- > 0) {
1543 U32 au32;
08ca2aa3 1544#if U32SIZE > SIZE32
1604cfb0 1545 au32 = 0;
08ca2aa3 1546#endif
aaec8192 1547 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1604cfb0
MS
1548 if (datumtype == 'N')
1549 au32 = PerlSock_ntohl(au32);
1550 if (datumtype == 'V')
1551 au32 = vtohl(au32);
1552 if (!checksum)
1553 mPUSHu(au32);
1554 else if (checksum > bits_in_uv)
1555 cdouble += (NV)au32;
1556 else
1557 cuv += au32;
1558 }
1559 break;
1560 case 'V' | TYPE_IS_SHRIEKING:
1561 case 'N' | TYPE_IS_SHRIEKING:
1562 while (len-- > 0) {
1563 I32 ai32;
f8e5a5db 1564#if U32SIZE > SIZE32
1604cfb0 1565 ai32 = 0;
f8e5a5db 1566#endif
aaec8192 1567 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1568 /* There should never be any byte swapping here. */
1569 assert(!TYPE_ENDIANNESS(datumtype));
1604cfb0
MS
1570 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1571 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1572 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1573 ai32 = (I32)vtohl((U32)ai32);
1574 if (!checksum)
1575 mPUSHi(ai32);
1576 else if (checksum > bits_in_uv)
1577 cdouble += (NV)ai32;
1578 else
1579 cuv += ai32;
1580 }
1581 break;
1582 case 'p':
1583 while (len-- > 0) {
1584 const char *aptr;
aaec8192 1585 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1604cfb0
MS
1586 /* newSVpv generates undef if aptr is NULL */
1587 mPUSHs(newSVpv(aptr, 0));
1588 }
1589 break;
1590 case 'w':
1591 {
1592 UV auv = 0;
1593 size_t bytes = 0;
1594
1595 while (len > 0 && s < strend) {
1596 U8 ch;
1597 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1598 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1599 /* UTF8_IS_XXXXX not right here because this is a BER, not
1600 * UTF-8 format - using constant 0x80 */
1604cfb0
MS
1601 if (ch < 0x80) {
1602 bytes = 0;
1603 mPUSHu(auv);
1604 len--;
1605 auv = 0;
1606 continue;
1607 }
1608 if (++bytes >= sizeof(UV)) { /* promote to string */
1609 const char *t;
1610
1611 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
147e3846 1612 (int)TYPE_DIGITS(UV), auv);
1604cfb0
MS
1613 while (s < strend) {
1614 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1615 sv = mul128(sv, (U8)(ch & 0x7f));
1616 if (!(ch & 0x80)) {
1617 bytes = 0;
1618 break;
1619 }
1620 }
1621 t = SvPV_nolen_const(sv);
1622 while (*t == '0')
1623 t++;
1624 sv_chop(sv, t);
1625 mPUSHs(sv);
1626 len--;
1627 auv = 0;
1628 }
1629 }
1630 if ((s >= strend) && bytes)
1631 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1632 }
1633 break;
1634 case 'P':
1635 if (symptr->howlen == e_star)
1636 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1637 EXTEND(SP, 1);
1638 if (s + sizeof(char*) <= strend) {
1639 char *aptr;
aaec8192 1640 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1604cfb0
MS
1641 /* newSVpvn generates undef if aptr is NULL */
1642 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1643 }
1644 break;
c174bf3b 1645#if defined(HAS_QUAD) && IVSIZE >= 8
1604cfb0
MS
1646 case 'q':
1647 while (len-- > 0) {
1648 Quad_t aquad;
aaec8192 1649 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1604cfb0 1650 if (!checksum)
c174bf3b 1651 mPUSHs(newSViv((IV)aquad));
1604cfb0
MS
1652 else if (checksum > bits_in_uv)
1653 cdouble += (NV)aquad;
1654 else
1655 cuv += aquad;
1656 }
1657 break;
1658 case 'Q':
1659 while (len-- > 0) {
1660 Uquad_t auquad;
aaec8192 1661 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1604cfb0
MS
1662 if (!checksum)
1663 mPUSHs(newSVuv((UV)auquad));
1664 else if (checksum > bits_in_uv)
1665 cdouble += (NV)auquad;
1666 else
1667 cuv += auquad;
1668 }
1669 break;
1640b983 1670#endif
1604cfb0
MS
1671 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1672 case 'f':
1673 while (len-- > 0) {
1674 float afloat;
aaec8192 1675 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1604cfb0
MS
1676 if (!checksum)
1677 mPUSHn(afloat);
1678 else
1679 cdouble += afloat;
1680 }
1681 break;
1682 case 'd':
1683 while (len-- > 0) {
1684 double adouble;
aaec8192 1685 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1604cfb0
MS
1686 if (!checksum)
1687 mPUSHn(adouble);
1688 else
1689 cdouble += adouble;
1690 }
1691 break;
1692 case 'F':
1693 while (len-- > 0) {
1694 NV_bytes anv;
aaec8192
NC
1695 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1696 datumtype, needs_swap);
1604cfb0
MS
1697 if (!checksum)
1698 mPUSHn(anv.nv);
1699 else
1700 cdouble += anv.nv;
1701 }
1702 break;
77a2054e 1703#if defined(HAS_LONG_DOUBLE)
1604cfb0
MS
1704 case 'D':
1705 while (len-- > 0) {
1706 ld_bytes aldouble;
aaec8192
NC
1707 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1708 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1709 /* The most common long double format, the x86 80-bit
1710 * extended precision, has either 2 or 6 unused bytes,
1711 * which may contain garbage, which may contain
1712 * unintentional data. While we do zero the bytes of
1713 * the long double data in pack(), here in unpack() we
1714 * don't, because it's really hard to envision that
1715 * reading the long double off aldouble would be
e075ae47 1716 * affected by the unused bytes.
42262fd3
JH
1717 *
1718 * Note that trying to unpack 'long doubles' of 'long
1719 * doubles' packed in another system is in the general
1720 * case doomed without having more detail. */
1604cfb0
MS
1721 if (!checksum)
1722 mPUSHn(aldouble.ld);
1723 else
1724 cdouble += aldouble.ld;
1725 }
1726 break;
92d41999 1727#endif
1604cfb0
MS
1728 case 'u':
1729 if (!checksum) {
f7fe979e 1730 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1604cfb0
MS
1731 sv = sv_2mortal(newSV(l));
1732 if (l) {
12cad9bd
TC
1733 SvPOK_on(sv);
1734 *SvEND(sv) = '\0';
1735 }
1604cfb0 1736 }
08ca2aa3 1737
bee81d36
KW
1738 /* Note that all legal uuencoded strings are ASCII printables, so
1739 * have the same representation under UTF-8 vs not. This means we
1740 * can ignore UTF8ness on legal input. For illegal we stop at the
1741 * first failure, and don't report where/what that is, so again we
1742 * can ignore UTF8ness */
1743
9233b56b
KW
1744 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1745 I32 a, b, c, d;
1746 char hunk[3];
1747
1748 len = PL_uudmap[*(U8*)s++] & 077;
1749 while (len > 0) {
1750 if (s < strend && ISUUCHAR(*s))
1751 a = PL_uudmap[*(U8*)s++] & 077;
1752 else
1753 a = 0;
1754 if (s < strend && ISUUCHAR(*s))
1755 b = PL_uudmap[*(U8*)s++] & 077;
1756 else
1757 b = 0;
1758 if (s < strend && ISUUCHAR(*s))
1759 c = PL_uudmap[*(U8*)s++] & 077;
1760 else
1761 c = 0;
1762 if (s < strend && ISUUCHAR(*s))
1763 d = PL_uudmap[*(U8*)s++] & 077;
1764 else
1765 d = 0;
1766 hunk[0] = (char)((a << 2) | (b >> 4));
1767 hunk[1] = (char)((b << 4) | (c >> 2));
1768 hunk[2] = (char)((c << 6) | d);
1769 if (!checksum)
1770 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1771 len -= 3;
1772 }
1773 if (*s == '\n')
1774 s++;
1775 else /* possible checksum byte */
1776 if (s + 1 < strend && s[1] == '\n')
1777 s += 2;
1778 }
1604cfb0
MS
1779 if (!checksum)
1780 XPUSHs(sv);
1781 break;
1782 } /* End of switch */
49704364 1783
1604cfb0
MS
1784 if (checksum) {
1785 if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1786 (checksum > bits_in_uv &&
1787 memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1788 NV trouble, anv;
a6ec74c1 1789
08ca2aa3 1790 anv = (NV) (1 << (checksum & 15));
1604cfb0
MS
1791 while (checksum >= 16) {
1792 checksum -= 16;
1793 anv *= 65536.0;
1794 }
1795 while (cdouble < 0.0)
1796 cdouble += anv;
1797 cdouble = Perl_modf(cdouble / anv, &trouble);
09b94b1f
S
1798#ifdef LONGDOUBLE_DOUBLEDOUBLE
1799 /* Workaround for powerpc doubledouble modfl bug:
1800 * close to 1.0L and -1.0L cdouble is 0, and trouble
1801 * is cdouble / anv. */
1802 if (trouble != Perl_ceil(trouble)) {
1803 cdouble = trouble;
1804 if (cdouble > 1.0L) cdouble -= 1.0L;
1805 if (cdouble < -1.0L) cdouble += 1.0L;
1806 }
1807#endif
1808 cdouble *= anv;
1604cfb0
MS
1809 sv = newSVnv(cdouble);
1810 }
1811 else {
1812 if (checksum < bits_in_uv) {
1813 UV mask = nBIT_MASK(checksum);
1814 cuv &= mask;
1815 }
1816 sv = newSVuv(cuv);
1817 }
1818 mXPUSHs(sv);
1819 checksum = 0;
1820 }
fc241834 1821
49704364
WL
1822 if (symptr->flags & FLAG_SLASH){
1823 if (SP - PL_stack_base - start_sp_offset <= 0)
1604cfb0 1824 break;
49704364
WL
1825 if( next_symbol(symptr) ){
1826 if( symptr->howlen == e_number )
1604cfb0 1827 Perl_croak(aTHX_ "Count after length/code in unpack" );
49704364 1828 if( beyond ){
1604cfb0
MS
1829 /* ...end of char buffer then no decent length available */
1830 Perl_croak(aTHX_ "length/code after end of string in unpack" );
49704364 1831 } else {
1604cfb0 1832 /* take top of stack (hope it's numeric) */
49704364
WL
1833 len = POPi;
1834 if( len < 0 )
1835 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1836 }
1837 } else {
1604cfb0 1838 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
49704364
WL
1839 }
1840 datumtype = symptr->code;
21c16052 1841 explicit_length = FALSE;
1604cfb0 1842 goto redo_switch;
49704364 1843 }
a6ec74c1 1844 }
49704364 1845
18529408 1846 if (new_s)
1604cfb0 1847 *new_s = s;
18529408
IZ
1848 PUTBACK;
1849 return SP - PL_stack_base - start_sp_offset;
1850}
1851
91db8a47 1852PP_wrapped(pp_unpack, 2, 0)
18529408
IZ
1853{
1854 dSP;
bab9c0ac 1855 dPOPPOPssrl;
1c23e2bd 1856 U8 gimme = GIMME_V;
18529408
IZ
1857 STRLEN llen;
1858 STRLEN rlen;
5c144d81
NC
1859 const char *pat = SvPV_const(left, llen);
1860 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1861 const char *strend = s + rlen;
1862 const char *patend = pat + llen;
e1b825c1 1863 SSize_t cnt;
18529408
IZ
1864
1865 PUTBACK;
7accc089 1866 cnt = unpackstring(pat, patend, s, strend,
1604cfb0
MS
1867 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1868 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1869
18529408
IZ
1870 SPAGAIN;
1871 if ( !cnt && gimme == G_SCALAR )
1872 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1873 RETURN;
1874}
1875
f337b084 1876STATIC U8 *
e1b825c1 1877doencodes(U8 *h, const U8 *s, SSize_t len)
a6ec74c1 1878{
f337b084 1879 *h++ = PL_uuemap[len];
a6ec74c1 1880 while (len > 2) {
1604cfb0
MS
1881 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1882 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1883 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1884 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1885 s += 3;
1886 len -= 3;
a6ec74c1
JH
1887 }
1888 if (len > 0) {
e68aed92 1889 const U8 r = (len > 1 ? s[1] : '\0');
1604cfb0
MS
1890 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1891 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1892 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1893 *h++ = PL_uuemap[0];
a6ec74c1 1894 }
f337b084
TH
1895 *h++ = '\n';
1896 return h;
a6ec74c1
JH
1897}
1898
1899STATIC SV *
f7fe979e 1900S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1901{
8b6e33c7
AL
1902 SV *result = newSVpvn(s, l);
1903 char *const result_c = SvPV_nolen(result); /* convenience */
1904 char *out = result_c;
1905 bool skip = 1;
1906 bool ignore = 0;
a6ec74c1 1907
7918f24d
NC
1908 PERL_ARGS_ASSERT_IS_AN_INT;
1909
a6ec74c1
JH
1910 while (*s) {
1911 switch (*s) {
1912 case ' ':
1913 break;
1914 case '+':
1915 if (!skip) {
1604cfb0
MS
1916 SvREFCNT_dec(result);
1917 return (NULL);
a6ec74c1
JH
1918 }
1919 break;
1920 case '0':
1921 case '1':
1922 case '2':
1923 case '3':
1924 case '4':
1925 case '5':
1926 case '6':
1927 case '7':
1928 case '8':
1929 case '9':
1930 skip = 0;
1931 if (!ignore) {
1604cfb0 1932 *(out++) = *s;
a6ec74c1
JH
1933 }
1934 break;
1935 case '.':
1936 ignore = 1;
1937 break;
1938 default:
1939 SvREFCNT_dec(result);
1940 return (NULL);
1941 }
1942 s++;
1943 }
1944 *(out++) = '\0';
1945 SvCUR_set(result, out - result_c);
1946 return (result);
1947}
1948
1949/* pnum must be '\0' terminated */
1950STATIC int
1951S_div128(pTHX_ SV *pnum, bool *done)
1952{
8b6e33c7
AL
1953 STRLEN len;
1954 char * const s = SvPV(pnum, len);
1955 char *t = s;
1956 int m = 0;
1957
7918f24d
NC
1958 PERL_ARGS_ASSERT_DIV128;
1959
8b6e33c7
AL
1960 *done = 1;
1961 while (*t) {
1604cfb0
MS
1962 const int i = m * 10 + (*t - '0');
1963 const int r = (i >> 7); /* r < 10 */
1964 m = i & 0x7F;
1965 if (r) {
1966 *done = 0;
1967 }
1968 *(t++) = '0' + r;
a6ec74c1 1969 }
8b6e33c7
AL
1970 *(t++) = '\0';
1971 SvCUR_set(pnum, (STRLEN) (t - s));
1972 return (m);
a6ec74c1
JH
1973}
1974
18529408 1975/*
7accc089
JH
1976=for apidoc packlist
1977
796b6530 1978The engine implementing C<pack()> Perl function.
7accc089 1979
bfce84ec
AL
1980=cut
1981*/
7accc089
JH
1982
1983void
5aaab254 1984Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1985{
aadb217d
JH
1986 tempsym_t sym;
1987
7918f24d
NC
1988 PERL_ARGS_ASSERT_PACKLIST;
1989
f7fe979e 1990 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1991
f337b084
TH
1992 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1993 Also make sure any UTF8 flag is loaded */
56eb0262 1994 SvPV_force_nolen(cat);
bfce84ec 1995 if (DO_UTF8(cat))
1604cfb0 1996 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1997
49704364
WL
1998 (void)pack_rec( cat, &sym, beglist, endlist );
1999}
2000
f337b084
TH
2001/* like sv_utf8_upgrade, but also repoint the group start markers */
2002STATIC void
2003marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2004 STRLEN len;
2005 tempsym_t *group;
f7fe979e
AL
2006 const char *from_ptr, *from_start, *from_end, **marks, **m;
2007 char *to_start, *to_ptr;
f337b084
TH
2008
2009 if (SvUTF8(sv)) return;
2010
aa07b2f6 2011 from_start = SvPVX_const(sv);
f337b084
TH
2012 from_end = from_start + SvCUR(sv);
2013 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
1604cfb0 2014 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084 2015 if (from_ptr == from_end) {
1604cfb0
MS
2016 /* Simple case: no character needs to be changed */
2017 SvUTF8_on(sv);
2018 return;
f337b084
TH
2019 }
2020
3473cf63 2021 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2022 Newx(to_start, len, char);
f337b084
TH
2023 Copy(from_start, to_start, from_ptr-from_start, char);
2024 to_ptr = to_start + (from_ptr-from_start);
2025
a02a5408 2026 Newx(marks, sym_ptr->level+2, const char *);
f337b084 2027 for (group=sym_ptr; group; group = group->previous)
1604cfb0 2028 marks[group->level] = from_start + group->strbeg;
f337b084
TH
2029 marks[sym_ptr->level+1] = from_end+1;
2030 for (m = marks; *m < from_ptr; m++)
1604cfb0 2031 *m = to_start + (*m-from_start);
f337b084
TH
2032
2033 for (;from_ptr < from_end; from_ptr++) {
1604cfb0
MS
2034 while (*m == from_ptr) *m++ = to_ptr;
2035 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2036 }
2037 *to_ptr = 0;
2038
2039 while (*m == from_ptr) *m++ = to_ptr;
2040 if (m != marks + sym_ptr->level+1) {
1604cfb0
MS
2041 Safefree(marks);
2042 Safefree(to_start);
2043 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2044 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2045 }
2046 for (group=sym_ptr; group; group = group->previous)
1604cfb0 2047 group->strbeg = marks[group->level] - to_start;
f337b084
TH
2048 Safefree(marks);
2049
2050 if (SvOOK(sv)) {
1604cfb0
MS
2051 if (SvIVX(sv)) {
2052 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2053 from_start -= SvIVX(sv);
2054 SvIV_set(sv, 0);
2055 }
2056 SvFLAGS(sv) &= ~SVf_OOK;
f337b084
TH
2057 }
2058 if (SvLEN(sv) != 0)
1604cfb0 2059 Safefree(from_start);
f880fe2f 2060 SvPV_set(sv, to_start);
b162af07
SP
2061 SvCUR_set(sv, to_ptr - to_start);
2062 SvLEN_set(sv, len);
f337b084
TH
2063 SvUTF8_on(sv);
2064}
2065
2066/* Exponential string grower. Makes string extension effectively O(n)
2067 needed says how many extra bytes we need (not counting the final '\0')
2068 Only grows the string if there is an actual lack of space
2069*/
2070STATIC char *
0bd48802 2071S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2072 const STRLEN cur = SvCUR(sv);
2073 const STRLEN len = SvLEN(sv);
f337b084 2074 STRLEN extend;
7918f24d
NC
2075
2076 PERL_ARGS_ASSERT_SV_EXP_GROW;
2077
f337b084
TH
2078 if (len - cur > needed) return SvPVX(sv);
2079 extend = needed > len ? needed : len;
2080 return SvGROW(sv, len+extend+1);
2081}
49704364 2082
93f6e112 2083static SV *
b197e565 2084S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2085{
2086 SvGETMAGIC(sv);
93f6e112 2087 if (UNLIKELY(SvAMAGIC(sv)))
1604cfb0 2088 sv = sv_2num(sv);
99f450cc 2089 if (UNLIKELY(isinfnansv(sv))) {
1604cfb0
MS
2090 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2091 const NV nv = SvNV_nomg(sv);
2092 if (c == 'w')
2093 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2094 else
2095 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2096 }
93f6e112 2097 return sv;
354b74ae
FC
2098}
2099
93f6e112 2100#define SvIV_no_inf(sv,d) \
1604cfb0 2101 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
93f6e112 2102#define SvUV_no_inf(sv,d) \
1604cfb0 2103 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2104
49704364
WL
2105STATIC
2106SV **
f337b084 2107S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2108{
49704364 2109 tempsym_t lookahead;
e1b825c1 2110 SSize_t items = endlist - beglist;
f337b084
TH
2111 bool found = next_symbol(symptr);
2112 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2113 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2114 char* from;
f337b084 2115
7918f24d
NC
2116 PERL_ARGS_ASSERT_PACK_REC;
2117
f337b084 2118 if (symptr->level == 0 && found && symptr->code == 'U') {
1604cfb0
MS
2119 marked_upgrade(aTHX_ cat, symptr);
2120 symptr->flags |= FLAG_DO_UTF8;
2121 utf8 = 0;
49704364 2122 }
f337b084 2123 symptr->strbeg = SvCUR(cat);
49704364
WL
2124
2125 while (found) {
1604cfb0
MS
2126 SV *fromstr;
2127 STRLEN fromlen;
2128 SSize_t len;
2129 SV *lengthcode = NULL;
49704364 2130 I32 datumtype = symptr->code;
f337b084 2131 howlen_t howlen = symptr->howlen;
1604cfb0
MS
2132 char *start = SvPVX(cat);
2133 char *cur = start + SvCUR(cat);
a1219b5e 2134 bool needs_swap;
49704364 2135
fc1bb3f2 2136#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2137#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2138
2139 switch (howlen) {
1604cfb0
MS
2140 case e_star:
2141 len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2142 0 : items;
2143 break;
2144 default:
2145 /* e_no_len and e_number */
2146 len = symptr->length;
2147 break;
49704364
WL
2148 }
2149
1604cfb0
MS
2150 if (len) {
2151 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2152
1604cfb0
MS
2153 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2154 /* We can process this letter. */
2155 STRLEN size = props & PACK_SIZE_MASK;
2156 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2157 }
f337b084
TH
2158 }
2159
49704364
WL
2160 /* Look ahead for next symbol. Do we have code/code? */
2161 lookahead = *symptr;
2162 found = next_symbol(&lookahead);
1604cfb0
MS
2163 if (symptr->flags & FLAG_SLASH) {
2164 IV count;
2165 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2166 if (memCHRs("aAZ", lookahead.code)) {
2167 if (lookahead.howlen == e_number) count = lookahead.length;
2168 else {
2169 if (items > 0) {
2170 count = sv_len_utf8(*beglist);
2171 }
2172 else count = 0;
2173 if (lookahead.code == 'Z') count++;
2174 }
2175 } else {
2176 if (lookahead.howlen == e_number && lookahead.length < items)
2177 count = lookahead.length;
2178 else count = items;
2179 }
2180 lookahead.howlen = e_number;
2181 lookahead.length = count;
2182 lengthcode = sv_2mortal(newSViv(count));
2183 }
49704364 2184
a1219b5e
NC
2185 needs_swap = NEEDS_SWAP(datumtype);
2186
1604cfb0
MS
2187 /* Code inside the switch must take care to properly update
2188 cat (CUR length and '\0' termination) if it updated *cur and
2189 doesn't simply leave using break */
2190 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2191 default:
d3cdb721 2192 /* diag_listed_as: Invalid type '%s' in %s */
1604cfb0
MS
2193 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2194 (int) TYPE_NO_MODIFIERS(datumtype));
2195 case '%':
2196 Perl_croak(aTHX_ "'%%' may not be used in pack");
2197
2198 case '.' | TYPE_IS_SHRIEKING:
2199 case '.':
2200 if (howlen == e_star) from = start;
2201 else if (len == 0) from = cur;
2202 else {
2203 tempsym_t *group = symptr;
2204
2205 while (--len && group) group = group->previous;
2206 from = group ? start + group->strbeg : start;
2207 }
2208 fromstr = NEXTFROM;
2209 len = SvIV_no_inf(fromstr, datumtype);
2210 goto resize;
2211 case '@' | TYPE_IS_SHRIEKING:
2212 case '@':
2213 from = start + symptr->strbeg;
2214 resize:
2215 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2216 if (len >= 0) {
2217 while (len && from < cur) {
2218 from += UTF8SKIP(from);
2219 len--;
2220 }
2221 if (from > cur)
2222 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2223 if (len) {
2224 /* Here we know from == cur */
2225 grow:
2226 GROWING(0, cat, start, cur, len);
2227 Zero(cur, len, char);
2228 cur += len;
2229 } else if (from < cur) {
2230 len = cur - from;
2231 goto shrink;
2232 } else goto no_change;
2233 } else {
2234 cur = from;
2235 len = -len;
2236 goto utf8_shrink;
2237 }
2238 else {
2239 len -= cur - from;
2240 if (len > 0) goto grow;
2241 if (len == 0) goto no_change;
2242 len = -len;
2243 goto shrink;
2244 }
2245 break;
2246
2247 case '(': {
49704364 2248 tempsym_t savsym = *symptr;
1604cfb0
MS
2249 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2250 symptr->flags |= group_modifiers;
49704364
WL
2251 symptr->patend = savsym.grpend;
2252 symptr->level++;
c0668a30 2253 /* cppcheck-suppress autoVariables */
1604cfb0
MS
2254 symptr->previous = &lookahead;
2255 while (len--) {
2256 U32 was_utf8;
2257 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2258 else symptr->flags &= ~FLAG_PARSE_UTF8;
2259 was_utf8 = SvUTF8(cat);
2260 symptr->patptr = savsym.grpbeg;
2261 beglist = pack_rec(cat, symptr, beglist, endlist);
2262 if (SvUTF8(cat) != was_utf8)
2263 /* This had better be an upgrade while in utf8==0 mode */
2264 utf8 = 1;
2265
2266 if (savsym.howlen == e_star && beglist == endlist)
2267 break; /* No way to continue */
2268 }
2269 items = endlist - beglist;
2270 lookahead.flags = symptr->flags & ~group_modifiers;
2271 goto no_change;
2272 }
2273 case 'X' | TYPE_IS_SHRIEKING:
2274 if (!len) /* Avoid division by 0 */
2275 len = 1;
2276 if (utf8) {
2277 char *hop, *last;
2278 SSize_t l = len;
2279 hop = last = start;
2280 while (hop < cur) {
2281 hop += UTF8SKIP(hop);
2282 if (--l == 0) {
2283 last = hop;
2284 l = len;
2285 }
2286 }
2287 if (last > cur)
2288 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2289 cur = last;
2290 break;
2291 }
2292 len = (cur-start) % len;
2293 /* FALLTHROUGH */
2294 case 'X':
2295 if (utf8) {
2296 if (len < 1) goto no_change;
2297 utf8_shrink:
2298 while (len > 0) {
2299 if (cur <= start)
2300 Perl_croak(aTHX_ "'%c' outside of string in pack",
2301 (int) TYPE_NO_MODIFIERS(datumtype));
2302 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2303 if (cur <= start)
2304 Perl_croak(aTHX_ "'%c' outside of string in pack",
2305 (int) TYPE_NO_MODIFIERS(datumtype));
2306 }
2307 len--;
2308 }
2309 } else {
2310 shrink:
2311 if (cur - start < len)
2312 Perl_croak(aTHX_ "'%c' outside of string in pack",
2313 (int) TYPE_NO_MODIFIERS(datumtype));
2314 cur -= len;
2315 }
2316 if (cur < start+symptr->strbeg) {
2317 /* Make sure group starts don't point into the void */
2318 tempsym_t *group;
2319 const STRLEN length = cur-start;
2320 for (group = symptr;
2321 group && length < group->strbeg;
2322 group = group->previous) group->strbeg = length;
2323 lookahead.strbeg = length;
2324 }
2325 break;
2326 case 'x' | TYPE_IS_SHRIEKING: {
2327 SSize_t ai32;
2328 if (!len) /* Avoid division by 0 */
2329 len = 1;
2330 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2331 else ai32 = (cur - start) % len;
2332 if (ai32 == 0) goto no_change;
2333 len -= ai32;
2334 }
2335 /* FALLTHROUGH */
2336 case 'x':
2337 goto grow;
2338 case 'A':
2339 case 'Z':
2340 case 'a': {
2341 const char *aptr;
2342
2343 fromstr = NEXTFROM;
2344 aptr = SvPV_const(fromstr, fromlen);
2345 if (DO_UTF8(fromstr)) {
f7fe979e 2346 const char *end, *s;
f337b084 2347
1604cfb0
MS
2348 if (!utf8 && !SvUTF8(cat)) {
2349 marked_upgrade(aTHX_ cat, symptr);
2350 lookahead.flags |= FLAG_DO_UTF8;
2351 lookahead.strbeg = symptr->strbeg;
2352 utf8 = 1;
2353 start = SvPVX(cat);
2354 cur = start + SvCUR(cat);
2355 }
2356 if (howlen == e_star) {
2357 if (utf8) goto string_copy;
2358 len = fromlen+1;
2359 }
2360 s = aptr;
2361 end = aptr + fromlen;
2362 fromlen = datumtype == 'Z' ? len-1 : len;
2363 while ((SSize_t) fromlen > 0 && s < end) {
2364 s += UTF8SKIP(s);
2365 fromlen--;
2366 }
2367 if (s > end)
2368 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2369 if (utf8) {
2370 len = fromlen;
2371 if (datumtype == 'Z') len++;
2372 fromlen = s-aptr;
2373 len += fromlen;
2374
2375 goto string_copy;
2376 }
2377 fromlen = len - fromlen;
2378 if (datumtype == 'Z') fromlen--;
2379 if (howlen == e_star) {
2380 len = fromlen;
2381 if (datumtype == 'Z') len++;
2382 }
2383 GROWING(0, cat, start, cur, len);
2384 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2385 datumtype | TYPE_IS_PACK))
2386 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2387 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2388 (int)datumtype, aptr, end, cur, fromlen);
2389 cur += fromlen;
2390 len -= fromlen;
2391 } else if (utf8) {
2392 if (howlen == e_star) {
2393 len = fromlen;
2394 if (datumtype == 'Z') len++;
2395 }
2396 if (len <= (SSize_t) fromlen) {
2397 fromlen = len;
2398 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2399 }
2400 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2401 upgrade, so:
2402 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2403 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2404 len -= fromlen;
2405 while (fromlen > 0) {
2406 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2407 aptr++;
2408 fromlen--;
2409 }
2410 } else {
2411 string_copy:
2412 if (howlen == e_star) {
2413 len = fromlen;
2414 if (datumtype == 'Z') len++;
2415 }
2416 if (len <= (SSize_t) fromlen) {
2417 fromlen = len;
2418 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2419 }
2420 GROWING(0, cat, start, cur, len);
2421 Copy(aptr, cur, fromlen, char);
2422 cur += fromlen;
2423 len -= fromlen;
2424 }
2425 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2426 cur += len;
2427 SvTAINT(cat);
2428 break;
2429 }
2430 case 'B':
2431 case 'b': {
2432 const char *str, *end;
2433 SSize_t l, field_len;
2434 U8 bits;
2435 bool utf8_source;
2436 U32 utf8_flags;
2437
2438 fromstr = NEXTFROM;
2439 str = SvPV_const(fromstr, fromlen);
2440 end = str + fromlen;
2441 if (DO_UTF8(fromstr)) {
2442 utf8_source = TRUE;
2443 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2444 } else {
2445 utf8_source = FALSE;
2446 utf8_flags = 0; /* Unused, but keep compilers happy */
2447 }
2448 if (howlen == e_star) len = fromlen;
2449 field_len = (len+7)/8;
2450 GROWING(utf8, cat, start, cur, field_len);
2451 if (len > (SSize_t)fromlen) len = fromlen;
2452 bits = 0;
2453 l = 0;
2454 if (datumtype == 'B')
2455 while (l++ < len) {
2456 if (utf8_source) {
2457 UV val = 0;
2458 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2459 bits |= val & 1;
2460 } else bits |= *str++ & 1;
2461 if (l & 7) bits <<= 1;
2462 else {
2463 PUSH_BYTE(utf8, cur, bits);
2464 bits = 0;
2465 }
2466 }
2467 else
2468 /* datumtype == 'b' */
2469 while (l++ < len) {
2470 if (utf8_source) {
2471 UV val = 0;
2472 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2473 if (val & 1) bits |= 0x80;
2474 } else if (*str++ & 1)
2475 bits |= 0x80;
2476 if (l & 7) bits >>= 1;
2477 else {
2478 PUSH_BYTE(utf8, cur, bits);
2479 bits = 0;
2480 }
2481 }
2482 l--;
2483 if (l & 7) {
2484 if (datumtype == 'B')
2485 bits <<= 7 - (l & 7);
2486 else
2487 bits >>= 7 - (l & 7);
2488 PUSH_BYTE(utf8, cur, bits);
2489 l += 7;
2490 }
2491 /* Determine how many chars are left in the requested field */
2492 l /= 8;
2493 if (howlen == e_star) field_len = 0;
2494 else field_len -= l;
2495 Zero(cur, field_len, char);
2496 cur += field_len;
2497 break;
2498 }
2499 case 'H':
2500 case 'h': {
2501 const char *str, *end;
2502 SSize_t l, field_len;
2503 U8 bits;
2504 bool utf8_source;
2505 U32 utf8_flags;
2506
2507 fromstr = NEXTFROM;
2508 str = SvPV_const(fromstr, fromlen);
2509 end = str + fromlen;
2510 if (DO_UTF8(fromstr)) {
2511 utf8_source = TRUE;
2512 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2513 } else {
2514 utf8_source = FALSE;
2515 utf8_flags = 0; /* Unused, but keep compilers happy */
2516 }
2517 if (howlen == e_star) len = fromlen;
2518 field_len = (len+1)/2;
2519 GROWING(utf8, cat, start, cur, field_len);
2520 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2521 bits = 0;
2522 l = 0;
2523 if (datumtype == 'H')
2524 while (l++ < len) {
2525 if (utf8_source) {
2526 UV val = 0;
2527 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2528 if (val < 256 && isALPHA(val))
2529 bits |= (val + 9) & 0xf;
2530 else
2531 bits |= val & 0xf;
2532 } else if (isALPHA(*str))
2533 bits |= (*str++ + 9) & 0xf;
2534 else
2535 bits |= *str++ & 0xf;
2536 if (l & 1) bits <<= 4;
2537 else {
2538 PUSH_BYTE(utf8, cur, bits);
2539 bits = 0;
2540 }
2541 }
2542 else
2543 while (l++ < len) {
2544 if (utf8_source) {
2545 UV val = 0;
2546 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2547 if (val < 256 && isALPHA(val))
2548 bits |= ((val + 9) & 0xf) << 4;
2549 else
2550 bits |= (val & 0xf) << 4;
2551 } else if (isALPHA(*str))
2552 bits |= ((*str++ + 9) & 0xf) << 4;
2553 else
2554 bits |= (*str++ & 0xf) << 4;
2555 if (l & 1) bits >>= 4;
2556 else {
2557 PUSH_BYTE(utf8, cur, bits);
2558 bits = 0;
2559 }
2560 }
2561 l--;
2562 if (l & 1) {
2563 PUSH_BYTE(utf8, cur, bits);
2564 l++;
2565 }
2566 /* Determine how many chars are left in the requested field */
2567 l /= 2;
2568 if (howlen == e_star) field_len = 0;
2569 else field_len -= l;
2570 Zero(cur, field_len, char);
2571 cur += field_len;
2572 break;
2573 }
2574 case 'c':
2575 while (len-- > 0) {
2576 IV aiv;
2577 fromstr = NEXTFROM;
354b74ae 2578 aiv = SvIV_no_inf(fromstr, datumtype);
1604cfb0
MS
2579 if ((-128 > aiv || aiv > 127))
2580 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2581 "Character in 'c' format wrapped in pack");
a5d875a7 2582 PUSH_BYTE(utf8, cur, (U8)aiv);
1604cfb0
MS
2583 }
2584 break;
2585 case 'C':
2586 if (len == 0) {
2587 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2588 break;
2589 }
2590 while (len-- > 0) {
2591 IV aiv;
2592 fromstr = NEXTFROM;
354b74ae 2593 aiv = SvIV_no_inf(fromstr, datumtype);
1604cfb0
MS
2594 if ((0 > aiv || aiv > 0xff))
2595 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2596 "Character in 'C' format wrapped in pack");
a5d875a7 2597 PUSH_BYTE(utf8, cur, (U8)aiv);
1604cfb0
MS
2598 }
2599 break;
2600 case 'W': {
2601 char *end;
2602 U8 in_bytes = (U8)IN_BYTES;
2603
2604 end = start+SvLEN(cat)-1;
2605 if (utf8) end -= UTF8_MAXLEN-1;
2606 while (len-- > 0) {
2607 UV auv;
2608 fromstr = NEXTFROM;
2609 auv = SvUV_no_inf(fromstr, datumtype);
2610 if (in_bytes) auv = auv % 0x100;
2611 if (utf8) {
2612 W_utf8:
2613 if (cur >= end) {
2614 *cur = '\0';
2615 SvCUR_set(cat, cur - start);
2616
2617 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2618 end = start+SvLEN(cat)-UTF8_MAXLEN;
2619 }
2620 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2621 } else {
2622 if (auv >= 0x100) {
2623 if (!SvUTF8(cat)) {
2624 *cur = '\0';
2625 SvCUR_set(cat, cur - start);
2626 marked_upgrade(aTHX_ cat, symptr);
2627 lookahead.flags |= FLAG_DO_UTF8;
2628 lookahead.strbeg = symptr->strbeg;
2629 utf8 = 1;
2630 start = SvPVX(cat);
2631 cur = start + SvCUR(cat);
2632 end = start+SvLEN(cat)-UTF8_MAXLEN;
2633 goto W_utf8;
2634 }
2635 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2636 "Character in 'W' format wrapped in pack");
a5d875a7 2637 auv = (U8) auv;
1604cfb0
MS
2638 }
2639 if (cur >= end) {
2640 *cur = '\0';
2641 SvCUR_set(cat, cur - start);
2642 GROWING(0, cat, start, cur, len+1);
2643 end = start+SvLEN(cat)-1;
2644 }
2645 *(U8 *) cur++ = (U8)auv;
2646 }
2647 }
2648 break;
2649 }
2650 case 'U': {
2651 char *end;
2652
2653 if (len == 0) {
2654 if (!(symptr->flags & FLAG_DO_UTF8)) {
2655 marked_upgrade(aTHX_ cat, symptr);
2656 lookahead.flags |= FLAG_DO_UTF8;
2657 lookahead.strbeg = symptr->strbeg;
2658 }
2659 utf8 = 0;
2660 goto no_change;
2661 }
2662
2663 end = start+SvLEN(cat);
2664 if (!utf8) end -= UTF8_MAXLEN;
2665 while (len-- > 0) {
2666 UV auv;
2667 fromstr = NEXTFROM;
2668 auv = SvUV_no_inf(fromstr, datumtype);
2669 if (utf8) {
2670 U8 buffer[UTF8_MAXLEN+1], *endb;
3c2b2fcb 2671 endb = uvchr_to_utf8_flags(buffer, auv, 0);
1604cfb0
MS
2672 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2673 *cur = '\0';
2674 SvCUR_set(cat, cur - start);
2675 GROWING(0, cat, start, cur,
2676 len+(endb-buffer)*UTF8_EXPAND);
2677 end = start+SvLEN(cat);
2678 }
d21d7215 2679 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
1604cfb0
MS
2680 } else {
2681 if (cur >= end) {
2682 *cur = '\0';
2683 SvCUR_set(cat, cur - start);
2684 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2685 end = start+SvLEN(cat)-UTF8_MAXLEN;
2686 }
3c2b2fcb 2687 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
1604cfb0
MS
2688 }
2689 }
2690 break;
2691 }
2692 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2693 case 'f':
2694 while (len-- > 0) {
2695 float afloat;
2696 NV anv;
2697 fromstr = NEXTFROM;
2698 anv = SvNV(fromstr);
a7157111 2699# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
1604cfb0
MS
2700 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2701 * on Alpha; fake it if we don't have them.
2702 */
2703 if (anv > FLT_MAX)
2704 afloat = FLT_MAX;
2705 else if (anv < -FLT_MAX)
2706 afloat = -FLT_MAX;
2707 else afloat = (float)anv;
baf3cf9c 2708# else
a7157111 2709# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1604cfb0
MS
2710 if(Perl_isnan(anv))
2711 afloat = (float)NV_NAN;
2712 else
a7157111
JH
2713# endif
2714# ifdef NV_INF
919894b7
DM
2715 /* a simple cast to float is undefined if outside
2716 * the range of values that can be represented */
1604cfb0 2717 afloat = (float)(anv > FLT_MAX ? NV_INF :
919894b7 2718 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2719# endif
baf3cf9c 2720# endif
3a88beaa 2721 PUSH_VAR(utf8, cur, afloat, needs_swap);
1604cfb0
MS
2722 }
2723 break;
2724 case 'd':
2725 while (len-- > 0) {
2726 double adouble;
2727 NV anv;
2728 fromstr = NEXTFROM;
2729 anv = SvNV(fromstr);
a7157111 2730# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
1604cfb0
MS
2731 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2732 * on Alpha; fake it if we don't have them.
2733 */
2734 if (anv > DBL_MAX)
2735 adouble = DBL_MAX;
2736 else if (anv < -DBL_MAX)
2737 adouble = -DBL_MAX;
2738 else adouble = (double)anv;
baf3cf9c 2739# else
1604cfb0 2740 adouble = (double)anv;
baf3cf9c 2741# endif
3a88beaa 2742 PUSH_VAR(utf8, cur, adouble, needs_swap);
1604cfb0
MS
2743 }
2744 break;
2745 case 'F': {
2746 NV_bytes anv;
2747 Zero(&anv, 1, NV); /* can be long double with unused bits */
2748 while (len-- > 0) {
2749 fromstr = NEXTFROM;
cd07c537 2750#ifdef __GNUC__
1604cfb0
MS
2751 /* to work round a gcc/x86 bug; don't use SvNV */
2752 anv.nv = sv_2nv(fromstr);
070e2677
TC
2753# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2754 && LONG_DOUBLESIZE > 10
2755 /* GCC sometimes overwrites the padding in the
2756 assignment above */
2757 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2758# endif
cd07c537 2759#else
1604cfb0 2760 anv.nv = SvNV(fromstr);
cd07c537 2761#endif
3a88beaa 2762 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
1604cfb0
MS
2763 }
2764 break;
2765 }
77a2054e 2766#if defined(HAS_LONG_DOUBLE)
1604cfb0
MS
2767 case 'D': {
2768 ld_bytes aldouble;
2769 /* long doubles can have unused bits, which may be nonzero */
2770 Zero(&aldouble, 1, long double);
2771 while (len-- > 0) {
2772 fromstr = NEXTFROM;
cd07c537 2773# ifdef __GNUC__
1604cfb0
MS
2774 /* to work round a gcc/x86 bug; don't use SvNV */
2775 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2776# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2777 /* GCC sometimes overwrites the padding in the
2778 assignment above */
2779 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2780# endif
cd07c537 2781# else
1604cfb0 2782 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2783# endif
3a88beaa
NC
2784 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2785 needs_swap);
1604cfb0
MS
2786 }
2787 break;
2788 }
92d41999 2789#endif
1604cfb0
MS
2790 case 'n' | TYPE_IS_SHRIEKING:
2791 case 'n':
2792 while (len-- > 0) {
2793 I16 ai16;
2794 fromstr = NEXTFROM;
2795 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2796 ai16 = PerlSock_htons(ai16);
3a88beaa 2797 PUSH16(utf8, cur, &ai16, FALSE);
1604cfb0
MS
2798 }
2799 break;
2800 case 'v' | TYPE_IS_SHRIEKING:
2801 case 'v':
2802 while (len-- > 0) {
2803 I16 ai16;
2804 fromstr = NEXTFROM;
2805 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2806 ai16 = htovs(ai16);
3a88beaa 2807 PUSH16(utf8, cur, &ai16, FALSE);
1604cfb0
MS
2808 }
2809 break;
49704364 2810 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2811#if SHORTSIZE != SIZE16
1604cfb0
MS
2812 while (len-- > 0) {
2813 unsigned short aushort;
2814 fromstr = NEXTFROM;
2815 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2816 PUSH_VAR(utf8, cur, aushort, needs_swap);
1604cfb0 2817 }
49704364
WL
2818 break;
2819#else
924ba076 2820 /* FALLTHROUGH */
a6ec74c1 2821#endif
1604cfb0
MS
2822 case 'S':
2823 while (len-- > 0) {
2824 U16 au16;
2825 fromstr = NEXTFROM;
2826 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2827 PUSH16(utf8, cur, &au16, needs_swap);
1604cfb0
MS
2828 }
2829 break;
2830 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2831#if SHORTSIZE != SIZE16
1604cfb0
MS
2832 while (len-- > 0) {
2833 short ashort;
2834 fromstr = NEXTFROM;
2835 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2836 PUSH_VAR(utf8, cur, ashort, needs_swap);
1604cfb0 2837 }
49704364
WL
2838 break;
2839#else
924ba076 2840 /* FALLTHROUGH */
a6ec74c1 2841#endif
1604cfb0
MS
2842 case 's':
2843 while (len-- > 0) {
2844 I16 ai16;
2845 fromstr = NEXTFROM;
2846 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2847 PUSH16(utf8, cur, &ai16, needs_swap);
1604cfb0
MS
2848 }
2849 break;
2850 case 'I':
2851 case 'I' | TYPE_IS_SHRIEKING:
2852 while (len-- > 0) {
2853 unsigned int auint;
2854 fromstr = NEXTFROM;
2855 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2856 PUSH_VAR(utf8, cur, auint, needs_swap);
1604cfb0
MS
2857 }
2858 break;
2859 case 'j':
2860 while (len-- > 0) {
2861 IV aiv;
2862 fromstr = NEXTFROM;
2863 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2864 PUSH_VAR(utf8, cur, aiv, needs_swap);
1604cfb0
MS
2865 }
2866 break;
2867 case 'J':
2868 while (len-- > 0) {
2869 UV auv;
2870 fromstr = NEXTFROM;
2871 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2872 PUSH_VAR(utf8, cur, auv, needs_swap);
1604cfb0
MS
2873 }
2874 break;
2875 case 'w':
a6ec74c1 2876 while (len-- > 0) {
1604cfb0
MS
2877 NV anv;
2878 fromstr = NEXTFROM;
2879 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2880 anv = SvNV_nomg(fromstr);
2881
2882 if (anv < 0) {
2883 *cur = '\0';
2884 SvCUR_set(cat, cur - start);
2885 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2886 }
a6ec74c1 2887
196b62db
NC
2888 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2889 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2890 any negative IVs will have already been got by the croak()
2891 above. IOK is untrue for fractions, so we test them
2892 against UV_MAX_P1. */
1604cfb0
MS
2893 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2894 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2895 char *in = buf + sizeof(buf);
2896 UV auv = SvUV_nomg(fromstr);
2897
2898 do {
2899 *--in = (char)((auv & 0x7f) | 0x80);
2900 auv >>= 7;
2901 } while (auv);
2902 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2903 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2904 in, (buf + sizeof(buf)) - in);
2905 } else if (SvPOKp(fromstr))
2906 goto w_string;
2907 else if (SvNOKp(fromstr)) {
2908 /* 10**NV_MAX_10_EXP is the largest power of 10
2909 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2910 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2911 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2912 And with that many bytes only Inf can overflow.
2913 Some C compilers are strict about integral constant
2914 expressions so we conservatively divide by a slightly
2915 smaller integer instead of multiplying by the exact
2916 floating-point value.
2917 */
0258719b 2918#ifdef NV_MAX_10_EXP
1604cfb0
MS
2919 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2920 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2921#else
1604cfb0
MS
2922 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2923 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2924#endif
1604cfb0
MS
2925 char *in = buf + sizeof(buf);
2926
2927 anv = Perl_floor(anv);
2928 do {
2929 const NV next = Perl_floor(anv / 128);
2930 if (in <= buf) /* this cannot happen ;-) */
2931 Perl_croak(aTHX_ "Cannot compress integer in pack");
2932 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2933 anv = next;
2934 } while (anv > 0);
2935 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2936 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2937 in, (buf + sizeof(buf)) - in);
2938 } else {
2939 const char *from;
2940 char *result, *in;
2941 SV *norm;
2942 STRLEN len;
2943 bool done;
2944
2945 w_string:
2946 /* Copy string and check for compliance */
2947 from = SvPV_nomg_const(fromstr, len);
2948 if ((norm = is_an_int(from, len)) == NULL)
2949 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2950
2951 Newx(result, len, char);
2952 in = result + len;
2953 done = FALSE;
2954 while (!done) *--in = div128(norm, &done) | 0x80;
2955 result[len - 1] &= 0x7F; /* clear continue bit */
2956 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2957 in, (result + len) - in);
2958 Safefree(result);
2959 SvREFCNT_dec(norm); /* free norm */
2960 }
2961 }
2962 break;
2963 case 'i':
2964 case 'i' | TYPE_IS_SHRIEKING:
2965 while (len-- > 0) {
2966 int aint;
2967 fromstr = NEXTFROM;
2968 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2969 PUSH_VAR(utf8, cur, aint, needs_swap);
1604cfb0
MS
2970 }
2971 break;
2972 case 'N' | TYPE_IS_SHRIEKING:
2973 case 'N':
2974 while (len-- > 0) {
2975 U32 au32;
2976 fromstr = NEXTFROM;
2977 au32 = SvUV_no_inf(fromstr, datumtype);
2978 au32 = PerlSock_htonl(au32);
3a88beaa 2979 PUSH32(utf8, cur, &au32, FALSE);
1604cfb0
MS
2980 }
2981 break;
2982 case 'V' | TYPE_IS_SHRIEKING:
2983 case 'V':
2984 while (len-- > 0) {
2985 U32 au32;
2986 fromstr = NEXTFROM;
2987 au32 = SvUV_no_inf(fromstr, datumtype);
2988 au32 = htovl(au32);
3a88beaa 2989 PUSH32(utf8, cur, &au32, FALSE);
1604cfb0
MS
2990 }
2991 break;
2992 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2993#if LONGSIZE != SIZE32
1604cfb0
MS
2994 while (len-- > 0) {
2995 unsigned long aulong;
2996 fromstr = NEXTFROM;
2997 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2998 PUSH_VAR(utf8, cur, aulong, needs_swap);
1604cfb0
MS
2999 }
3000 break;
49704364
WL
3001#else
3002 /* Fall though! */
a6ec74c1 3003#endif
1604cfb0
MS
3004 case 'L':
3005 while (len-- > 0) {
3006 U32 au32;
3007 fromstr = NEXTFROM;
3008 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 3009 PUSH32(utf8, cur, &au32, needs_swap);
1604cfb0
MS
3010 }
3011 break;
3012 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3013#if LONGSIZE != SIZE32
1604cfb0
MS
3014 while (len-- > 0) {
3015 long along;
3016 fromstr = NEXTFROM;
3017 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3018 PUSH_VAR(utf8, cur, along, needs_swap);
1604cfb0
MS
3019 }
3020 break;
49704364
WL
3021#else
3022 /* Fall though! */
a6ec74c1 3023#endif
1604cfb0 3024 case 'l':
49704364 3025 while (len-- > 0) {
1604cfb0
MS
3026 I32 ai32;
3027 fromstr = NEXTFROM;
3028 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3029 PUSH32(utf8, cur, &ai32, needs_swap);
1604cfb0
MS
3030 }
3031 break;
c174bf3b 3032#if defined(HAS_QUAD) && IVSIZE >= 8
1604cfb0
MS
3033 case 'Q':
3034 while (len-- > 0) {
3035 Uquad_t auquad;
3036 fromstr = NEXTFROM;
3037 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3038 PUSH_VAR(utf8, cur, auquad, needs_swap);
1604cfb0
MS
3039 }
3040 break;
3041 case 'q':
3042 while (len-- > 0) {
3043 Quad_t aquad;
3044 fromstr = NEXTFROM;
3045 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3046 PUSH_VAR(utf8, cur, aquad, needs_swap);
1604cfb0
MS
3047 }
3048 break;
1640b983 3049#endif
1604cfb0
MS
3050 case 'P':
3051 len = 1; /* assume SV is correct length */
3052 GROWING(utf8, cat, start, cur, sizeof(char *));
3053 /* FALLTHROUGH */
3054 case 'p':
3055 while (len-- > 0) {
3056 const char *aptr;
3057
3058 fromstr = NEXTFROM;
3059 SvGETMAGIC(fromstr);
3060 if (!SvOK(fromstr)) aptr = NULL;
3061 else {
3062 /* XXX better yet, could spirit away the string to
3063 * a safe spot and hang on to it until the result
3064 * of pack() (and all copies of the result) are
3065 * gone.
3066 */
ba1af285 3067 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) <=
9a500794 3068#ifdef PERL_RC_STACK
ba1af285
DM
3069 2
3070#else
3071 1
3072#endif
3073 )
1604cfb0
MS
3074 || (SvPADTMP(fromstr) &&
3075 !SvREADONLY(fromstr)))) {
3076 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3077 "Attempt to pack pointer to temporary value");
3078 }
3079 if (SvPOK(fromstr) || SvNIOK(fromstr))
3080 aptr = SvPV_nomg_const_nolen(fromstr);
3081 else
3082 aptr = SvPV_force_flags_nolen(fromstr, 0);
3083 }
3a88beaa 3084 PUSH_VAR(utf8, cur, aptr, needs_swap);
1604cfb0
MS
3085 }
3086 break;
3087 case 'u': {
3088 const char *aptr, *aend;
3089 bool from_utf8;
3090
3091 fromstr = NEXTFROM;
3092 if (len <= 2) len = 45;
3093 else len = len / 3 * 3;
3094 if (len >= 64) {
3095 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3096 "Field too wide in 'u' format in pack");
3097 len = 63;
3098 }
3099 aptr = SvPV_const(fromstr, fromlen);
3100 from_utf8 = DO_UTF8(fromstr);
3101 if (from_utf8) {
3102 aend = aptr + fromlen;
3103 fromlen = sv_len_utf8_nomg(fromstr);
3104 } else aend = NULL; /* Unused, but keep compilers happy */
3105 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3106 while (fromlen > 0) {
3107 U8 *end;
3108 SSize_t todo;
3109 U8 hunk[1+63/3*4+1];
3110
3111 if ((SSize_t)fromlen > len)
3112 todo = len;
3113 else
3114 todo = fromlen;
3115 if (from_utf8) {
3116 char buffer[64];
3117 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3118 'u' | TYPE_IS_PACK)) {
3119 *cur = '\0';
3120 SvCUR_set(cat, cur - start);
3121 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3122 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3123 aptr, aend, buffer, todo);
3124 }
3125 end = doencodes(hunk, (const U8 *)buffer, todo);
3126 } else {
3127 end = doencodes(hunk, (const U8 *)aptr, todo);
3128 aptr += todo;
3129 }
3130 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3131 fromlen -= todo;
3132 }
3133 break;
3134 }
3135 }
3136 *cur = '\0';
3137 SvCUR_set(cat, cur - start);
f337b084 3138 no_change:
1604cfb0 3139 *symptr = lookahead;
a6ec74c1 3140 }
49704364 3141 return beglist;
18529408
IZ
3142}
3143#undef NEXTFROM
3144
3145
91db8a47 3146PP_wrapped(pp_pack, 0, 1)
18529408 3147{
20b7effb 3148 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3149 SV *cat = TARG;
18529408 3150 STRLEN fromlen;
349d4f2f 3151 SV *pat_sv = *++MARK;
eb578fdb
KW
3152 const char *pat = SvPV_const(pat_sv, fromlen);
3153 const char *patend = pat + fromlen;
18529408
IZ
3154
3155 MARK++;
e8f7c79f 3156 SvPVCLEAR(cat);
f337b084 3157 SvUTF8_off(cat);
18529408 3158
7accc089 3159 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3160
fd879d93
KW
3161 if (SvUTF8(cat)) {
3162 STRLEN result_len;
3163 const char * result = SvPV_nomg(cat, result_len);
3164 const U8 * error_pos;
3165
3166 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3167 _force_out_malformed_utf8_message(error_pos,
3168 (U8 *) result + result_len,
3169 0, /* no flags */
3170 1 /* Die */
3171 );
3172 NOT_REACHED; /* NOTREACHED */
3173 }
3174 }
3175
a6ec74c1
JH
3176 SvSETMAGIC(cat);
3177 SP = ORIGMARK;
3178 PUSHs(cat);
3179 RETURN;
3180}
a6ec74c1 3181
73cb7263 3182/*
14d04a33 3183 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3184 */