This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Promote v5.36 usage and feature bundles doc
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
f7fe979e
AL
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
e1b825c1
Z
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
f7fe979e
AL
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
1604cfb0
MS
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
f7fe979e
AL
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
77a2054e 78#if defined(HAS_LONG_DOUBLE)
275663fa
TC
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
41f35ffd 115#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
08ca2aa3
TH
116# define OFF16(p) ((char *) (p))
117# define OFF32(p) ((char *) (p))
41f35ffd
AC
118#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119# define OFF16(p) ((char*)(p))
120# define OFF32(p) ((char*)(p))
121#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124#else
125# error "bad cray byte order"
a6ec74c1
JH
126#endif
127
3a88beaa
NC
128#define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130#define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 132
a1219b5e
NC
133#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137#else
138# error "Unsupported byteorder"
20aa3a38
NC
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
142 logic adding to deal with any mixed-endian transformations needed.
143 */
a1219b5e
NC
144#endif
145
08ca2aa3 146/* Only to be used inside a loop (see the break) */
aaec8192 147#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 148STMT_START { \
228e69a7 149 if (UNLIKELY(utf8)) { \
9df874cd 150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
1604cfb0 151 (char *) (buf), len, datumtype)) break; \
08ca2aa3 152 } else { \
228e69a7 153 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
154 S_reverse_copy(s, (char *) (buf), len); \
155 else \
156 Copy(s, (char *) (buf), len, char); \
275663fa 157 s += len; \
08ca2aa3 158 } \
08ca2aa3
TH
159} STMT_END
160
aaec8192
NC
161#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 163
aaec8192
NC
164#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 166
aaec8192
NC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 169
3a88beaa
NC
170#define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 172
49704364
WL
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
0f12654f 194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
0860e090
KW
215#define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \
216 NATIVE_TO_LATIN1(' '), \
217 NATIVE_TO_LATIN1('a') - 1)
a6ec74c1 218
66c611c5 219/* type modifiers */
62f95557 220#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
221#define TYPE_IS_BIG_ENDIAN 0x200
222#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 223#define TYPE_IS_PACK 0x800
1109a392 224#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 225#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
a5d875a7 226#define TYPE_NO_MODIFIERS(t) ((U8) (t))
1109a392 227
7212898e
NC
228# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
229# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
230
231# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
232
78d46eaa 233#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 234#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
235#define PACK_SIZE_MASK 0x3F
236
4ad176ef 237#include "packsizetables.inc"
78d46eaa 238
20aa3a38
NC
239static void
240S_reverse_copy(const char *src, char *dest, STRLEN len)
241{
242 dest += len;
243 while (len--)
244 *--dest = *src++;
245}
246
08ca2aa3 247STATIC U8
9df874cd 248utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 249{
08ca2aa3 250 STRLEN retlen;
df337714
KW
251 UV val;
252
253 if (*s >= end) {
1604cfb0 254 goto croak;
df337714
KW
255 }
256 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
1604cfb0 257 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714
KW
258 if (retlen == (STRLEN) -1)
259 croak:
1604cfb0
MS
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 262 if (val >= 0x100) {
1604cfb0
MS
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
a5d875a7 266 val = (U8) val;
08ca2aa3
TH
267 }
268 *s += retlen;
fe2774ed 269 return (U8)val;
08ca2aa3
TH
270}
271
f337b084 272#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
1604cfb0
MS
273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
274 *(U8 *)(s)++)
08ca2aa3
TH
275
276STATIC bool
e1b825c1 277S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
08ca2aa3
TH
278{
279 UV val;
280 STRLEN retlen;
f7fe979e 281 const char *from = *s;
08ca2aa3 282 int bad = 0;
f7fe979e 283 const U32 flags = ckWARN(WARN_UTF8) ?
1604cfb0 284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
285 const bool needs_swap = NEEDS_SWAP(datumtype);
286
228e69a7 287 if (UNLIKELY(needs_swap))
20aa3a38
NC
288 buf += buf_len;
289
08ca2aa3 290 for (;buf_len > 0; buf_len--) {
1604cfb0
MS
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1) {
294 from += UTF8_SAFE_SKIP(from, end);
295 bad |= 1;
296 } else from += retlen;
297 if (val >= 0x100) {
298 bad |= 2;
a5d875a7 299 val = (U8) val;
1604cfb0 300 }
228e69a7 301 if (UNLIKELY(needs_swap))
20aa3a38
NC
302 *(U8 *)--buf = (U8)val;
303 else
304 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
305 }
306 /* We have enough characters for the buffer. Did we have problems ? */
307 if (bad) {
1604cfb0
MS
308 if (bad & 1) {
309 /* Rewalk the string fragment while warning */
310 const char *ptr;
311 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
315 }
316 if (from > end) from = end;
317 }
318 if ((bad & 2))
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 WARN_PACK : WARN_UNPACK),
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
324 }
325 *s = from;
326 return TRUE;
327}
328
64844641 329STATIC char *
d21d7215
KW
330S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 332
228e69a7 333 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
334 const U8 *p = start + len;
335 while (p-- > start) {
55d09dc8 336 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
337 }
338 } else {
339 const U8 * const end = start + len;
340 while (start < end) {
55d09dc8 341 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
342 start++;
343 }
f337b084 344 }
64844641 345 return dest;
f337b084
TH
346}
347
3a88beaa 348#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 349STMT_START { \
228e69a7 350 if (UNLIKELY(utf8)) \
1604cfb0 351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 352 else { \
228e69a7 353 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
354 S_reverse_copy((char *)(buf), cur, len); \
355 else \
356 Copy(buf, cur, len, char); \
1604cfb0 357 (cur) += (len); \
230e1fce 358 } \
f337b084
TH
359} STMT_END
360
f17fed50
TC
361#define SAFE_UTF8_EXPAND(var) \
362STMT_START { \
363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365 (var) = (var) * UTF8_EXPAND; \
366} STMT_END
367
368#define GROWING2(utf8, cat, start, cur, item_size, item_count) \
369STMT_START { \
370 if (SSize_t_MAX / (item_size) < (item_count)) \
371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
373} STMT_END
374
f337b084
TH
375#define GROWING(utf8, cat, start, cur, in_len) \
376STMT_START { \
377 STRLEN glen = (in_len); \
f17fed50
TC
378 STRLEN catcur = (STRLEN)((cur) - (start)); \
379 if (utf8) SAFE_UTF8_EXPAND(glen); \
380 if (SSize_t_MAX - glen < catcur) \
381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382 if (catcur + glen >= SvLEN(cat)) { \
1604cfb0
MS
383 (start) = sv_exp_grow(cat, glen); \
384 (cur) = (start) + SvCUR(cat); \
f337b084
TH
385 } \
386} STMT_END
387
388#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
389STMT_START { \
f7fe979e 390 const STRLEN glen = (in_len); \
f337b084 391 STRLEN gl = glen; \
f17fed50 392 if (utf8) SAFE_UTF8_EXPAND(gl); \
f337b084
TH
393 if ((cur) + gl >= (start) + SvLEN(cat)) { \
394 *cur = '\0'; \
b162af07 395 SvCUR_set((cat), (cur) - (start)); \
1604cfb0
MS
396 (start) = sv_exp_grow(cat, gl); \
397 (cur) = (start) + SvCUR(cat); \
f337b084 398 } \
3a88beaa 399 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
400} STMT_END
401
402#define PUSH_BYTE(utf8, s, byte) \
403STMT_START { \
404 if (utf8) { \
1604cfb0
MS
405 const U8 au8 = (byte); \
406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
407 } else *(U8 *)(s)++ = (byte); \
408} STMT_END
409
410/* Only to be used inside a loop (see the break) */
411#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
412STMT_START { \
413 STRLEN retlen; \
414 if (str >= end) break; \
415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
df337714 416 if (retlen == (STRLEN) -1) { \
1604cfb0
MS
417 *cur = '\0'; \
418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
f337b084
TH
419 } \
420 str += retlen; \
421} STMT_END
422
f7fe979e
AL
423static const char *_action( const tempsym_t* symptr )
424{
10edeb5d 425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
426}
427
206947d2 428/* Returns the sizeof() struct described by pat */
e1b825c1 429STATIC SSize_t
f337b084 430S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 431{
e1b825c1 432 SSize_t total = 0;
206947d2 433
7918f24d
NC
434 PERL_ARGS_ASSERT_MEASURE_STRUCT;
435
49704364 436 while (next_symbol(symptr)) {
1604cfb0 437 SSize_t len, size;
f337b084
TH
438
439 switch (symptr->howlen) {
1604cfb0
MS
440 case e_star:
441 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 442 _action( symptr ) );
81d52ecd 443
1604cfb0
MS
444 default:
445 /* e_no_len and e_number */
446 len = symptr->length;
447 break;
49704364
WL
448 }
449
1604cfb0
MS
450 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
451 if (!size) {
e1b825c1 452 SSize_t star;
1604cfb0
MS
453 /* endianness doesn't influence the size of a type */
454 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
455 default:
d3cdb721 456 /* diag_listed_as: Invalid type '%s' in %s */
1604cfb0
MS
457 Perl_croak(aTHX_ "Invalid type '%c' in %s",
458 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 459 _action( symptr ) );
1604cfb0
MS
460 case '.' | TYPE_IS_SHRIEKING:
461 case '@' | TYPE_IS_SHRIEKING:
462 case '@':
463 case '.':
464 case '/':
465 case 'U': /* XXXX Is it correct? */
466 case 'w':
467 case 'u':
468 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
469 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 470 _action( symptr ) );
1604cfb0
MS
471 case '%':
472 size = 0;
473 break;
474 case '(':
475 {
476 tempsym_t savsym = *symptr;
477 symptr->patptr = savsym.grpbeg;
478 symptr->patend = savsym.grpend;
479 /* XXXX Theoretically, we need to measure many times at
480 different positions, since the subexpression may contain
481 alignment commands, but be not of aligned length.
482 Need to detect this and croak(). */
483 size = measure_struct(symptr);
484 *symptr = savsym;
485 break;
486 }
487 case 'X' | TYPE_IS_SHRIEKING:
488 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
489 */
490 if (!len) /* Avoid division by 0 */
491 len = 1;
492 len = total % len; /* Assumed: the start is aligned. */
493 /* FALLTHROUGH */
494 case 'X':
495 size = -1;
496 if (total < len)
f7fe979e 497 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
1604cfb0
MS
498 break;
499 case 'x' | TYPE_IS_SHRIEKING:
500 if (!len) /* Avoid division by 0 */
501 len = 1;
502 star = total % len; /* Assumed: the start is aligned. */
503 if (star) /* Other portable ways? */
504 len = len - star;
505 else
506 len = 0;
507 /* FALLTHROUGH */
508 case 'x':
509 case 'A':
510 case 'Z':
511 case 'a':
512 size = 1;
513 break;
514 case 'B':
515 case 'b':
516 len = (len + 7)/8;
517 size = 1;
518 break;
519 case 'H':
520 case 'h':
521 len = (len + 1)/2;
522 size = 1;
523 break;
524
525 case 'P':
526 len = 1;
527 size = sizeof(char*);
528 break;
529 }
530 }
531 total += len * size;
206947d2
IZ
532 }
533 return total;
534}
535
49704364
WL
536
537/* locate matching closing parenthesis or bracket
538 * returns char pointer to char after match, or NULL
539 */
f7fe979e 540STATIC const char *
5aaab254 541S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 542{
7918f24d 543 PERL_ARGS_ASSERT_GROUP_END;
f24623d6 544 Size_t opened = 0; /* number of pending opened brackets */
7918f24d 545
49704364 546 while (patptr < patend) {
1604cfb0
MS
547 const char c = *patptr++;
548
f24623d6 549 if (opened == 0 && c == ender)
1604cfb0
MS
550 return patptr-1;
551 else if (c == '#') {
552 while (patptr < patend && *patptr != '\n')
553 patptr++;
554 continue;
f24623d6
HS
555 } else if (c == '(' || c == '[')
556 ++opened;
557 else if (c == ')' || c == ']') {
558 if (opened == 0)
559 Perl_croak(aTHX_ "Mismatched brackets in template");
560 --opened;
561 }
18529408 562 }
49704364
WL
563 Perl_croak(aTHX_ "No group ending character '%c' found in template",
564 ender);
a25b5927 565 NOT_REACHED; /* NOTREACHED */
18529408
IZ
566}
567
49704364
WL
568
569/* Convert unsigned decimal number to binary.
570 * Expects a pointer to the first digit and address of length variable
571 * Advances char pointer to 1st non-digit char and returns number
fc241834 572 */
f7fe979e 573STATIC const char *
e1b825c1 574S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
49704364 575{
e1b825c1 576 SSize_t len = *patptr++ - '0';
7918f24d
NC
577
578 PERL_ARGS_ASSERT_GET_NUM;
579
49704364 580 while (isDIGIT(*patptr)) {
e1b825c1
Z
581 SSize_t nlen = (len * 10) + (*patptr++ - '0');
582 if (nlen < 0 || nlen/10 != len)
49704364 583 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
e1b825c1 584 len = nlen;
49704364
WL
585 }
586 *lenptr = len;
587 return patptr;
588}
589
590/* The marvellous template parsing routine: Using state stored in *symptr,
591 * locates next template code and count
592 */
593STATIC bool
f337b084 594S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 595{
f7fe979e 596 const char* patptr = symptr->patptr;
0bcc34c2 597 const char* const patend = symptr->patend;
49704364 598
7918f24d
NC
599 PERL_ARGS_ASSERT_NEXT_SYMBOL;
600
49704364
WL
601 symptr->flags &= ~FLAG_SLASH;
602
603 while (patptr < patend) {
604 if (isSPACE(*patptr))
605 patptr++;
606 else if (*patptr == '#') {
607 patptr++;
608 while (patptr < patend && *patptr != '\n')
1604cfb0 609 patptr++;
49704364 610 if (patptr < patend)
1604cfb0 611 patptr++;
49704364 612 } else {
fc241834 613 /* We should have found a template code */
a5d875a7 614 I32 code = (U8) *patptr++;
66c611c5 615 U32 inherited_modifiers = 0;
49704364 616
6920b5ce
NC
617 /* unrecognised characters in pack/unpack formats were made fatal in
618 * 5.004, with an exception added in 5.004_04 for ',' to "just" warn: */
619 if (code == ','){
1604cfb0 620 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
49704364 621 symptr->flags |= FLAG_COMMA;
d3cdb721 622 /* diag_listed_as: Invalid type '%s' in %s */
1604cfb0
MS
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Invalid type ',' in %s", _action( symptr ) );
49704364 625 }
1604cfb0 626 continue;
49704364 627 }
fc241834 628
49704364 629 /* for '(', skip to ')' */
fc241834 630 if (code == '(') {
49704364
WL
631 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
632 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 633 _action( symptr ) );
49704364
WL
634 symptr->grpbeg = patptr;
635 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
636 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1604cfb0 637 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 638 _action( symptr ) );
49704364
WL
639 }
640
66c611c5
MHM
641 /* look for group modifiers to inherit */
642 if (TYPE_ENDIANNESS(symptr->flags)) {
643 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
644 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
645 }
646
1109a392
MHM
647 /* look for modifiers */
648 while (patptr < patend) {
b7787f18
AL
649 const char *allowed;
650 I32 modifier;
1109a392
MHM
651 switch (*patptr) {
652 case '!':
653 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 654 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
655 break;
656 case '>':
657 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 658 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
659 break;
660 case '<':
661 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 662 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
663 break;
664 default:
b7787f18
AL
665 allowed = "";
666 modifier = 0;
1109a392
MHM
667 break;
668 }
66c611c5 669
1109a392
MHM
670 if (modifier == 0)
671 break;
66c611c5 672
1109a392
MHM
673 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
674 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 675 allowed, _action( symptr ) );
66c611c5
MHM
676
677 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 678 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 679 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
680 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
681 TYPE_ENDIANNESS_MASK)
682 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 683 *patptr, _action( symptr ) );
66c611c5 684
a2a5de95 685 if ((code & modifier)) {
1604cfb0
MS
686 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
687 "Duplicate modifier '%c' after '%c' in %s",
688 *patptr, (int) TYPE_NO_MODIFIERS(code),
689 _action( symptr ) );
1109a392 690 }
66c611c5 691
1109a392
MHM
692 code |= modifier;
693 patptr++;
49704364
WL
694 }
695
66c611c5
MHM
696 /* inherit modifiers */
697 code |= inherited_modifiers;
698
fc241834 699 /* look for count and/or / */
49704364 700 if (patptr < patend) {
1604cfb0
MS
701 if (isDIGIT(*patptr)) {
702 patptr = get_num( patptr, &symptr->length );
49704364
WL
703 symptr->howlen = e_number;
704
705 } else if (*patptr == '*') {
706 patptr++;
707 symptr->howlen = e_star;
708
709 } else if (*patptr == '[') {
f7fe979e 710 const char* lenptr = ++patptr;
49704364
WL
711 symptr->howlen = e_number;
712 patptr = group_end( patptr, patend, ']' ) + 1;
713 /* what kind of [] is it? */
714 if (isDIGIT(*lenptr)) {
715 lenptr = get_num( lenptr, &symptr->length );
716 if( *lenptr != ']' )
717 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 718 _action( symptr ) );
49704364
WL
719 } else {
720 tempsym_t savsym = *symptr;
721 symptr->patend = patptr-1;
722 symptr->patptr = lenptr;
723 savsym.length = measure_struct(symptr);
724 *symptr = savsym;
725 }
726 } else {
727 symptr->howlen = e_no_len;
728 symptr->length = 1;
729 }
730
731 /* try to find / */
732 while (patptr < patend) {
733 if (isSPACE(*patptr))
734 patptr++;
735 else if (*patptr == '#') {
736 patptr++;
737 while (patptr < patend && *patptr != '\n')
1604cfb0 738 patptr++;
49704364 739 if (patptr < patend)
1604cfb0 740 patptr++;
49704364 741 } else {
66c611c5 742 if (*patptr == '/') {
49704364
WL
743 symptr->flags |= FLAG_SLASH;
744 patptr++;
66c611c5
MHM
745 if (patptr < patend &&
746 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 747 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 748 _action( symptr ) );
49704364
WL
749 }
750 break;
1604cfb0
MS
751 }
752 }
49704364
WL
753 } else {
754 /* at end - no count, no / */
755 symptr->howlen = e_no_len;
756 symptr->length = 1;
757 }
758
759 symptr->code = code;
fc241834 760 symptr->patptr = patptr;
49704364 761 return TRUE;
18529408 762 }
49704364 763 }
fc241834 764 symptr->patptr = patptr;
49704364 765 return FALSE;
18529408
IZ
766}
767
18529408 768/*
fc241834 769 There is no way to cleanly handle the case where we should process the
08ca2aa3 770 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
771 (e.g. estimates like strend-s as an upper bound for the number of
772 characters left wouldn't work). So if we foresee the need of this
773 (pattern starts with U or contains U0), we want to work on the encoded
774 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
775 themselves if they need to do a lot of unpacks like this on it
776*/
fc241834 777STATIC bool
08ca2aa3
TH
778need_utf8(const char *pat, const char *patend)
779{
780 bool first = TRUE;
7918f24d
NC
781
782 PERL_ARGS_ASSERT_NEED_UTF8;
783
08ca2aa3 784 while (pat < patend) {
1604cfb0
MS
785 if (pat[0] == '#') {
786 pat++;
787 pat = (const char *) memchr(pat, '\n', patend-pat);
788 if (!pat) return FALSE;
789 } else if (pat[0] == 'U') {
790 if (first || pat[1] == '0') return TRUE;
791 } else first = FALSE;
792 pat++;
08ca2aa3
TH
793 }
794 return FALSE;
795}
796
797STATIC char
798first_symbol(const char *pat, const char *patend) {
7918f24d
NC
799 PERL_ARGS_ASSERT_FIRST_SYMBOL;
800
08ca2aa3 801 while (pat < patend) {
1604cfb0
MS
802 if (pat[0] != '#') return pat[0];
803 pat++;
804 pat = (const char *) memchr(pat, '\n', patend-pat);
805 if (!pat) return 0;
806 pat++;
08ca2aa3
TH
807 }
808 return 0;
809}
810
811/*
dcccc8ff 812
7accc089
JH
813=for apidoc unpackstring
814
796b6530 815The engine implementing the C<unpack()> Perl function.
21ebfc7a 816
796b6530
KW
817Using the template C<pat..patend>, this function unpacks the string
818C<s..strend> into a number of mortal SVs, which it pushes onto the perl
819argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 820C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
821pushed elements.
822
796b6530
KW
823The C<strend> and C<patend> pointers should point to the byte following the
824last character of each string.
21ebfc7a
DM
825
826Although this function returns its values on the perl argument stack, it
827doesn't take any parameters from that stack (and thus in particular
796b6530 828there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 829example).
7accc089
JH
830
831=cut */
832
e1b825c1 833SSize_t
f7fe979e 834Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 835{
f7fe979e 836 tempsym_t sym;
08ca2aa3 837
7918f24d
NC
838 PERL_ARGS_ASSERT_UNPACKSTRING;
839
f337b084 840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3 841 else if (need_utf8(pat, patend)) {
1604cfb0
MS
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
845 s = (char *) bytes_to_utf8((U8 *) s, &len);
846 SAVEFREEPV(s);
847 strend = s + len;
848 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
849 }
850
f337b084 851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1604cfb0 852 flags |= FLAG_PARSE_UTF8;
08ca2aa3 853
f7fe979e 854 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
855
856 return unpack_rec(&sym, s, s, strend, NULL );
857}
858
e1b825c1 859STATIC SSize_t
f7fe979e 860S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 861{
20b7effb 862 dSP;
3297d27d 863 SV *sv = NULL;
e1b825c1 864 const SSize_t start_sp_offset = SP - PL_stack_base;
49704364 865 howlen_t howlen;
e1b825c1 866 SSize_t checksum = 0;
92d41999 867 UV cuv = 0;
a6ec74c1 868 NV cdouble = 0.0;
e1b825c1 869 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 870 bool beyond = FALSE;
21c16052 871 bool explicit_length;
9e27e96a 872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
874
875 PERL_ARGS_ASSERT_UNPACK_REC;
876
28be1210 877 symptr->strbeg = s - strbeg;
49704364 878
49704364 879 while (next_symbol(symptr)) {
1604cfb0
MS
880 packprops_t props;
881 SSize_t len;
f337b084 882 I32 datumtype = symptr->code;
a1219b5e 883 bool needs_swap;
1604cfb0
MS
884 /* do first one only unless in list context
885 / is implemented by unpacking the count, then popping it from the
886 stack, so must check that we're not in the middle of a / */
49704364 887 if ( unpack_only_one
1604cfb0
MS
888 && (SP - PL_stack_base == start_sp_offset + 1)
889 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 890 break;
49704364 891
f337b084 892 switch (howlen = symptr->howlen) {
1604cfb0
MS
893 case e_star:
894 len = strend - strbeg; /* long enough */
895 break;
896 default:
897 /* e_no_len and e_number */
898 len = symptr->length;
899 break;
49704364 900 }
18529408 901
21c16052 902 explicit_length = TRUE;
a6ec74c1 903 redo_switch:
49704364 904 beyond = s >= strend;
a7a3cfaa 905
1604cfb0
MS
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
907 if (props) {
908 /* props nonzero means we can process this letter. */
e1b825c1
Z
909 const SSize_t size = props & PACK_SIZE_MASK;
910 const SSize_t howmany = (strend - s) / size;
1604cfb0
MS
911 if (len > howmany)
912 len = howmany;
a7a3cfaa 913
1604cfb0
MS
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
916 EXTEND(SP, len);
917 EXTEND_MORTAL(len);
918 }
919 }
a7a3cfaa 920
a1219b5e
NC
921 needs_swap = NEEDS_SWAP(datumtype);
922
1604cfb0
MS
923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
924 default:
d3cdb721 925 /* diag_listed_as: Invalid type '%s' in %s */
1604cfb0 926 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 927
1604cfb0
MS
928 case '%':
929 if (howlen == e_no_len)
930 len = 16; /* len is not specified */
931 checksum = len;
932 cuv = 0;
933 cdouble = 0;
934 continue;
81d52ecd 935
1604cfb0
MS
936 case '(':
937 {
49704364 938 tempsym_t savsym = *symptr;
9e27e96a 939 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1604cfb0 940 symptr->flags |= group_modifiers;
49704364 941 symptr->patend = savsym.grpend;
1604cfb0 942 symptr->previous = &savsym;
49704364 943 symptr->level++;
1604cfb0
MS
944 PUTBACK;
945 if (len && unpack_only_one) len = 1;
946 while (len--) {
947 symptr->patptr = savsym.grpbeg;
948 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
949 else symptr->flags &= ~FLAG_PARSE_UTF8;
950 unpack_rec(symptr, s, strbeg, strend, &s);
08ca2aa3 951 if (s == strend && savsym.howlen == e_star)
1604cfb0
MS
952 break; /* No way to continue */
953 }
954 SPAGAIN;
28be1210 955 savsym.flags = symptr->flags & ~group_modifiers;
49704364 956 *symptr = savsym;
1604cfb0
MS
957 break;
958 }
959 case '.' | TYPE_IS_SHRIEKING:
960 case '.': {
961 const char *from;
962 SV *sv;
963 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
964 if (howlen == e_star) from = strbeg;
965 else if (len <= 0) from = s;
966 else {
967 tempsym_t *group = symptr;
968
969 while (--len && group) group = group->previous;
970 from = group ? strbeg + group->strbeg : strbeg;
971 }
972 sv = from <= s ?
973 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
974 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
975 mXPUSHs(sv);
976 break;
977 }
978 case '@' | TYPE_IS_SHRIEKING:
979 case '@':
980 s = strbeg + symptr->strbeg;
981 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
982 {
983 while (len > 0) {
984 if (s >= strend)
985 Perl_croak(aTHX_ "'@' outside of string in unpack");
986 s += UTF8SKIP(s);
987 len--;
988 }
989 if (s > strend)
990 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
991 } else {
992 if (strend-s < len)
993 Perl_croak(aTHX_ "'@' outside of string in unpack");
994 s += len;
995 }
996 break;
997 case 'X' | TYPE_IS_SHRIEKING:
998 if (!len) /* Avoid division by 0 */
999 len = 1;
1000 if (utf8) {
1001 const char *hop, *last;
1002 SSize_t l = len;
1003 hop = last = strbeg;
1004 while (hop < s) {
1005 hop += UTF8SKIP(hop);
1006 if (--l == 0) {
1007 last = hop;
1008 l = len;
1009 }
1010 }
1011 if (last > s)
1012 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1013 s = last;
1014 break;
1015 }
1016 len = (s - strbeg) % len;
1017 /* FALLTHROUGH */
1018 case 'X':
1019 if (utf8) {
1020 while (len > 0) {
1021 if (s <= strbeg)
1022 Perl_croak(aTHX_ "'X' outside of string in unpack");
1023 while (--s, UTF8_IS_CONTINUATION(*s)) {
1024 if (s <= strbeg)
1025 Perl_croak(aTHX_ "'X' outside of string in unpack");
1026 }
1027 len--;
1028 }
1029 } else {
1030 if (len > s - strbeg)
1031 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1032 s -= len;
1033 }
1034 break;
1035 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 1036 SSize_t ai32;
1604cfb0
MS
1037 if (!len) /* Avoid division by 0 */
1038 len = 1;
1039 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1040 else ai32 = (s - strbeg) % len;
1041 if (ai32 == 0) break;
1042 len -= ai32;
1043 }
1044 /* FALLTHROUGH */
1045 case 'x':
1046 if (utf8) {
1047 while (len>0) {
1048 if (s >= strend)
1049 Perl_croak(aTHX_ "'x' outside of string in unpack");
1050 s += UTF8SKIP(s);
1051 len--;
1052 }
1053 } else {
1054 if (len > strend - s)
1055 Perl_croak(aTHX_ "'x' outside of string in unpack");
1056 s += len;
1057 }
1058 break;
1059 case '/':
1060 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1061
1062 case 'A':
1063 case 'Z':
1064 case 'a':
1065 if (checksum) {
1066 /* Preliminary length estimate is assumed done in 'W' */
1067 if (len > strend - s) len = strend - s;
1068 goto W_checksum;
1069 }
1070 if (utf8) {
1071 SSize_t l;
1072 const char *hop;
1073 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1074 if (hop >= strend) {
1075 if (hop > strend)
1076 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1077 break;
1078 }
1079 }
1080 if (hop > strend)
1081 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1082 len = hop - s;
1083 } else if (len > strend - s)
1084 len = strend - s;
1085
1086 if (datumtype == 'Z') {
1087 /* 'Z' strips stuff after first null */
1088 const char *ptr, *end;
1089 end = s + len;
1090 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1091 sv = newSVpvn(s, ptr-s);
1092 if (howlen == e_star) /* exact for 'Z*' */
1093 len = ptr-s + (ptr != strend ? 1 : 0);
1094 } else if (datumtype == 'A') {
1095 /* 'A' strips both nulls and spaces */
1096 const char *ptr;
1097 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1098 for (ptr = s+len-1; ptr >= s; ptr--) {
1099 if ( *ptr != 0
1100 && !UTF8_IS_CONTINUATION(*ptr)
1101 && !isSPACE_utf8_safe(ptr, strend))
1102 {
1103 break;
1104 }
1105 }
1604cfb0
MS
1106 if (ptr >= s) ptr += UTF8SKIP(ptr);
1107 else ptr++;
1108 if (ptr > s+len)
1109 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1110 } else {
1111 for (ptr = s+len-1; ptr >= s; ptr--)
1112 if (*ptr != 0 && !isSPACE(*ptr)) break;
1113 ptr++;
1114 }
1115 sv = newSVpvn(s, ptr-s);
1116 } else sv = newSVpvn(s, len);
1117
1118 if (utf8) {
1119 SvUTF8_on(sv);
1120 /* Undo any upgrade done due to need_utf8() */
1121 if (!(symptr->flags & FLAG_WAS_UTF8))
1122 sv_utf8_downgrade(sv, 0);
1123 }
1124 mXPUSHs(sv);
1125 s += len;
1126 break;
1127 case 'B':
1128 case 'b': {
1129 char *str;
1130 if (howlen == e_star || len > (strend - s) * 8)
1131 len = (strend - s) * 8;
1132 if (checksum) {
1133 if (utf8)
1134 while (len >= 8 && s < strend) {
1135 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1136 len -= 8;
1137 }
1138 else
1139 while (len >= 8) {
1140 cuv += PL_bitcount[*(U8 *)s++];
1141 len -= 8;
1142 }
1143 if (len && s < strend) {
1144 U8 bits;
1145 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1146 if (datumtype == 'b')
1147 while (len-- > 0) {
1148 if (bits & 1) cuv++;
1149 bits >>= 1;
1150 }
1151 else
1152 while (len-- > 0) {
1153 if (bits & 0x80) cuv++;
1154 bits <<= 1;
1155 }
1156 }
1157 break;
1158 }
1159
1160 sv = sv_2mortal(newSV(len ? len : 1));
1161 SvPOK_on(sv);
1162 str = SvPVX(sv);
1163 if (datumtype == 'b') {
1164 U8 bits = 0;
1165 const SSize_t ai32 = len;
1166 for (len = 0; len < ai32; len++) {
1167 if (len & 7) bits >>= 1;
1168 else if (utf8) {
1169 if (s >= strend) break;
1170 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1171 } else bits = *(U8 *) s++;
1172 *str++ = bits & 1 ? '1' : '0';
1173 }
1174 } else {
1175 U8 bits = 0;
1176 const SSize_t ai32 = len;
1177 for (len = 0; len < ai32; len++) {
1178 if (len & 7) bits <<= 1;
1179 else if (utf8) {
1180 if (s >= strend) break;
1181 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1182 } else bits = *(U8 *) s++;
1183 *str++ = bits & 0x80 ? '1' : '0';
1184 }
1185 }
1186 *str = '\0';
1187 SvCUR_set(sv, str - SvPVX_const(sv));
1188 XPUSHs(sv);
1189 break;
1190 }
1191 case 'H':
1192 case 'h': {
1193 char *str = NULL;
1194 /* Preliminary length estimate, acceptable for utf8 too */
1195 if (howlen == e_star || len > (strend - s) * 2)
1196 len = (strend - s) * 2;
1197 if (!checksum) {
1198 sv = sv_2mortal(newSV(len ? len : 1));
1199 SvPOK_on(sv);
1200 str = SvPVX(sv);
1201 }
1202 if (datumtype == 'h') {
1203 U8 bits = 0;
1204 SSize_t ai32 = len;
1205 for (len = 0; len < ai32; len++) {
1206 if (len & 1) bits >>= 4;
1207 else if (utf8) {
1208 if (s >= strend) break;
1209 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1210 } else bits = * (U8 *) s++;
1211 if (!checksum)
1212 *str++ = PL_hexdigit[bits & 15];
1213 }
1214 } else {
1215 U8 bits = 0;
1216 const SSize_t ai32 = len;
1217 for (len = 0; len < ai32; len++) {
1218 if (len & 1) bits <<= 4;
1219 else if (utf8) {
1220 if (s >= strend) break;
1221 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1222 } else bits = *(U8 *) s++;
1223 if (!checksum)
1224 *str++ = PL_hexdigit[(bits >> 4) & 15];
1225 }
1226 }
1227 if (!checksum) {
1228 *str = '\0';
1229 SvCUR_set(sv, str - SvPVX_const(sv));
1230 XPUSHs(sv);
1231 }
1232 break;
1233 }
1234 case 'C':
1651fc44
ML
1235 if (len == 0) {
1236 if (explicit_length)
1604cfb0
MS
1237 /* Switch to "character" mode */
1238 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1239 break;
1240 }
1241 /* FALLTHROUGH */
1242 case 'c':
1243 while (len-- > 0 && s < strend) {
1244 int aint;
1245 if (utf8)
1246 {
1247 STRLEN retlen;
1248 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1249 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1250 if (retlen == (STRLEN) -1)
1251 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1252 s += retlen;
1253 }
1254 else
1255 aint = *(U8 *)(s)++;
1256 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1257 aint -= 256;
1258 if (!checksum)
1259 mPUSHi(aint);
1260 else if (checksum > bits_in_uv)
1261 cdouble += (NV)aint;
1262 else
1263 cuv += aint;
1264 }
1265 break;
1266 case 'W':
1267 W_checksum:
1268 if (utf8) {
1269 while (len-- > 0 && s < strend) {
1270 STRLEN retlen;
1271 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1272 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1273 if (retlen == (STRLEN) -1)
1274 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1275 s += retlen;
1276 if (!checksum)
1277 mPUSHu(val);
1278 else if (checksum > bits_in_uv)
1279 cdouble += (NV) val;
1280 else
1281 cuv += val;
1282 }
1283 } else if (!checksum)
1284 while (len-- > 0) {
1285 const U8 ch = *(U8 *) s++;
1286 mPUSHu(ch);
1287 }
1288 else if (checksum > bits_in_uv)
1289 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1290 else
1291 while (len-- > 0) cuv += *(U8 *) s++;
1292 break;
1293 case 'U':
1294 if (len == 0) {
c5333953 1295 if (explicit_length && howlen != e_star) {
1604cfb0
MS
1296 /* Switch to "bytes in UTF-8" mode */
1297 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1298 else
1299 /* Should be impossible due to the need_utf8() test */
1300 Perl_croak(aTHX_ "U0 mode on a byte string");
1301 }
1302 break;
1303 }
1304 if (len > strend - s) len = strend - s;
1305 if (!checksum) {
1306 if (len && unpack_only_one) len = 1;
1307 EXTEND(SP, len);
1308 EXTEND_MORTAL(len);
1309 }
1310 while (len-- > 0 && s < strend) {
1311 STRLEN retlen;
1312 UV auv;
1313 if (utf8) {
1314 U8 result[UTF8_MAXLEN+1];
1315 const char *ptr = s;
1316 STRLEN len;
1317 /* Bug: warns about bad utf8 even if we are short on bytes
1318 and will break out of the loop */
1319 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1320 'U'))
1321 break;
1322 len = UTF8SKIP(result);
1323 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1324 (char *) &result[1], len-1, 'U')) break;
3c2b2fcb
KW
1325 auv = utf8n_to_uvchr(result, len, &retlen,
1326 UTF8_ALLOW_DEFAULT);
1604cfb0
MS
1327 s = ptr;
1328 } else {
3c2b2fcb
KW
1329 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen,
1330 UTF8_ALLOW_DEFAULT);
1604cfb0
MS
1331 if (retlen == (STRLEN) -1)
1332 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1333 s += retlen;
1334 }
1335 if (!checksum)
1336 mPUSHu(auv);
1337 else if (checksum > bits_in_uv)
1338 cdouble += (NV) auv;
1339 else
1340 cuv += auv;
1341 }
1342 break;
1343 case 's' | TYPE_IS_SHRIEKING:
49704364 1344#if SHORTSIZE != SIZE16
1604cfb0
MS
1345 while (len-- > 0) {
1346 short ashort;
aaec8192 1347 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1604cfb0
MS
1348 if (!checksum)
1349 mPUSHi(ashort);
1350 else if (checksum > bits_in_uv)
1351 cdouble += (NV)ashort;
1352 else
1353 cuv += ashort;
1354 }
1355 break;
49704364 1356#else
1604cfb0 1357 /* FALLTHROUGH */
a6ec74c1 1358#endif
1604cfb0
MS
1359 case 's':
1360 while (len-- > 0) {
1361 I16 ai16;
08ca2aa3
TH
1362
1363#if U16SIZE > SIZE16
1604cfb0 1364 ai16 = 0;
08ca2aa3 1365#endif
aaec8192 1366 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1367#if U16SIZE > SIZE16
1604cfb0
MS
1368 if (ai16 > 32767)
1369 ai16 -= 65536;
a6ec74c1 1370#endif
1604cfb0
MS
1371 if (!checksum)
1372 mPUSHi(ai16);
1373 else if (checksum > bits_in_uv)
1374 cdouble += (NV)ai16;
1375 else
1376 cuv += ai16;
1377 }
1378 break;
1379 case 'S' | TYPE_IS_SHRIEKING:
49704364 1380#if SHORTSIZE != SIZE16
1604cfb0
MS
1381 while (len-- > 0) {
1382 unsigned short aushort;
1383 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1384 if (!checksum)
1385 mPUSHu(aushort);
1386 else if (checksum > bits_in_uv)
1387 cdouble += (NV)aushort;
1388 else
1389 cuv += aushort;
1390 }
1391 break;
49704364 1392#else
924ba076 1393 /* FALLTHROUGH */
49704364 1394#endif
1604cfb0
MS
1395 case 'v':
1396 case 'n':
1397 case 'S':
1398 while (len-- > 0) {
1399 U16 au16;
08ca2aa3 1400#if U16SIZE > SIZE16
1604cfb0 1401 au16 = 0;
08ca2aa3 1402#endif
aaec8192 1403 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1604cfb0
MS
1404 if (datumtype == 'n')
1405 au16 = PerlSock_ntohs(au16);
1406 if (datumtype == 'v')
1407 au16 = vtohs(au16);
1408 if (!checksum)
1409 mPUSHu(au16);
1410 else if (checksum > bits_in_uv)
1411 cdouble += (NV) au16;
1412 else
1413 cuv += au16;
1414 }
1415 break;
1416 case 'v' | TYPE_IS_SHRIEKING:
1417 case 'n' | TYPE_IS_SHRIEKING:
1418 while (len-- > 0) {
1419 I16 ai16;
08ca2aa3 1420# if U16SIZE > SIZE16
1604cfb0 1421 ai16 = 0;
08ca2aa3 1422# endif
aaec8192 1423 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1424 /* There should never be any byte-swapping here. */
1425 assert(!TYPE_ENDIANNESS(datumtype));
1604cfb0
MS
1426 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1427 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1428 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1429 ai16 = (I16) vtohs((U16) ai16);
1430 if (!checksum)
1431 mPUSHi(ai16);
1432 else if (checksum > bits_in_uv)
1433 cdouble += (NV) ai16;
1434 else
1435 cuv += ai16;
1436 }
1437 break;
1438 case 'i':
1439 case 'i' | TYPE_IS_SHRIEKING:
1440 while (len-- > 0) {
1441 int aint;
aaec8192 1442 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1604cfb0
MS
1443 if (!checksum)
1444 mPUSHi(aint);
1445 else if (checksum > bits_in_uv)
1446 cdouble += (NV)aint;
1447 else
1448 cuv += aint;
1449 }
1450 break;
1451 case 'I':
1452 case 'I' | TYPE_IS_SHRIEKING:
1453 while (len-- > 0) {
1454 unsigned int auint;
aaec8192 1455 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1604cfb0
MS
1456 if (!checksum)
1457 mPUSHu(auint);
1458 else if (checksum > bits_in_uv)
1459 cdouble += (NV)auint;
1460 else
1461 cuv += auint;
1462 }
1463 break;
1464 case 'j':
1465 while (len-- > 0) {
1466 IV aiv;
aaec8192 1467 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1604cfb0
MS
1468 if (!checksum)
1469 mPUSHi(aiv);
1470 else if (checksum > bits_in_uv)
1471 cdouble += (NV)aiv;
1472 else
1473 cuv += aiv;
1474 }
1475 break;
1476 case 'J':
1477 while (len-- > 0) {
1478 UV auv;
aaec8192 1479 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1604cfb0
MS
1480 if (!checksum)
1481 mPUSHu(auv);
1482 else if (checksum > bits_in_uv)
1483 cdouble += (NV)auv;
1484 else
1485 cuv += auv;
1486 }
1487 break;
1488 case 'l' | TYPE_IS_SHRIEKING:
49704364 1489#if LONGSIZE != SIZE32
1604cfb0
MS
1490 while (len-- > 0) {
1491 long along;
aaec8192 1492 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1604cfb0
MS
1493 if (!checksum)
1494 mPUSHi(along);
1495 else if (checksum > bits_in_uv)
1496 cdouble += (NV)along;
1497 else
1498 cuv += along;
1499 }
1500 break;
49704364 1501#else
1604cfb0 1502 /* FALLTHROUGH */
a6ec74c1 1503#endif
1604cfb0
MS
1504 case 'l':
1505 while (len-- > 0) {
1506 I32 ai32;
08ca2aa3 1507#if U32SIZE > SIZE32
1604cfb0 1508 ai32 = 0;
08ca2aa3 1509#endif
aaec8192 1510 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1511#if U32SIZE > SIZE32
1604cfb0 1512 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1513#endif
1604cfb0
MS
1514 if (!checksum)
1515 mPUSHi(ai32);
1516 else if (checksum > bits_in_uv)
1517 cdouble += (NV)ai32;
1518 else
1519 cuv += ai32;
1520 }
1521 break;
1522 case 'L' | TYPE_IS_SHRIEKING:
49704364 1523#if LONGSIZE != SIZE32
1604cfb0
MS
1524 while (len-- > 0) {
1525 unsigned long aulong;
aaec8192 1526 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1604cfb0
MS
1527 if (!checksum)
1528 mPUSHu(aulong);
1529 else if (checksum > bits_in_uv)
1530 cdouble += (NV)aulong;
1531 else
1532 cuv += aulong;
1533 }
1534 break;
49704364 1535#else
924ba076 1536 /* FALLTHROUGH */
49704364 1537#endif
1604cfb0
MS
1538 case 'V':
1539 case 'N':
1540 case 'L':
1541 while (len-- > 0) {
1542 U32 au32;
08ca2aa3 1543#if U32SIZE > SIZE32
1604cfb0 1544 au32 = 0;
08ca2aa3 1545#endif
aaec8192 1546 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1604cfb0
MS
1547 if (datumtype == 'N')
1548 au32 = PerlSock_ntohl(au32);
1549 if (datumtype == 'V')
1550 au32 = vtohl(au32);
1551 if (!checksum)
1552 mPUSHu(au32);
1553 else if (checksum > bits_in_uv)
1554 cdouble += (NV)au32;
1555 else
1556 cuv += au32;
1557 }
1558 break;
1559 case 'V' | TYPE_IS_SHRIEKING:
1560 case 'N' | TYPE_IS_SHRIEKING:
1561 while (len-- > 0) {
1562 I32 ai32;
f8e5a5db 1563#if U32SIZE > SIZE32
1604cfb0 1564 ai32 = 0;
f8e5a5db 1565#endif
aaec8192 1566 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1567 /* There should never be any byte swapping here. */
1568 assert(!TYPE_ENDIANNESS(datumtype));
1604cfb0
MS
1569 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1570 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1571 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1572 ai32 = (I32)vtohl((U32)ai32);
1573 if (!checksum)
1574 mPUSHi(ai32);
1575 else if (checksum > bits_in_uv)
1576 cdouble += (NV)ai32;
1577 else
1578 cuv += ai32;
1579 }
1580 break;
1581 case 'p':
1582 while (len-- > 0) {
1583 const char *aptr;
aaec8192 1584 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1604cfb0
MS
1585 /* newSVpv generates undef if aptr is NULL */
1586 mPUSHs(newSVpv(aptr, 0));
1587 }
1588 break;
1589 case 'w':
1590 {
1591 UV auv = 0;
1592 size_t bytes = 0;
1593
1594 while (len > 0 && s < strend) {
1595 U8 ch;
1596 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1597 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1598 /* UTF8_IS_XXXXX not right here because this is a BER, not
1599 * UTF-8 format - using constant 0x80 */
1604cfb0
MS
1600 if (ch < 0x80) {
1601 bytes = 0;
1602 mPUSHu(auv);
1603 len--;
1604 auv = 0;
1605 continue;
1606 }
1607 if (++bytes >= sizeof(UV)) { /* promote to string */
1608 const char *t;
1609
1610 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
147e3846 1611 (int)TYPE_DIGITS(UV), auv);
1604cfb0
MS
1612 while (s < strend) {
1613 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1614 sv = mul128(sv, (U8)(ch & 0x7f));
1615 if (!(ch & 0x80)) {
1616 bytes = 0;
1617 break;
1618 }
1619 }
1620 t = SvPV_nolen_const(sv);
1621 while (*t == '0')
1622 t++;
1623 sv_chop(sv, t);
1624 mPUSHs(sv);
1625 len--;
1626 auv = 0;
1627 }
1628 }
1629 if ((s >= strend) && bytes)
1630 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1631 }
1632 break;
1633 case 'P':
1634 if (symptr->howlen == e_star)
1635 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1636 EXTEND(SP, 1);
1637 if (s + sizeof(char*) <= strend) {
1638 char *aptr;
aaec8192 1639 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1604cfb0
MS
1640 /* newSVpvn generates undef if aptr is NULL */
1641 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1642 }
1643 break;
c174bf3b 1644#if defined(HAS_QUAD) && IVSIZE >= 8
1604cfb0
MS
1645 case 'q':
1646 while (len-- > 0) {
1647 Quad_t aquad;
aaec8192 1648 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1604cfb0 1649 if (!checksum)
c174bf3b 1650 mPUSHs(newSViv((IV)aquad));
1604cfb0
MS
1651 else if (checksum > bits_in_uv)
1652 cdouble += (NV)aquad;
1653 else
1654 cuv += aquad;
1655 }
1656 break;
1657 case 'Q':
1658 while (len-- > 0) {
1659 Uquad_t auquad;
aaec8192 1660 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1604cfb0
MS
1661 if (!checksum)
1662 mPUSHs(newSVuv((UV)auquad));
1663 else if (checksum > bits_in_uv)
1664 cdouble += (NV)auquad;
1665 else
1666 cuv += auquad;
1667 }
1668 break;
1640b983 1669#endif
1604cfb0
MS
1670 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1671 case 'f':
1672 while (len-- > 0) {
1673 float afloat;
aaec8192 1674 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1604cfb0
MS
1675 if (!checksum)
1676 mPUSHn(afloat);
1677 else
1678 cdouble += afloat;
1679 }
1680 break;
1681 case 'd':
1682 while (len-- > 0) {
1683 double adouble;
aaec8192 1684 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1604cfb0
MS
1685 if (!checksum)
1686 mPUSHn(adouble);
1687 else
1688 cdouble += adouble;
1689 }
1690 break;
1691 case 'F':
1692 while (len-- > 0) {
1693 NV_bytes anv;
aaec8192
NC
1694 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1695 datumtype, needs_swap);
1604cfb0
MS
1696 if (!checksum)
1697 mPUSHn(anv.nv);
1698 else
1699 cdouble += anv.nv;
1700 }
1701 break;
77a2054e 1702#if defined(HAS_LONG_DOUBLE)
1604cfb0
MS
1703 case 'D':
1704 while (len-- > 0) {
1705 ld_bytes aldouble;
aaec8192
NC
1706 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1707 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1708 /* The most common long double format, the x86 80-bit
1709 * extended precision, has either 2 or 6 unused bytes,
1710 * which may contain garbage, which may contain
1711 * unintentional data. While we do zero the bytes of
1712 * the long double data in pack(), here in unpack() we
1713 * don't, because it's really hard to envision that
1714 * reading the long double off aldouble would be
e075ae47 1715 * affected by the unused bytes.
42262fd3
JH
1716 *
1717 * Note that trying to unpack 'long doubles' of 'long
1718 * doubles' packed in another system is in the general
1719 * case doomed without having more detail. */
1604cfb0
MS
1720 if (!checksum)
1721 mPUSHn(aldouble.ld);
1722 else
1723 cdouble += aldouble.ld;
1724 }
1725 break;
92d41999 1726#endif
1604cfb0
MS
1727 case 'u':
1728 if (!checksum) {
f7fe979e 1729 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1604cfb0
MS
1730 sv = sv_2mortal(newSV(l));
1731 if (l) {
12cad9bd
TC
1732 SvPOK_on(sv);
1733 *SvEND(sv) = '\0';
1734 }
1604cfb0 1735 }
08ca2aa3 1736
bee81d36
KW
1737 /* Note that all legal uuencoded strings are ASCII printables, so
1738 * have the same representation under UTF-8 vs not. This means we
1739 * can ignore UTF8ness on legal input. For illegal we stop at the
1740 * first failure, and don't report where/what that is, so again we
1741 * can ignore UTF8ness */
1742
9233b56b
KW
1743 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1744 I32 a, b, c, d;
1745 char hunk[3];
1746
1747 len = PL_uudmap[*(U8*)s++] & 077;
1748 while (len > 0) {
1749 if (s < strend && ISUUCHAR(*s))
1750 a = PL_uudmap[*(U8*)s++] & 077;
1751 else
1752 a = 0;
1753 if (s < strend && ISUUCHAR(*s))
1754 b = PL_uudmap[*(U8*)s++] & 077;
1755 else
1756 b = 0;
1757 if (s < strend && ISUUCHAR(*s))
1758 c = PL_uudmap[*(U8*)s++] & 077;
1759 else
1760 c = 0;
1761 if (s < strend && ISUUCHAR(*s))
1762 d = PL_uudmap[*(U8*)s++] & 077;
1763 else
1764 d = 0;
1765 hunk[0] = (char)((a << 2) | (b >> 4));
1766 hunk[1] = (char)((b << 4) | (c >> 2));
1767 hunk[2] = (char)((c << 6) | d);
1768 if (!checksum)
1769 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1770 len -= 3;
1771 }
1772 if (*s == '\n')
1773 s++;
1774 else /* possible checksum byte */
1775 if (s + 1 < strend && s[1] == '\n')
1776 s += 2;
1777 }
1604cfb0
MS
1778 if (!checksum)
1779 XPUSHs(sv);
1780 break;
1781 } /* End of switch */
49704364 1782
1604cfb0
MS
1783 if (checksum) {
1784 if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1785 (checksum > bits_in_uv &&
1786 memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1787 NV trouble, anv;
a6ec74c1 1788
08ca2aa3 1789 anv = (NV) (1 << (checksum & 15));
1604cfb0
MS
1790 while (checksum >= 16) {
1791 checksum -= 16;
1792 anv *= 65536.0;
1793 }
1794 while (cdouble < 0.0)
1795 cdouble += anv;
1796 cdouble = Perl_modf(cdouble / anv, &trouble);
09b94b1f
S
1797#ifdef LONGDOUBLE_DOUBLEDOUBLE
1798 /* Workaround for powerpc doubledouble modfl bug:
1799 * close to 1.0L and -1.0L cdouble is 0, and trouble
1800 * is cdouble / anv. */
1801 if (trouble != Perl_ceil(trouble)) {
1802 cdouble = trouble;
1803 if (cdouble > 1.0L) cdouble -= 1.0L;
1804 if (cdouble < -1.0L) cdouble += 1.0L;
1805 }
1806#endif
1807 cdouble *= anv;
1604cfb0
MS
1808 sv = newSVnv(cdouble);
1809 }
1810 else {
1811 if (checksum < bits_in_uv) {
1812 UV mask = nBIT_MASK(checksum);
1813 cuv &= mask;
1814 }
1815 sv = newSVuv(cuv);
1816 }
1817 mXPUSHs(sv);
1818 checksum = 0;
1819 }
fc241834 1820
49704364
WL
1821 if (symptr->flags & FLAG_SLASH){
1822 if (SP - PL_stack_base - start_sp_offset <= 0)
1604cfb0 1823 break;
49704364
WL
1824 if( next_symbol(symptr) ){
1825 if( symptr->howlen == e_number )
1604cfb0 1826 Perl_croak(aTHX_ "Count after length/code in unpack" );
49704364 1827 if( beyond ){
1604cfb0
MS
1828 /* ...end of char buffer then no decent length available */
1829 Perl_croak(aTHX_ "length/code after end of string in unpack" );
49704364 1830 } else {
1604cfb0 1831 /* take top of stack (hope it's numeric) */
49704364
WL
1832 len = POPi;
1833 if( len < 0 )
1834 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1835 }
1836 } else {
1604cfb0 1837 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
49704364
WL
1838 }
1839 datumtype = symptr->code;
21c16052 1840 explicit_length = FALSE;
1604cfb0 1841 goto redo_switch;
49704364 1842 }
a6ec74c1 1843 }
49704364 1844
18529408 1845 if (new_s)
1604cfb0 1846 *new_s = s;
18529408
IZ
1847 PUTBACK;
1848 return SP - PL_stack_base - start_sp_offset;
1849}
1850
1851PP(pp_unpack)
1852{
1853 dSP;
bab9c0ac 1854 dPOPPOPssrl;
1c23e2bd 1855 U8 gimme = GIMME_V;
18529408
IZ
1856 STRLEN llen;
1857 STRLEN rlen;
5c144d81
NC
1858 const char *pat = SvPV_const(left, llen);
1859 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1860 const char *strend = s + rlen;
1861 const char *patend = pat + llen;
e1b825c1 1862 SSize_t cnt;
18529408
IZ
1863
1864 PUTBACK;
7accc089 1865 cnt = unpackstring(pat, patend, s, strend,
1604cfb0
MS
1866 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1867 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1868
18529408
IZ
1869 SPAGAIN;
1870 if ( !cnt && gimme == G_SCALAR )
1871 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1872 RETURN;
1873}
1874
f337b084 1875STATIC U8 *
e1b825c1 1876doencodes(U8 *h, const U8 *s, SSize_t len)
a6ec74c1 1877{
f337b084 1878 *h++ = PL_uuemap[len];
a6ec74c1 1879 while (len > 2) {
1604cfb0
MS
1880 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1881 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1882 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1883 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1884 s += 3;
1885 len -= 3;
a6ec74c1
JH
1886 }
1887 if (len > 0) {
e68aed92 1888 const U8 r = (len > 1 ? s[1] : '\0');
1604cfb0
MS
1889 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1890 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1891 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1892 *h++ = PL_uuemap[0];
a6ec74c1 1893 }
f337b084
TH
1894 *h++ = '\n';
1895 return h;
a6ec74c1
JH
1896}
1897
1898STATIC SV *
f7fe979e 1899S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1900{
8b6e33c7
AL
1901 SV *result = newSVpvn(s, l);
1902 char *const result_c = SvPV_nolen(result); /* convenience */
1903 char *out = result_c;
1904 bool skip = 1;
1905 bool ignore = 0;
a6ec74c1 1906
7918f24d
NC
1907 PERL_ARGS_ASSERT_IS_AN_INT;
1908
a6ec74c1
JH
1909 while (*s) {
1910 switch (*s) {
1911 case ' ':
1912 break;
1913 case '+':
1914 if (!skip) {
1604cfb0
MS
1915 SvREFCNT_dec(result);
1916 return (NULL);
a6ec74c1
JH
1917 }
1918 break;
1919 case '0':
1920 case '1':
1921 case '2':
1922 case '3':
1923 case '4':
1924 case '5':
1925 case '6':
1926 case '7':
1927 case '8':
1928 case '9':
1929 skip = 0;
1930 if (!ignore) {
1604cfb0 1931 *(out++) = *s;
a6ec74c1
JH
1932 }
1933 break;
1934 case '.':
1935 ignore = 1;
1936 break;
1937 default:
1938 SvREFCNT_dec(result);
1939 return (NULL);
1940 }
1941 s++;
1942 }
1943 *(out++) = '\0';
1944 SvCUR_set(result, out - result_c);
1945 return (result);
1946}
1947
1948/* pnum must be '\0' terminated */
1949STATIC int
1950S_div128(pTHX_ SV *pnum, bool *done)
1951{
8b6e33c7
AL
1952 STRLEN len;
1953 char * const s = SvPV(pnum, len);
1954 char *t = s;
1955 int m = 0;
1956
7918f24d
NC
1957 PERL_ARGS_ASSERT_DIV128;
1958
8b6e33c7
AL
1959 *done = 1;
1960 while (*t) {
1604cfb0
MS
1961 const int i = m * 10 + (*t - '0');
1962 const int r = (i >> 7); /* r < 10 */
1963 m = i & 0x7F;
1964 if (r) {
1965 *done = 0;
1966 }
1967 *(t++) = '0' + r;
a6ec74c1 1968 }
8b6e33c7
AL
1969 *(t++) = '\0';
1970 SvCUR_set(pnum, (STRLEN) (t - s));
1971 return (m);
a6ec74c1
JH
1972}
1973
18529408 1974/*
7accc089
JH
1975=for apidoc packlist
1976
796b6530 1977The engine implementing C<pack()> Perl function.
7accc089 1978
bfce84ec
AL
1979=cut
1980*/
7accc089
JH
1981
1982void
5aaab254 1983Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1984{
aadb217d
JH
1985 tempsym_t sym;
1986
7918f24d
NC
1987 PERL_ARGS_ASSERT_PACKLIST;
1988
f7fe979e 1989 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1990
f337b084
TH
1991 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1992 Also make sure any UTF8 flag is loaded */
56eb0262 1993 SvPV_force_nolen(cat);
bfce84ec 1994 if (DO_UTF8(cat))
1604cfb0 1995 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1996
49704364
WL
1997 (void)pack_rec( cat, &sym, beglist, endlist );
1998}
1999
f337b084
TH
2000/* like sv_utf8_upgrade, but also repoint the group start markers */
2001STATIC void
2002marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2003 STRLEN len;
2004 tempsym_t *group;
f7fe979e
AL
2005 const char *from_ptr, *from_start, *from_end, **marks, **m;
2006 char *to_start, *to_ptr;
f337b084
TH
2007
2008 if (SvUTF8(sv)) return;
2009
aa07b2f6 2010 from_start = SvPVX_const(sv);
f337b084
TH
2011 from_end = from_start + SvCUR(sv);
2012 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
1604cfb0 2013 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084 2014 if (from_ptr == from_end) {
1604cfb0
MS
2015 /* Simple case: no character needs to be changed */
2016 SvUTF8_on(sv);
2017 return;
f337b084
TH
2018 }
2019
3473cf63 2020 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2021 Newx(to_start, len, char);
f337b084
TH
2022 Copy(from_start, to_start, from_ptr-from_start, char);
2023 to_ptr = to_start + (from_ptr-from_start);
2024
a02a5408 2025 Newx(marks, sym_ptr->level+2, const char *);
f337b084 2026 for (group=sym_ptr; group; group = group->previous)
1604cfb0 2027 marks[group->level] = from_start + group->strbeg;
f337b084
TH
2028 marks[sym_ptr->level+1] = from_end+1;
2029 for (m = marks; *m < from_ptr; m++)
1604cfb0 2030 *m = to_start + (*m-from_start);
f337b084
TH
2031
2032 for (;from_ptr < from_end; from_ptr++) {
1604cfb0
MS
2033 while (*m == from_ptr) *m++ = to_ptr;
2034 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2035 }
2036 *to_ptr = 0;
2037
2038 while (*m == from_ptr) *m++ = to_ptr;
2039 if (m != marks + sym_ptr->level+1) {
1604cfb0
MS
2040 Safefree(marks);
2041 Safefree(to_start);
2042 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2043 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2044 }
2045 for (group=sym_ptr; group; group = group->previous)
1604cfb0 2046 group->strbeg = marks[group->level] - to_start;
f337b084
TH
2047 Safefree(marks);
2048
2049 if (SvOOK(sv)) {
1604cfb0
MS
2050 if (SvIVX(sv)) {
2051 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2052 from_start -= SvIVX(sv);
2053 SvIV_set(sv, 0);
2054 }
2055 SvFLAGS(sv) &= ~SVf_OOK;
f337b084
TH
2056 }
2057 if (SvLEN(sv) != 0)
1604cfb0 2058 Safefree(from_start);
f880fe2f 2059 SvPV_set(sv, to_start);
b162af07
SP
2060 SvCUR_set(sv, to_ptr - to_start);
2061 SvLEN_set(sv, len);
f337b084
TH
2062 SvUTF8_on(sv);
2063}
2064
2065/* Exponential string grower. Makes string extension effectively O(n)
2066 needed says how many extra bytes we need (not counting the final '\0')
2067 Only grows the string if there is an actual lack of space
2068*/
2069STATIC char *
0bd48802 2070S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2071 const STRLEN cur = SvCUR(sv);
2072 const STRLEN len = SvLEN(sv);
f337b084 2073 STRLEN extend;
7918f24d
NC
2074
2075 PERL_ARGS_ASSERT_SV_EXP_GROW;
2076
f337b084
TH
2077 if (len - cur > needed) return SvPVX(sv);
2078 extend = needed > len ? needed : len;
2079 return SvGROW(sv, len+extend+1);
2080}
49704364 2081
93f6e112 2082static SV *
b197e565 2083S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2084{
2085 SvGETMAGIC(sv);
93f6e112 2086 if (UNLIKELY(SvAMAGIC(sv)))
1604cfb0 2087 sv = sv_2num(sv);
99f450cc 2088 if (UNLIKELY(isinfnansv(sv))) {
1604cfb0
MS
2089 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2090 const NV nv = SvNV_nomg(sv);
2091 if (c == 'w')
2092 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2093 else
2094 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2095 }
93f6e112 2096 return sv;
354b74ae
FC
2097}
2098
93f6e112 2099#define SvIV_no_inf(sv,d) \
1604cfb0 2100 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
93f6e112 2101#define SvUV_no_inf(sv,d) \
1604cfb0 2102 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2103
49704364
WL
2104STATIC
2105SV **
f337b084 2106S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2107{
49704364 2108 tempsym_t lookahead;
e1b825c1 2109 SSize_t items = endlist - beglist;
f337b084
TH
2110 bool found = next_symbol(symptr);
2111 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2112 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2113 char* from;
f337b084 2114
7918f24d
NC
2115 PERL_ARGS_ASSERT_PACK_REC;
2116
f337b084 2117 if (symptr->level == 0 && found && symptr->code == 'U') {
1604cfb0
MS
2118 marked_upgrade(aTHX_ cat, symptr);
2119 symptr->flags |= FLAG_DO_UTF8;
2120 utf8 = 0;
49704364 2121 }
f337b084 2122 symptr->strbeg = SvCUR(cat);
49704364
WL
2123
2124 while (found) {
1604cfb0
MS
2125 SV *fromstr;
2126 STRLEN fromlen;
2127 SSize_t len;
2128 SV *lengthcode = NULL;
49704364 2129 I32 datumtype = symptr->code;
f337b084 2130 howlen_t howlen = symptr->howlen;
1604cfb0
MS
2131 char *start = SvPVX(cat);
2132 char *cur = start + SvCUR(cat);
a1219b5e 2133 bool needs_swap;
49704364 2134
fc1bb3f2 2135#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2136#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2137
2138 switch (howlen) {
1604cfb0
MS
2139 case e_star:
2140 len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2141 0 : items;
2142 break;
2143 default:
2144 /* e_no_len and e_number */
2145 len = symptr->length;
2146 break;
49704364
WL
2147 }
2148
1604cfb0
MS
2149 if (len) {
2150 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2151
1604cfb0
MS
2152 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2153 /* We can process this letter. */
2154 STRLEN size = props & PACK_SIZE_MASK;
2155 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2156 }
f337b084
TH
2157 }
2158
49704364
WL
2159 /* Look ahead for next symbol. Do we have code/code? */
2160 lookahead = *symptr;
2161 found = next_symbol(&lookahead);
1604cfb0
MS
2162 if (symptr->flags & FLAG_SLASH) {
2163 IV count;
2164 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2165 if (memCHRs("aAZ", lookahead.code)) {
2166 if (lookahead.howlen == e_number) count = lookahead.length;
2167 else {
2168 if (items > 0) {
2169 count = sv_len_utf8(*beglist);
2170 }
2171 else count = 0;
2172 if (lookahead.code == 'Z') count++;
2173 }
2174 } else {
2175 if (lookahead.howlen == e_number && lookahead.length < items)
2176 count = lookahead.length;
2177 else count = items;
2178 }
2179 lookahead.howlen = e_number;
2180 lookahead.length = count;
2181 lengthcode = sv_2mortal(newSViv(count));
2182 }
49704364 2183
a1219b5e
NC
2184 needs_swap = NEEDS_SWAP(datumtype);
2185
1604cfb0
MS
2186 /* Code inside the switch must take care to properly update
2187 cat (CUR length and '\0' termination) if it updated *cur and
2188 doesn't simply leave using break */
2189 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2190 default:
d3cdb721 2191 /* diag_listed_as: Invalid type '%s' in %s */
1604cfb0
MS
2192 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2193 (int) TYPE_NO_MODIFIERS(datumtype));
2194 case '%':
2195 Perl_croak(aTHX_ "'%%' may not be used in pack");
2196
2197 case '.' | TYPE_IS_SHRIEKING:
2198 case '.':
2199 if (howlen == e_star) from = start;
2200 else if (len == 0) from = cur;
2201 else {
2202 tempsym_t *group = symptr;
2203
2204 while (--len && group) group = group->previous;
2205 from = group ? start + group->strbeg : start;
2206 }
2207 fromstr = NEXTFROM;
2208 len = SvIV_no_inf(fromstr, datumtype);
2209 goto resize;
2210 case '@' | TYPE_IS_SHRIEKING:
2211 case '@':
2212 from = start + symptr->strbeg;
2213 resize:
2214 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2215 if (len >= 0) {
2216 while (len && from < cur) {
2217 from += UTF8SKIP(from);
2218 len--;
2219 }
2220 if (from > cur)
2221 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2222 if (len) {
2223 /* Here we know from == cur */
2224 grow:
2225 GROWING(0, cat, start, cur, len);
2226 Zero(cur, len, char);
2227 cur += len;
2228 } else if (from < cur) {
2229 len = cur - from;
2230 goto shrink;
2231 } else goto no_change;
2232 } else {
2233 cur = from;
2234 len = -len;
2235 goto utf8_shrink;
2236 }
2237 else {
2238 len -= cur - from;
2239 if (len > 0) goto grow;
2240 if (len == 0) goto no_change;
2241 len = -len;
2242 goto shrink;
2243 }
2244 break;
2245
2246 case '(': {
49704364 2247 tempsym_t savsym = *symptr;
1604cfb0
MS
2248 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2249 symptr->flags |= group_modifiers;
49704364
WL
2250 symptr->patend = savsym.grpend;
2251 symptr->level++;
1604cfb0
MS
2252 symptr->previous = &lookahead;
2253 while (len--) {
2254 U32 was_utf8;
2255 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2256 else symptr->flags &= ~FLAG_PARSE_UTF8;
2257 was_utf8 = SvUTF8(cat);
2258 symptr->patptr = savsym.grpbeg;
2259 beglist = pack_rec(cat, symptr, beglist, endlist);
2260 if (SvUTF8(cat) != was_utf8)
2261 /* This had better be an upgrade while in utf8==0 mode */
2262 utf8 = 1;
2263
2264 if (savsym.howlen == e_star && beglist == endlist)
2265 break; /* No way to continue */
2266 }
2267 items = endlist - beglist;
2268 lookahead.flags = symptr->flags & ~group_modifiers;
2269 goto no_change;
2270 }
2271 case 'X' | TYPE_IS_SHRIEKING:
2272 if (!len) /* Avoid division by 0 */
2273 len = 1;
2274 if (utf8) {
2275 char *hop, *last;
2276 SSize_t l = len;
2277 hop = last = start;
2278 while (hop < cur) {
2279 hop += UTF8SKIP(hop);
2280 if (--l == 0) {
2281 last = hop;
2282 l = len;
2283 }
2284 }
2285 if (last > cur)
2286 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2287 cur = last;
2288 break;
2289 }
2290 len = (cur-start) % len;
2291 /* FALLTHROUGH */
2292 case 'X':
2293 if (utf8) {
2294 if (len < 1) goto no_change;
2295 utf8_shrink:
2296 while (len > 0) {
2297 if (cur <= start)
2298 Perl_croak(aTHX_ "'%c' outside of string in pack",
2299 (int) TYPE_NO_MODIFIERS(datumtype));
2300 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2301 if (cur <= start)
2302 Perl_croak(aTHX_ "'%c' outside of string in pack",
2303 (int) TYPE_NO_MODIFIERS(datumtype));
2304 }
2305 len--;
2306 }
2307 } else {
2308 shrink:
2309 if (cur - start < len)
2310 Perl_croak(aTHX_ "'%c' outside of string in pack",
2311 (int) TYPE_NO_MODIFIERS(datumtype));
2312 cur -= len;
2313 }
2314 if (cur < start+symptr->strbeg) {
2315 /* Make sure group starts don't point into the void */
2316 tempsym_t *group;
2317 const STRLEN length = cur-start;
2318 for (group = symptr;
2319 group && length < group->strbeg;
2320 group = group->previous) group->strbeg = length;
2321 lookahead.strbeg = length;
2322 }
2323 break;
2324 case 'x' | TYPE_IS_SHRIEKING: {
2325 SSize_t ai32;
2326 if (!len) /* Avoid division by 0 */
2327 len = 1;
2328 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2329 else ai32 = (cur - start) % len;
2330 if (ai32 == 0) goto no_change;
2331 len -= ai32;
2332 }
2333 /* FALLTHROUGH */
2334 case 'x':
2335 goto grow;
2336 case 'A':
2337 case 'Z':
2338 case 'a': {
2339 const char *aptr;
2340
2341 fromstr = NEXTFROM;
2342 aptr = SvPV_const(fromstr, fromlen);
2343 if (DO_UTF8(fromstr)) {
f7fe979e 2344 const char *end, *s;
f337b084 2345
1604cfb0
MS
2346 if (!utf8 && !SvUTF8(cat)) {
2347 marked_upgrade(aTHX_ cat, symptr);
2348 lookahead.flags |= FLAG_DO_UTF8;
2349 lookahead.strbeg = symptr->strbeg;
2350 utf8 = 1;
2351 start = SvPVX(cat);
2352 cur = start + SvCUR(cat);
2353 }
2354 if (howlen == e_star) {
2355 if (utf8) goto string_copy;
2356 len = fromlen+1;
2357 }
2358 s = aptr;
2359 end = aptr + fromlen;
2360 fromlen = datumtype == 'Z' ? len-1 : len;
2361 while ((SSize_t) fromlen > 0 && s < end) {
2362 s += UTF8SKIP(s);
2363 fromlen--;
2364 }
2365 if (s > end)
2366 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2367 if (utf8) {
2368 len = fromlen;
2369 if (datumtype == 'Z') len++;
2370 fromlen = s-aptr;
2371 len += fromlen;
2372
2373 goto string_copy;
2374 }
2375 fromlen = len - fromlen;
2376 if (datumtype == 'Z') fromlen--;
2377 if (howlen == e_star) {
2378 len = fromlen;
2379 if (datumtype == 'Z') len++;
2380 }
2381 GROWING(0, cat, start, cur, len);
2382 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2383 datumtype | TYPE_IS_PACK))
2384 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2385 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2386 (int)datumtype, aptr, end, cur, fromlen);
2387 cur += fromlen;
2388 len -= fromlen;
2389 } else if (utf8) {
2390 if (howlen == e_star) {
2391 len = fromlen;
2392 if (datumtype == 'Z') len++;
2393 }
2394 if (len <= (SSize_t) fromlen) {
2395 fromlen = len;
2396 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2397 }
2398 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2399 upgrade, so:
2400 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2401 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2402 len -= fromlen;
2403 while (fromlen > 0) {
2404 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2405 aptr++;
2406 fromlen--;
2407 }
2408 } else {
2409 string_copy:
2410 if (howlen == e_star) {
2411 len = fromlen;
2412 if (datumtype == 'Z') len++;
2413 }
2414 if (len <= (SSize_t) fromlen) {
2415 fromlen = len;
2416 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2417 }
2418 GROWING(0, cat, start, cur, len);
2419 Copy(aptr, cur, fromlen, char);
2420 cur += fromlen;
2421 len -= fromlen;
2422 }
2423 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2424 cur += len;
2425 SvTAINT(cat);
2426 break;
2427 }
2428 case 'B':
2429 case 'b': {
2430 const char *str, *end;
2431 SSize_t l, field_len;
2432 U8 bits;
2433 bool utf8_source;
2434 U32 utf8_flags;
2435
2436 fromstr = NEXTFROM;
2437 str = SvPV_const(fromstr, fromlen);
2438 end = str + fromlen;
2439 if (DO_UTF8(fromstr)) {
2440 utf8_source = TRUE;
2441 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2442 } else {
2443 utf8_source = FALSE;
2444 utf8_flags = 0; /* Unused, but keep compilers happy */
2445 }
2446 if (howlen == e_star) len = fromlen;
2447 field_len = (len+7)/8;
2448 GROWING(utf8, cat, start, cur, field_len);
2449 if (len > (SSize_t)fromlen) len = fromlen;
2450 bits = 0;
2451 l = 0;
2452 if (datumtype == 'B')
2453 while (l++ < len) {
2454 if (utf8_source) {
2455 UV val = 0;
2456 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2457 bits |= val & 1;
2458 } else bits |= *str++ & 1;
2459 if (l & 7) bits <<= 1;
2460 else {
2461 PUSH_BYTE(utf8, cur, bits);
2462 bits = 0;
2463 }
2464 }
2465 else
2466 /* datumtype == 'b' */
2467 while (l++ < len) {
2468 if (utf8_source) {
2469 UV val = 0;
2470 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2471 if (val & 1) bits |= 0x80;
2472 } else if (*str++ & 1)
2473 bits |= 0x80;
2474 if (l & 7) bits >>= 1;
2475 else {
2476 PUSH_BYTE(utf8, cur, bits);
2477 bits = 0;
2478 }
2479 }
2480 l--;
2481 if (l & 7) {
2482 if (datumtype == 'B')
2483 bits <<= 7 - (l & 7);
2484 else
2485 bits >>= 7 - (l & 7);
2486 PUSH_BYTE(utf8, cur, bits);
2487 l += 7;
2488 }
2489 /* Determine how many chars are left in the requested field */
2490 l /= 8;
2491 if (howlen == e_star) field_len = 0;
2492 else field_len -= l;
2493 Zero(cur, field_len, char);
2494 cur += field_len;
2495 break;
2496 }
2497 case 'H':
2498 case 'h': {
2499 const char *str, *end;
2500 SSize_t l, field_len;
2501 U8 bits;
2502 bool utf8_source;
2503 U32 utf8_flags;
2504
2505 fromstr = NEXTFROM;
2506 str = SvPV_const(fromstr, fromlen);
2507 end = str + fromlen;
2508 if (DO_UTF8(fromstr)) {
2509 utf8_source = TRUE;
2510 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2511 } else {
2512 utf8_source = FALSE;
2513 utf8_flags = 0; /* Unused, but keep compilers happy */
2514 }
2515 if (howlen == e_star) len = fromlen;
2516 field_len = (len+1)/2;
2517 GROWING(utf8, cat, start, cur, field_len);
2518 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2519 bits = 0;
2520 l = 0;
2521 if (datumtype == 'H')
2522 while (l++ < len) {
2523 if (utf8_source) {
2524 UV val = 0;
2525 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2526 if (val < 256 && isALPHA(val))
2527 bits |= (val + 9) & 0xf;
2528 else
2529 bits |= val & 0xf;
2530 } else if (isALPHA(*str))
2531 bits |= (*str++ + 9) & 0xf;
2532 else
2533 bits |= *str++ & 0xf;
2534 if (l & 1) bits <<= 4;
2535 else {
2536 PUSH_BYTE(utf8, cur, bits);
2537 bits = 0;
2538 }
2539 }
2540 else
2541 while (l++ < len) {
2542 if (utf8_source) {
2543 UV val = 0;
2544 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2545 if (val < 256 && isALPHA(val))
2546 bits |= ((val + 9) & 0xf) << 4;
2547 else
2548 bits |= (val & 0xf) << 4;
2549 } else if (isALPHA(*str))
2550 bits |= ((*str++ + 9) & 0xf) << 4;
2551 else
2552 bits |= (*str++ & 0xf) << 4;
2553 if (l & 1) bits >>= 4;
2554 else {
2555 PUSH_BYTE(utf8, cur, bits);
2556 bits = 0;
2557 }
2558 }
2559 l--;
2560 if (l & 1) {
2561 PUSH_BYTE(utf8, cur, bits);
2562 l++;
2563 }
2564 /* Determine how many chars are left in the requested field */
2565 l /= 2;
2566 if (howlen == e_star) field_len = 0;
2567 else field_len -= l;
2568 Zero(cur, field_len, char);
2569 cur += field_len;
2570 break;
2571 }
2572 case 'c':
2573 while (len-- > 0) {
2574 IV aiv;
2575 fromstr = NEXTFROM;
354b74ae 2576 aiv = SvIV_no_inf(fromstr, datumtype);
1604cfb0
MS
2577 if ((-128 > aiv || aiv > 127))
2578 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2579 "Character in 'c' format wrapped in pack");
a5d875a7 2580 PUSH_BYTE(utf8, cur, (U8)aiv);
1604cfb0
MS
2581 }
2582 break;
2583 case 'C':
2584 if (len == 0) {
2585 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2586 break;
2587 }
2588 while (len-- > 0) {
2589 IV aiv;
2590 fromstr = NEXTFROM;
354b74ae 2591 aiv = SvIV_no_inf(fromstr, datumtype);
1604cfb0
MS
2592 if ((0 > aiv || aiv > 0xff))
2593 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2594 "Character in 'C' format wrapped in pack");
a5d875a7 2595 PUSH_BYTE(utf8, cur, (U8)aiv);
1604cfb0
MS
2596 }
2597 break;
2598 case 'W': {
2599 char *end;
2600 U8 in_bytes = (U8)IN_BYTES;
2601
2602 end = start+SvLEN(cat)-1;
2603 if (utf8) end -= UTF8_MAXLEN-1;
2604 while (len-- > 0) {
2605 UV auv;
2606 fromstr = NEXTFROM;
2607 auv = SvUV_no_inf(fromstr, datumtype);
2608 if (in_bytes) auv = auv % 0x100;
2609 if (utf8) {
2610 W_utf8:
2611 if (cur >= end) {
2612 *cur = '\0';
2613 SvCUR_set(cat, cur - start);
2614
2615 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2616 end = start+SvLEN(cat)-UTF8_MAXLEN;
2617 }
2618 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2619 } else {
2620 if (auv >= 0x100) {
2621 if (!SvUTF8(cat)) {
2622 *cur = '\0';
2623 SvCUR_set(cat, cur - start);
2624 marked_upgrade(aTHX_ cat, symptr);
2625 lookahead.flags |= FLAG_DO_UTF8;
2626 lookahead.strbeg = symptr->strbeg;
2627 utf8 = 1;
2628 start = SvPVX(cat);
2629 cur = start + SvCUR(cat);
2630 end = start+SvLEN(cat)-UTF8_MAXLEN;
2631 goto W_utf8;
2632 }
2633 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2634 "Character in 'W' format wrapped in pack");
a5d875a7 2635 auv = (U8) auv;
1604cfb0
MS
2636 }
2637 if (cur >= end) {
2638 *cur = '\0';
2639 SvCUR_set(cat, cur - start);
2640 GROWING(0, cat, start, cur, len+1);
2641 end = start+SvLEN(cat)-1;
2642 }
2643 *(U8 *) cur++ = (U8)auv;
2644 }
2645 }
2646 break;
2647 }
2648 case 'U': {
2649 char *end;
2650
2651 if (len == 0) {
2652 if (!(symptr->flags & FLAG_DO_UTF8)) {
2653 marked_upgrade(aTHX_ cat, symptr);
2654 lookahead.flags |= FLAG_DO_UTF8;
2655 lookahead.strbeg = symptr->strbeg;
2656 }
2657 utf8 = 0;
2658 goto no_change;
2659 }
2660
2661 end = start+SvLEN(cat);
2662 if (!utf8) end -= UTF8_MAXLEN;
2663 while (len-- > 0) {
2664 UV auv;
2665 fromstr = NEXTFROM;
2666 auv = SvUV_no_inf(fromstr, datumtype);
2667 if (utf8) {
2668 U8 buffer[UTF8_MAXLEN+1], *endb;
3c2b2fcb 2669 endb = uvchr_to_utf8_flags(buffer, auv, 0);
1604cfb0
MS
2670 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2671 *cur = '\0';
2672 SvCUR_set(cat, cur - start);
2673 GROWING(0, cat, start, cur,
2674 len+(endb-buffer)*UTF8_EXPAND);
2675 end = start+SvLEN(cat);
2676 }
d21d7215 2677 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
1604cfb0
MS
2678 } else {
2679 if (cur >= end) {
2680 *cur = '\0';
2681 SvCUR_set(cat, cur - start);
2682 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2683 end = start+SvLEN(cat)-UTF8_MAXLEN;
2684 }
3c2b2fcb 2685 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
1604cfb0
MS
2686 }
2687 }
2688 break;
2689 }
2690 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2691 case 'f':
2692 while (len-- > 0) {
2693 float afloat;
2694 NV anv;
2695 fromstr = NEXTFROM;
2696 anv = SvNV(fromstr);
a7157111 2697# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
1604cfb0
MS
2698 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2699 * on Alpha; fake it if we don't have them.
2700 */
2701 if (anv > FLT_MAX)
2702 afloat = FLT_MAX;
2703 else if (anv < -FLT_MAX)
2704 afloat = -FLT_MAX;
2705 else afloat = (float)anv;
baf3cf9c 2706# else
a7157111 2707# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1604cfb0
MS
2708 if(Perl_isnan(anv))
2709 afloat = (float)NV_NAN;
2710 else
a7157111
JH
2711# endif
2712# ifdef NV_INF
919894b7
DM
2713 /* a simple cast to float is undefined if outside
2714 * the range of values that can be represented */
1604cfb0 2715 afloat = (float)(anv > FLT_MAX ? NV_INF :
919894b7 2716 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2717# endif
baf3cf9c 2718# endif
3a88beaa 2719 PUSH_VAR(utf8, cur, afloat, needs_swap);
1604cfb0
MS
2720 }
2721 break;
2722 case 'd':
2723 while (len-- > 0) {
2724 double adouble;
2725 NV anv;
2726 fromstr = NEXTFROM;
2727 anv = SvNV(fromstr);
a7157111 2728# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
1604cfb0
MS
2729 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2730 * on Alpha; fake it if we don't have them.
2731 */
2732 if (anv > DBL_MAX)
2733 adouble = DBL_MAX;
2734 else if (anv < -DBL_MAX)
2735 adouble = -DBL_MAX;
2736 else adouble = (double)anv;
baf3cf9c 2737# else
1604cfb0 2738 adouble = (double)anv;
baf3cf9c 2739# endif
3a88beaa 2740 PUSH_VAR(utf8, cur, adouble, needs_swap);
1604cfb0
MS
2741 }
2742 break;
2743 case 'F': {
2744 NV_bytes anv;
2745 Zero(&anv, 1, NV); /* can be long double with unused bits */
2746 while (len-- > 0) {
2747 fromstr = NEXTFROM;
cd07c537 2748#ifdef __GNUC__
1604cfb0
MS
2749 /* to work round a gcc/x86 bug; don't use SvNV */
2750 anv.nv = sv_2nv(fromstr);
070e2677
TC
2751# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2752 && LONG_DOUBLESIZE > 10
2753 /* GCC sometimes overwrites the padding in the
2754 assignment above */
2755 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2756# endif
cd07c537 2757#else
1604cfb0 2758 anv.nv = SvNV(fromstr);
cd07c537 2759#endif
3a88beaa 2760 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
1604cfb0
MS
2761 }
2762 break;
2763 }
77a2054e 2764#if defined(HAS_LONG_DOUBLE)
1604cfb0
MS
2765 case 'D': {
2766 ld_bytes aldouble;
2767 /* long doubles can have unused bits, which may be nonzero */
2768 Zero(&aldouble, 1, long double);
2769 while (len-- > 0) {
2770 fromstr = NEXTFROM;
cd07c537 2771# ifdef __GNUC__
1604cfb0
MS
2772 /* to work round a gcc/x86 bug; don't use SvNV */
2773 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2774# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2775 /* GCC sometimes overwrites the padding in the
2776 assignment above */
2777 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2778# endif
cd07c537 2779# else
1604cfb0 2780 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2781# endif
3a88beaa
NC
2782 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2783 needs_swap);
1604cfb0
MS
2784 }
2785 break;
2786 }
92d41999 2787#endif
1604cfb0
MS
2788 case 'n' | TYPE_IS_SHRIEKING:
2789 case 'n':
2790 while (len-- > 0) {
2791 I16 ai16;
2792 fromstr = NEXTFROM;
2793 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2794 ai16 = PerlSock_htons(ai16);
3a88beaa 2795 PUSH16(utf8, cur, &ai16, FALSE);
1604cfb0
MS
2796 }
2797 break;
2798 case 'v' | TYPE_IS_SHRIEKING:
2799 case 'v':
2800 while (len-- > 0) {
2801 I16 ai16;
2802 fromstr = NEXTFROM;
2803 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2804 ai16 = htovs(ai16);
3a88beaa 2805 PUSH16(utf8, cur, &ai16, FALSE);
1604cfb0
MS
2806 }
2807 break;
49704364 2808 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2809#if SHORTSIZE != SIZE16
1604cfb0
MS
2810 while (len-- > 0) {
2811 unsigned short aushort;
2812 fromstr = NEXTFROM;
2813 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2814 PUSH_VAR(utf8, cur, aushort, needs_swap);
1604cfb0 2815 }
49704364
WL
2816 break;
2817#else
924ba076 2818 /* FALLTHROUGH */
a6ec74c1 2819#endif
1604cfb0
MS
2820 case 'S':
2821 while (len-- > 0) {
2822 U16 au16;
2823 fromstr = NEXTFROM;
2824 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2825 PUSH16(utf8, cur, &au16, needs_swap);
1604cfb0
MS
2826 }
2827 break;
2828 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2829#if SHORTSIZE != SIZE16
1604cfb0
MS
2830 while (len-- > 0) {
2831 short ashort;
2832 fromstr = NEXTFROM;
2833 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2834 PUSH_VAR(utf8, cur, ashort, needs_swap);
1604cfb0 2835 }
49704364
WL
2836 break;
2837#else
924ba076 2838 /* FALLTHROUGH */
a6ec74c1 2839#endif
1604cfb0
MS
2840 case 's':
2841 while (len-- > 0) {
2842 I16 ai16;
2843 fromstr = NEXTFROM;
2844 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2845 PUSH16(utf8, cur, &ai16, needs_swap);
1604cfb0
MS
2846 }
2847 break;
2848 case 'I':
2849 case 'I' | TYPE_IS_SHRIEKING:
2850 while (len-- > 0) {
2851 unsigned int auint;
2852 fromstr = NEXTFROM;
2853 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2854 PUSH_VAR(utf8, cur, auint, needs_swap);
1604cfb0
MS
2855 }
2856 break;
2857 case 'j':
2858 while (len-- > 0) {
2859 IV aiv;
2860 fromstr = NEXTFROM;
2861 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2862 PUSH_VAR(utf8, cur, aiv, needs_swap);
1604cfb0
MS
2863 }
2864 break;
2865 case 'J':
2866 while (len-- > 0) {
2867 UV auv;
2868 fromstr = NEXTFROM;
2869 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2870 PUSH_VAR(utf8, cur, auv, needs_swap);
1604cfb0
MS
2871 }
2872 break;
2873 case 'w':
a6ec74c1 2874 while (len-- > 0) {
1604cfb0
MS
2875 NV anv;
2876 fromstr = NEXTFROM;
2877 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2878 anv = SvNV_nomg(fromstr);
2879
2880 if (anv < 0) {
2881 *cur = '\0';
2882 SvCUR_set(cat, cur - start);
2883 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2884 }
a6ec74c1 2885
196b62db
NC
2886 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2887 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2888 any negative IVs will have already been got by the croak()
2889 above. IOK is untrue for fractions, so we test them
2890 against UV_MAX_P1. */
1604cfb0
MS
2891 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2892 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2893 char *in = buf + sizeof(buf);
2894 UV auv = SvUV_nomg(fromstr);
2895
2896 do {
2897 *--in = (char)((auv & 0x7f) | 0x80);
2898 auv >>= 7;
2899 } while (auv);
2900 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2901 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2902 in, (buf + sizeof(buf)) - in);
2903 } else if (SvPOKp(fromstr))
2904 goto w_string;
2905 else if (SvNOKp(fromstr)) {
2906 /* 10**NV_MAX_10_EXP is the largest power of 10
2907 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2908 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2909 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2910 And with that many bytes only Inf can overflow.
2911 Some C compilers are strict about integral constant
2912 expressions so we conservatively divide by a slightly
2913 smaller integer instead of multiplying by the exact
2914 floating-point value.
2915 */
0258719b 2916#ifdef NV_MAX_10_EXP
1604cfb0
MS
2917 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2918 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2919#else
1604cfb0
MS
2920 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2921 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2922#endif
1604cfb0
MS
2923 char *in = buf + sizeof(buf);
2924
2925 anv = Perl_floor(anv);
2926 do {
2927 const NV next = Perl_floor(anv / 128);
2928 if (in <= buf) /* this cannot happen ;-) */
2929 Perl_croak(aTHX_ "Cannot compress integer in pack");
2930 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2931 anv = next;
2932 } while (anv > 0);
2933 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2934 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2935 in, (buf + sizeof(buf)) - in);
2936 } else {
2937 const char *from;
2938 char *result, *in;
2939 SV *norm;
2940 STRLEN len;
2941 bool done;
2942
2943 w_string:
2944 /* Copy string and check for compliance */
2945 from = SvPV_nomg_const(fromstr, len);
2946 if ((norm = is_an_int(from, len)) == NULL)
2947 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2948
2949 Newx(result, len, char);
2950 in = result + len;
2951 done = FALSE;
2952 while (!done) *--in = div128(norm, &done) | 0x80;
2953 result[len - 1] &= 0x7F; /* clear continue bit */
2954 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2955 in, (result + len) - in);
2956 Safefree(result);
2957 SvREFCNT_dec(norm); /* free norm */
2958 }
2959 }
2960 break;
2961 case 'i':
2962 case 'i' | TYPE_IS_SHRIEKING:
2963 while (len-- > 0) {
2964 int aint;
2965 fromstr = NEXTFROM;
2966 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2967 PUSH_VAR(utf8, cur, aint, needs_swap);
1604cfb0
MS
2968 }
2969 break;
2970 case 'N' | TYPE_IS_SHRIEKING:
2971 case 'N':
2972 while (len-- > 0) {
2973 U32 au32;
2974 fromstr = NEXTFROM;
2975 au32 = SvUV_no_inf(fromstr, datumtype);
2976 au32 = PerlSock_htonl(au32);
3a88beaa 2977 PUSH32(utf8, cur, &au32, FALSE);
1604cfb0
MS
2978 }
2979 break;
2980 case 'V' | TYPE_IS_SHRIEKING:
2981 case 'V':
2982 while (len-- > 0) {
2983 U32 au32;
2984 fromstr = NEXTFROM;
2985 au32 = SvUV_no_inf(fromstr, datumtype);
2986 au32 = htovl(au32);
3a88beaa 2987 PUSH32(utf8, cur, &au32, FALSE);
1604cfb0
MS
2988 }
2989 break;
2990 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2991#if LONGSIZE != SIZE32
1604cfb0
MS
2992 while (len-- > 0) {
2993 unsigned long aulong;
2994 fromstr = NEXTFROM;
2995 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2996 PUSH_VAR(utf8, cur, aulong, needs_swap);
1604cfb0
MS
2997 }
2998 break;
49704364
WL
2999#else
3000 /* Fall though! */
a6ec74c1 3001#endif
1604cfb0
MS
3002 case 'L':
3003 while (len-- > 0) {
3004 U32 au32;
3005 fromstr = NEXTFROM;
3006 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 3007 PUSH32(utf8, cur, &au32, needs_swap);
1604cfb0
MS
3008 }
3009 break;
3010 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3011#if LONGSIZE != SIZE32
1604cfb0
MS
3012 while (len-- > 0) {
3013 long along;
3014 fromstr = NEXTFROM;
3015 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3016 PUSH_VAR(utf8, cur, along, needs_swap);
1604cfb0
MS
3017 }
3018 break;
49704364
WL
3019#else
3020 /* Fall though! */
a6ec74c1 3021#endif
1604cfb0 3022 case 'l':
49704364 3023 while (len-- > 0) {
1604cfb0
MS
3024 I32 ai32;
3025 fromstr = NEXTFROM;
3026 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3027 PUSH32(utf8, cur, &ai32, needs_swap);
1604cfb0
MS
3028 }
3029 break;
c174bf3b 3030#if defined(HAS_QUAD) && IVSIZE >= 8
1604cfb0
MS
3031 case 'Q':
3032 while (len-- > 0) {
3033 Uquad_t auquad;
3034 fromstr = NEXTFROM;
3035 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3036 PUSH_VAR(utf8, cur, auquad, needs_swap);
1604cfb0
MS
3037 }
3038 break;
3039 case 'q':
3040 while (len-- > 0) {
3041 Quad_t aquad;
3042 fromstr = NEXTFROM;
3043 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3044 PUSH_VAR(utf8, cur, aquad, needs_swap);
1604cfb0
MS
3045 }
3046 break;
1640b983 3047#endif
1604cfb0
MS
3048 case 'P':
3049 len = 1; /* assume SV is correct length */
3050 GROWING(utf8, cat, start, cur, sizeof(char *));
3051 /* FALLTHROUGH */
3052 case 'p':
3053 while (len-- > 0) {
3054 const char *aptr;
3055
3056 fromstr = NEXTFROM;
3057 SvGETMAGIC(fromstr);
3058 if (!SvOK(fromstr)) aptr = NULL;
3059 else {
3060 /* XXX better yet, could spirit away the string to
3061 * a safe spot and hang on to it until the result
3062 * of pack() (and all copies of the result) are
3063 * gone.
3064 */
3065 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3066 || (SvPADTMP(fromstr) &&
3067 !SvREADONLY(fromstr)))) {
3068 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3069 "Attempt to pack pointer to temporary value");
3070 }
3071 if (SvPOK(fromstr) || SvNIOK(fromstr))
3072 aptr = SvPV_nomg_const_nolen(fromstr);
3073 else
3074 aptr = SvPV_force_flags_nolen(fromstr, 0);
3075 }
3a88beaa 3076 PUSH_VAR(utf8, cur, aptr, needs_swap);
1604cfb0
MS
3077 }
3078 break;
3079 case 'u': {
3080 const char *aptr, *aend;
3081 bool from_utf8;
3082
3083 fromstr = NEXTFROM;
3084 if (len <= 2) len = 45;
3085 else len = len / 3 * 3;
3086 if (len >= 64) {
3087 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3088 "Field too wide in 'u' format in pack");
3089 len = 63;
3090 }
3091 aptr = SvPV_const(fromstr, fromlen);
3092 from_utf8 = DO_UTF8(fromstr);
3093 if (from_utf8) {
3094 aend = aptr + fromlen;
3095 fromlen = sv_len_utf8_nomg(fromstr);
3096 } else aend = NULL; /* Unused, but keep compilers happy */
3097 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3098 while (fromlen > 0) {
3099 U8 *end;
3100 SSize_t todo;
3101 U8 hunk[1+63/3*4+1];
3102
3103 if ((SSize_t)fromlen > len)
3104 todo = len;
3105 else
3106 todo = fromlen;
3107 if (from_utf8) {
3108 char buffer[64];
3109 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3110 'u' | TYPE_IS_PACK)) {
3111 *cur = '\0';
3112 SvCUR_set(cat, cur - start);
3113 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3114 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3115 aptr, aend, buffer, todo);
3116 }
3117 end = doencodes(hunk, (const U8 *)buffer, todo);
3118 } else {
3119 end = doencodes(hunk, (const U8 *)aptr, todo);
3120 aptr += todo;
3121 }
3122 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3123 fromlen -= todo;
3124 }
3125 break;
3126 }
3127 }
3128 *cur = '\0';
3129 SvCUR_set(cat, cur - start);
f337b084 3130 no_change:
1604cfb0 3131 *symptr = lookahead;
a6ec74c1 3132 }
49704364 3133 return beglist;
18529408
IZ
3134}
3135#undef NEXTFROM
3136
3137
3138PP(pp_pack)
3139{
20b7effb 3140 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3141 SV *cat = TARG;
18529408 3142 STRLEN fromlen;
349d4f2f 3143 SV *pat_sv = *++MARK;
eb578fdb
KW
3144 const char *pat = SvPV_const(pat_sv, fromlen);
3145 const char *patend = pat + fromlen;
18529408
IZ
3146
3147 MARK++;
e8f7c79f 3148 SvPVCLEAR(cat);
f337b084 3149 SvUTF8_off(cat);
18529408 3150
7accc089 3151 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3152
fd879d93
KW
3153 if (SvUTF8(cat)) {
3154 STRLEN result_len;
3155 const char * result = SvPV_nomg(cat, result_len);
3156 const U8 * error_pos;
3157
3158 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3159 _force_out_malformed_utf8_message(error_pos,
3160 (U8 *) result + result_len,
3161 0, /* no flags */
3162 1 /* Die */
3163 );
3164 NOT_REACHED; /* NOTREACHED */
3165 }
3166 }
3167
a6ec74c1
JH
3168 SvSETMAGIC(cat);
3169 SP = ORIGMARK;
3170 PUSHs(cat);
3171 RETURN;
3172}
a6ec74c1 3173
73cb7263 3174/*
14d04a33 3175 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3176 */