This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8.c: Make safer a deprecated function
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
f7fe979e
AL
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
e1b825c1
Z
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
f7fe979e
AL
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
41f35ffd 115#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
08ca2aa3
TH
116# define OFF16(p) ((char *) (p))
117# define OFF32(p) ((char *) (p))
41f35ffd
AC
118#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119# define OFF16(p) ((char*)(p))
120# define OFF32(p) ((char*)(p))
121#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124#else
125# error "bad cray byte order"
a6ec74c1
JH
126#endif
127
3a88beaa
NC
128#define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130#define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
08ca2aa3 132
a1219b5e
NC
133#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137#else
138# error "Unsupported byteorder"
20aa3a38
NC
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
9df874cd 141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
20aa3a38
NC
142 logic adding to deal with any mixed-endian transformations needed.
143 */
a1219b5e
NC
144#endif
145
08ca2aa3 146/* Only to be used inside a loop (see the break) */
aaec8192 147#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
08ca2aa3 148STMT_START { \
228e69a7 149 if (UNLIKELY(utf8)) { \
9df874cd 150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
275663fa 151 (char *) (buf), len, datumtype)) break; \
08ca2aa3 152 } else { \
228e69a7 153 if (UNLIKELY(needs_swap)) \
20aa3a38
NC
154 S_reverse_copy(s, (char *) (buf), len); \
155 else \
156 Copy(s, (char *) (buf), len, char); \
275663fa 157 s += len; \
08ca2aa3 158 } \
08ca2aa3
TH
159} STMT_END
160
aaec8192
NC
161#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
7285e3f4 163
aaec8192
NC
164#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
7285e3f4 166
aaec8192
NC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
275663fa 169
3a88beaa
NC
170#define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
f337b084 172
49704364
WL
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
0f12654f 194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
e6006968
KW
215#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
216 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
a6ec74c1 217
66c611c5 218/* type modifiers */
62f95557 219#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
220#define TYPE_IS_BIG_ENDIAN 0x200
221#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 222#define TYPE_IS_PACK 0x800
1109a392 223#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 224#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
225#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
226
7212898e
NC
227# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
228# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
229
230# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
231
78d46eaa 232#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 233#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
234#define PACK_SIZE_MASK 0x3F
235
4ad176ef 236#include "packsizetables.inc"
78d46eaa 237
20aa3a38
NC
238static void
239S_reverse_copy(const char *src, char *dest, STRLEN len)
240{
241 dest += len;
242 while (len--)
243 *--dest = *src++;
244}
245
08ca2aa3 246STATIC U8
9df874cd 247utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 248{
08ca2aa3 249 STRLEN retlen;
df337714
KW
250 UV val;
251
252 if (*s >= end) {
253 goto croak;
254 }
255 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 256 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714
KW
257 if (retlen == (STRLEN) -1)
258 croak:
f337b084
TH
259 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
260 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 261 if (val >= 0x100) {
a2a5de95
NC
262 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
263 "Character in '%c' format wrapped in unpack",
264 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
265 val &= 0xff;
266 }
267 *s += retlen;
fe2774ed 268 return (U8)val;
08ca2aa3
TH
269}
270
f337b084 271#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
9df874cd 272 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
273 *(U8 *)(s)++)
274
275STATIC bool
e1b825c1 276S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
08ca2aa3
TH
277{
278 UV val;
279 STRLEN retlen;
f7fe979e 280 const char *from = *s;
08ca2aa3 281 int bad = 0;
f7fe979e 282 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3 283 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
20aa3a38
NC
284 const bool needs_swap = NEEDS_SWAP(datumtype);
285
228e69a7 286 if (UNLIKELY(needs_swap))
20aa3a38
NC
287 buf += buf_len;
288
08ca2aa3
TH
289 for (;buf_len > 0; buf_len--) {
290 if (from >= end) return FALSE;
f337b084 291 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
df337714 292 if (retlen == (STRLEN) -1) {
08ca2aa3
TH
293 from += UTF8SKIP(from);
294 bad |= 1;
295 } else from += retlen;
296 if (val >= 0x100) {
297 bad |= 2;
298 val &= 0xff;
299 }
228e69a7 300 if (UNLIKELY(needs_swap))
20aa3a38
NC
301 *(U8 *)--buf = (U8)val;
302 else
303 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
304 }
305 /* We have enough characters for the buffer. Did we have problems ? */
306 if (bad) {
307 if (bad & 1) {
308 /* Rewalk the string fragment while warning */
f7fe979e 309 const char *ptr;
e1b825c1 310 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
311 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
312 if (ptr >= end) break;
c80e42f3 313 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
314 }
315 if (from > end) from = end;
316 }
a2a5de95
NC
317 if ((bad & 2))
318 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 319 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
320 "Character(s) in '%c' format wrapped in %s",
321 (int) TYPE_NO_MODIFIERS(datumtype),
322 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
323 }
324 *s = from;
325 return TRUE;
326}
327
64844641 328STATIC char *
d21d7215
KW
329S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
330 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
7918f24d 331
228e69a7 332 if (UNLIKELY(needs_swap)) {
3a88beaa
NC
333 const U8 *p = start + len;
334 while (p-- > start) {
55d09dc8 335 append_utf8_from_native_byte(*p, (U8 **) & dest);
3a88beaa
NC
336 }
337 } else {
338 const U8 * const end = start + len;
339 while (start < end) {
55d09dc8 340 append_utf8_from_native_byte(*start, (U8 **) & dest);
3a88beaa
NC
341 start++;
342 }
f337b084 343 }
64844641 344 return dest;
f337b084
TH
345}
346
3a88beaa 347#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
230e1fce 348STMT_START { \
228e69a7 349 if (UNLIKELY(utf8)) \
d21d7215 350 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
230e1fce 351 else { \
228e69a7 352 if (UNLIKELY(needs_swap)) \
3a88beaa
NC
353 S_reverse_copy((char *)(buf), cur, len); \
354 else \
355 Copy(buf, cur, len, char); \
230e1fce
NC
356 (cur) += (len); \
357 } \
f337b084
TH
358} STMT_END
359
f17fed50
TC
360#define SAFE_UTF8_EXPAND(var) \
361STMT_START { \
362 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
363 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
364 (var) = (var) * UTF8_EXPAND; \
365} STMT_END
366
367#define GROWING2(utf8, cat, start, cur, item_size, item_count) \
368STMT_START { \
369 if (SSize_t_MAX / (item_size) < (item_count)) \
370 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
371 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
372} STMT_END
373
f337b084
TH
374#define GROWING(utf8, cat, start, cur, in_len) \
375STMT_START { \
376 STRLEN glen = (in_len); \
f17fed50
TC
377 STRLEN catcur = (STRLEN)((cur) - (start)); \
378 if (utf8) SAFE_UTF8_EXPAND(glen); \
379 if (SSize_t_MAX - glen < catcur) \
380 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
381 if (catcur + glen >= SvLEN(cat)) { \
0bd48802 382 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
383 (cur) = (start) + SvCUR(cat); \
384 } \
385} STMT_END
386
387#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
388STMT_START { \
f7fe979e 389 const STRLEN glen = (in_len); \
f337b084 390 STRLEN gl = glen; \
f17fed50 391 if (utf8) SAFE_UTF8_EXPAND(gl); \
f337b084
TH
392 if ((cur) + gl >= (start) + SvLEN(cat)) { \
393 *cur = '\0'; \
b162af07 394 SvCUR_set((cat), (cur) - (start)); \
0bd48802 395 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
396 (cur) = (start) + SvCUR(cat); \
397 } \
3a88beaa 398 PUSH_BYTES(utf8, cur, buf, glen, 0); \
f337b084
TH
399} STMT_END
400
401#define PUSH_BYTE(utf8, s, byte) \
402STMT_START { \
403 if (utf8) { \
f7fe979e 404 const U8 au8 = (byte); \
d21d7215 405 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
f337b084
TH
406 } else *(U8 *)(s)++ = (byte); \
407} STMT_END
408
409/* Only to be used inside a loop (see the break) */
410#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
411STMT_START { \
412 STRLEN retlen; \
413 if (str >= end) break; \
414 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
df337714 415 if (retlen == (STRLEN) -1) { \
f337b084
TH
416 *cur = '\0'; \
417 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
418 } \
419 str += retlen; \
420} STMT_END
421
f7fe979e
AL
422static const char *_action( const tempsym_t* symptr )
423{
10edeb5d 424 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
425}
426
206947d2 427/* Returns the sizeof() struct described by pat */
e1b825c1 428STATIC SSize_t
f337b084 429S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 430{
e1b825c1 431 SSize_t total = 0;
206947d2 432
7918f24d
NC
433 PERL_ARGS_ASSERT_MEASURE_STRUCT;
434
49704364 435 while (next_symbol(symptr)) {
e1b825c1 436 SSize_t len, size;
f337b084
TH
437
438 switch (symptr->howlen) {
fc241834 439 case e_star:
49704364 440 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 441 _action( symptr ) );
81d52ecd 442
f337b084
TH
443 default:
444 /* e_no_len and e_number */
445 len = symptr->length;
446 break;
49704364
WL
447 }
448
a7a3cfaa 449 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 450 if (!size) {
e1b825c1 451 SSize_t star;
80a13697
NC
452 /* endianness doesn't influence the size of a type */
453 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
454 default:
455 Perl_croak(aTHX_ "Invalid type '%c' in %s",
456 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 457 _action( symptr ) );
28be1210
TH
458 case '.' | TYPE_IS_SHRIEKING:
459 case '@' | TYPE_IS_SHRIEKING:
80a13697 460 case '@':
28be1210 461 case '.':
80a13697
NC
462 case '/':
463 case 'U': /* XXXX Is it correct? */
464 case 'w':
465 case 'u':
466 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 467 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 468 _action( symptr ) );
80a13697
NC
469 case '%':
470 size = 0;
471 break;
472 case '(':
fc241834
RGS
473 {
474 tempsym_t savsym = *symptr;
475 symptr->patptr = savsym.grpbeg;
476 symptr->patend = savsym.grpend;
477 /* XXXX Theoretically, we need to measure many times at
478 different positions, since the subexpression may contain
479 alignment commands, but be not of aligned length.
480 Need to detect this and croak(). */
481 size = measure_struct(symptr);
482 *symptr = savsym;
483 break;
484 }
80a13697
NC
485 case 'X' | TYPE_IS_SHRIEKING:
486 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
487 */
488 if (!len) /* Avoid division by 0 */
489 len = 1;
490 len = total % len; /* Assumed: the start is aligned. */
924ba076 491 /* FALLTHROUGH */
80a13697
NC
492 case 'X':
493 size = -1;
494 if (total < len)
f7fe979e 495 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
496 break;
497 case 'x' | TYPE_IS_SHRIEKING:
498 if (!len) /* Avoid division by 0 */
499 len = 1;
500 star = total % len; /* Assumed: the start is aligned. */
501 if (star) /* Other portable ways? */
502 len = len - star;
503 else
504 len = 0;
924ba076 505 /* FALLTHROUGH */
80a13697
NC
506 case 'x':
507 case 'A':
508 case 'Z':
509 case 'a':
80a13697
NC
510 size = 1;
511 break;
512 case 'B':
513 case 'b':
514 len = (len + 7)/8;
515 size = 1;
516 break;
517 case 'H':
518 case 'h':
519 len = (len + 1)/2;
520 size = 1;
521 break;
78d46eaa 522
80a13697
NC
523 case 'P':
524 len = 1;
525 size = sizeof(char*);
78d46eaa
NC
526 break;
527 }
206947d2
IZ
528 }
529 total += len * size;
530 }
531 return total;
532}
533
49704364
WL
534
535/* locate matching closing parenthesis or bracket
536 * returns char pointer to char after match, or NULL
537 */
f7fe979e 538STATIC const char *
5aaab254 539S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 540{
7918f24d
NC
541 PERL_ARGS_ASSERT_GROUP_END;
542
49704364 543 while (patptr < patend) {
f7fe979e 544 const char c = *patptr++;
49704364
WL
545
546 if (isSPACE(c))
547 continue;
548 else if (c == ender)
549 return patptr-1;
550 else if (c == '#') {
551 while (patptr < patend && *patptr != '\n')
552 patptr++;
553 continue;
554 } else if (c == '(')
555 patptr = group_end(patptr, patend, ')') + 1;
556 else if (c == '[')
557 patptr = group_end(patptr, patend, ']') + 1;
18529408 558 }
49704364
WL
559 Perl_croak(aTHX_ "No group ending character '%c' found in template",
560 ender);
a25b5927 561 NOT_REACHED; /* NOTREACHED */
18529408
IZ
562}
563
49704364
WL
564
565/* Convert unsigned decimal number to binary.
566 * Expects a pointer to the first digit and address of length variable
567 * Advances char pointer to 1st non-digit char and returns number
fc241834 568 */
f7fe979e 569STATIC const char *
e1b825c1 570S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
49704364 571{
e1b825c1 572 SSize_t len = *patptr++ - '0';
7918f24d
NC
573
574 PERL_ARGS_ASSERT_GET_NUM;
575
49704364 576 while (isDIGIT(*patptr)) {
e1b825c1
Z
577 SSize_t nlen = (len * 10) + (*patptr++ - '0');
578 if (nlen < 0 || nlen/10 != len)
49704364 579 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
e1b825c1 580 len = nlen;
49704364
WL
581 }
582 *lenptr = len;
583 return patptr;
584}
585
586/* The marvellous template parsing routine: Using state stored in *symptr,
587 * locates next template code and count
588 */
589STATIC bool
f337b084 590S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 591{
f7fe979e 592 const char* patptr = symptr->patptr;
0bcc34c2 593 const char* const patend = symptr->patend;
49704364 594
7918f24d
NC
595 PERL_ARGS_ASSERT_NEXT_SYMBOL;
596
49704364
WL
597 symptr->flags &= ~FLAG_SLASH;
598
599 while (patptr < patend) {
600 if (isSPACE(*patptr))
601 patptr++;
602 else if (*patptr == '#') {
603 patptr++;
604 while (patptr < patend && *patptr != '\n')
605 patptr++;
606 if (patptr < patend)
607 patptr++;
608 } else {
fc241834 609 /* We should have found a template code */
49704364 610 I32 code = *patptr++ & 0xFF;
66c611c5 611 U32 inherited_modifiers = 0;
49704364
WL
612
613 if (code == ','){ /* grandfather in commas but with a warning */
614 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
615 symptr->flags |= FLAG_COMMA;
616 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 617 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
618 }
619 continue;
620 }
fc241834 621
49704364 622 /* for '(', skip to ')' */
fc241834 623 if (code == '(') {
49704364
WL
624 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
625 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 626 _action( symptr ) );
49704364
WL
627 symptr->grpbeg = patptr;
628 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
629 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
630 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 631 _action( symptr ) );
49704364
WL
632 }
633
66c611c5
MHM
634 /* look for group modifiers to inherit */
635 if (TYPE_ENDIANNESS(symptr->flags)) {
636 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
637 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
638 }
639
1109a392
MHM
640 /* look for modifiers */
641 while (patptr < patend) {
b7787f18
AL
642 const char *allowed;
643 I32 modifier;
1109a392
MHM
644 switch (*patptr) {
645 case '!':
646 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 647 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
648 break;
649 case '>':
650 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 651 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
652 break;
653 case '<':
654 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 655 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
656 break;
657 default:
b7787f18
AL
658 allowed = "";
659 modifier = 0;
1109a392
MHM
660 break;
661 }
66c611c5 662
1109a392
MHM
663 if (modifier == 0)
664 break;
66c611c5 665
1109a392
MHM
666 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
667 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 668 allowed, _action( symptr ) );
66c611c5
MHM
669
670 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 671 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 672 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
673 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
674 TYPE_ENDIANNESS_MASK)
675 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 676 *patptr, _action( symptr ) );
66c611c5 677
a2a5de95
NC
678 if ((code & modifier)) {
679 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
680 "Duplicate modifier '%c' after '%c' in %s",
681 *patptr, (int) TYPE_NO_MODIFIERS(code),
682 _action( symptr ) );
1109a392 683 }
66c611c5 684
1109a392
MHM
685 code |= modifier;
686 patptr++;
49704364
WL
687 }
688
66c611c5
MHM
689 /* inherit modifiers */
690 code |= inherited_modifiers;
691
fc241834 692 /* look for count and/or / */
49704364
WL
693 if (patptr < patend) {
694 if (isDIGIT(*patptr)) {
695 patptr = get_num( patptr, &symptr->length );
696 symptr->howlen = e_number;
697
698 } else if (*patptr == '*') {
699 patptr++;
700 symptr->howlen = e_star;
701
702 } else if (*patptr == '[') {
f7fe979e 703 const char* lenptr = ++patptr;
49704364
WL
704 symptr->howlen = e_number;
705 patptr = group_end( patptr, patend, ']' ) + 1;
706 /* what kind of [] is it? */
707 if (isDIGIT(*lenptr)) {
708 lenptr = get_num( lenptr, &symptr->length );
709 if( *lenptr != ']' )
710 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 711 _action( symptr ) );
49704364
WL
712 } else {
713 tempsym_t savsym = *symptr;
714 symptr->patend = patptr-1;
715 symptr->patptr = lenptr;
716 savsym.length = measure_struct(symptr);
717 *symptr = savsym;
718 }
719 } else {
720 symptr->howlen = e_no_len;
721 symptr->length = 1;
722 }
723
724 /* try to find / */
725 while (patptr < patend) {
726 if (isSPACE(*patptr))
727 patptr++;
728 else if (*patptr == '#') {
729 patptr++;
730 while (patptr < patend && *patptr != '\n')
731 patptr++;
732 if (patptr < patend)
733 patptr++;
734 } else {
66c611c5 735 if (*patptr == '/') {
49704364
WL
736 symptr->flags |= FLAG_SLASH;
737 patptr++;
66c611c5
MHM
738 if (patptr < patend &&
739 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 740 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 741 _action( symptr ) );
49704364
WL
742 }
743 break;
744 }
18529408 745 }
49704364
WL
746 } else {
747 /* at end - no count, no / */
748 symptr->howlen = e_no_len;
749 symptr->length = 1;
750 }
751
752 symptr->code = code;
fc241834 753 symptr->patptr = patptr;
49704364 754 return TRUE;
18529408 755 }
49704364 756 }
fc241834 757 symptr->patptr = patptr;
49704364 758 return FALSE;
18529408
IZ
759}
760
18529408 761/*
fc241834 762 There is no way to cleanly handle the case where we should process the
08ca2aa3 763 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
764 (e.g. estimates like strend-s as an upper bound for the number of
765 characters left wouldn't work). So if we foresee the need of this
766 (pattern starts with U or contains U0), we want to work on the encoded
767 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
768 themselves if they need to do a lot of unpacks like this on it
769*/
fc241834 770STATIC bool
08ca2aa3
TH
771need_utf8(const char *pat, const char *patend)
772{
773 bool first = TRUE;
7918f24d
NC
774
775 PERL_ARGS_ASSERT_NEED_UTF8;
776
08ca2aa3
TH
777 while (pat < patend) {
778 if (pat[0] == '#') {
779 pat++;
f7fe979e 780 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
781 if (!pat) return FALSE;
782 } else if (pat[0] == 'U') {
783 if (first || pat[1] == '0') return TRUE;
784 } else first = FALSE;
785 pat++;
786 }
787 return FALSE;
788}
789
790STATIC char
791first_symbol(const char *pat, const char *patend) {
7918f24d
NC
792 PERL_ARGS_ASSERT_FIRST_SYMBOL;
793
08ca2aa3
TH
794 while (pat < patend) {
795 if (pat[0] != '#') return pat[0];
796 pat++;
f7fe979e 797 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
798 if (!pat) return 0;
799 pat++;
800 }
801 return 0;
802}
803
804/*
dcccc8ff
KW
805
806=head1 Pack and Unpack
807
7accc089
JH
808=for apidoc unpackstring
809
796b6530 810The engine implementing the C<unpack()> Perl function.
21ebfc7a 811
796b6530
KW
812Using the template C<pat..patend>, this function unpacks the string
813C<s..strend> into a number of mortal SVs, which it pushes onto the perl
814argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
72d33970 815C<SPAGAIN> after the call to this function). It returns the number of
21ebfc7a
DM
816pushed elements.
817
796b6530
KW
818The C<strend> and C<patend> pointers should point to the byte following the
819last character of each string.
21ebfc7a
DM
820
821Although this function returns its values on the perl argument stack, it
822doesn't take any parameters from that stack (and thus in particular
796b6530 823there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
21ebfc7a 824example).
7accc089
JH
825
826=cut */
827
e1b825c1 828SSize_t
f7fe979e 829Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 830{
f7fe979e 831 tempsym_t sym;
08ca2aa3 832
7918f24d
NC
833 PERL_ARGS_ASSERT_UNPACKSTRING;
834
f337b084 835 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
836 else if (need_utf8(pat, patend)) {
837 /* We probably should try to avoid this in case a scalar context call
838 wouldn't get to the "U0" */
839 STRLEN len = strend - s;
230e1fce 840 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
841 SAVEFREEPV(s);
842 strend = s + len;
f337b084 843 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
844 }
845
f337b084
TH
846 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
847 flags |= FLAG_PARSE_UTF8;
08ca2aa3 848
f7fe979e 849 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
850
851 return unpack_rec(&sym, s, s, strend, NULL );
852}
853
e1b825c1 854STATIC SSize_t
f7fe979e 855S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 856{
20b7effb 857 dSP;
3297d27d 858 SV *sv = NULL;
e1b825c1 859 const SSize_t start_sp_offset = SP - PL_stack_base;
49704364 860 howlen_t howlen;
e1b825c1 861 SSize_t checksum = 0;
92d41999 862 UV cuv = 0;
a6ec74c1 863 NV cdouble = 0.0;
e1b825c1 864 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 865 bool beyond = FALSE;
21c16052 866 bool explicit_length;
9e27e96a 867 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 868 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
869
870 PERL_ARGS_ASSERT_UNPACK_REC;
871
28be1210 872 symptr->strbeg = s - strbeg;
49704364 873
49704364 874 while (next_symbol(symptr)) {
a7a3cfaa 875 packprops_t props;
e1b825c1 876 SSize_t len;
f337b084 877 I32 datumtype = symptr->code;
a1219b5e 878 bool needs_swap;
206947d2 879 /* do first one only unless in list context
08ca2aa3 880 / is implemented by unpacking the count, then popping it from the
206947d2 881 stack, so must check that we're not in the middle of a / */
49704364 882 if ( unpack_only_one
206947d2 883 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 884 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 885 break;
49704364 886
f337b084 887 switch (howlen = symptr->howlen) {
fc241834
RGS
888 case e_star:
889 len = strend - strbeg; /* long enough */
49704364 890 break;
f337b084
TH
891 default:
892 /* e_no_len and e_number */
893 len = symptr->length;
894 break;
49704364 895 }
18529408 896
21c16052 897 explicit_length = TRUE;
a6ec74c1 898 redo_switch:
49704364 899 beyond = s >= strend;
a7a3cfaa
TH
900
901 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
902 if (props) {
903 /* props nonzero means we can process this letter. */
e1b825c1
Z
904 const SSize_t size = props & PACK_SIZE_MASK;
905 const SSize_t howmany = (strend - s) / size;
a7a3cfaa
TH
906 if (len > howmany)
907 len = howmany;
908
909 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
910 if (len && unpack_only_one) len = 1;
911 EXTEND(SP, len);
912 EXTEND_MORTAL(len);
78d46eaa
NC
913 }
914 }
a7a3cfaa 915
a1219b5e
NC
916 needs_swap = NEEDS_SWAP(datumtype);
917
1109a392 918 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 919 default:
1109a392 920 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 921
a6ec74c1 922 case '%':
49704364 923 if (howlen == e_no_len)
18529408 924 len = 16; /* len is not specified */
a6ec74c1 925 checksum = len;
92d41999 926 cuv = 0;
a6ec74c1 927 cdouble = 0;
18529408 928 continue;
81d52ecd 929
18529408
IZ
930 case '(':
931 {
49704364 932 tempsym_t savsym = *symptr;
9e27e96a 933 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 934 symptr->flags |= group_modifiers;
49704364 935 symptr->patend = savsym.grpend;
28be1210 936 symptr->previous = &savsym;
49704364 937 symptr->level++;
18529408 938 PUTBACK;
c6f750d1 939 if (len && unpack_only_one) len = 1;
18529408 940 while (len--) {
49704364 941 symptr->patptr = savsym.grpbeg;
f337b084
TH
942 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
943 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
944 unpack_rec(symptr, s, strbeg, strend, &s);
945 if (s == strend && savsym.howlen == e_star)
49704364 946 break; /* No way to continue */
18529408
IZ
947 }
948 SPAGAIN;
28be1210 949 savsym.flags = symptr->flags & ~group_modifiers;
49704364 950 *symptr = savsym;
18529408
IZ
951 break;
952 }
28be1210 953 case '.' | TYPE_IS_SHRIEKING:
28be1210 954 case '.': {
9e27e96a 955 const char *from;
28be1210 956 SV *sv;
9e27e96a 957 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
958 if (howlen == e_star) from = strbeg;
959 else if (len <= 0) from = s;
960 else {
961 tempsym_t *group = symptr;
962
963 while (--len && group) group = group->previous;
964 from = group ? strbeg + group->strbeg : strbeg;
965 }
966 sv = from <= s ?
00646304
CB
967 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
968 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 969 mXPUSHs(sv);
28be1210
TH
970 break;
971 }
28be1210 972 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 973 case '@':
28be1210 974 s = strbeg + symptr->strbeg;
28be1210 975 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 976 {
08ca2aa3
TH
977 while (len > 0) {
978 if (s >= strend)
979 Perl_croak(aTHX_ "'@' outside of string in unpack");
980 s += UTF8SKIP(s);
981 len--;
982 }
983 if (s > strend)
984 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
985 } else {
28be1210 986 if (strend-s < len)
fc241834 987 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 988 s += len;
08ca2aa3 989 }
a6ec74c1 990 break;
62f95557
IZ
991 case 'X' | TYPE_IS_SHRIEKING:
992 if (!len) /* Avoid division by 0 */
993 len = 1;
08ca2aa3 994 if (utf8) {
f7fe979e 995 const char *hop, *last;
e1b825c1 996 SSize_t l = len;
f337b084
TH
997 hop = last = strbeg;
998 while (hop < s) {
999 hop += UTF8SKIP(hop);
1000 if (--l == 0) {
08ca2aa3 1001 last = hop;
f337b084
TH
1002 l = len;
1003 }
fc241834 1004 }
f337b084
TH
1005 if (last > s)
1006 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1007 s = last;
1008 break;
f337b084
TH
1009 }
1010 len = (s - strbeg) % len;
924ba076 1011 /* FALLTHROUGH */
a6ec74c1 1012 case 'X':
08ca2aa3
TH
1013 if (utf8) {
1014 while (len > 0) {
1015 if (s <= strbeg)
1016 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1017 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1018 if (s <= strbeg)
1019 Perl_croak(aTHX_ "'X' outside of string in unpack");
1020 }
1021 len--;
1022 }
1023 } else {
fc241834
RGS
1024 if (len > s - strbeg)
1025 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1026 s -= len;
08ca2aa3 1027 }
a6ec74c1 1028 break;
9e27e96a 1029 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 1030 SSize_t ai32;
62f95557
IZ
1031 if (!len) /* Avoid division by 0 */
1032 len = 1;
230e1fce
NC
1033 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1034 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1035 if (ai32 == 0) break;
1036 len -= ai32;
9e27e96a 1037 }
924ba076 1038 /* FALLTHROUGH */
a6ec74c1 1039 case 'x':
08ca2aa3
TH
1040 if (utf8) {
1041 while (len>0) {
1042 if (s >= strend)
1043 Perl_croak(aTHX_ "'x' outside of string in unpack");
1044 s += UTF8SKIP(s);
1045 len--;
1046 }
1047 } else {
fc241834
RGS
1048 if (len > strend - s)
1049 Perl_croak(aTHX_ "'x' outside of string in unpack");
1050 s += len;
f337b084 1051 }
a6ec74c1
JH
1052 break;
1053 case '/':
49704364 1054 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
81d52ecd 1055
a6ec74c1
JH
1056 case 'A':
1057 case 'Z':
1058 case 'a':
08ca2aa3
TH
1059 if (checksum) {
1060 /* Preliminary length estimate is assumed done in 'W' */
1061 if (len > strend - s) len = strend - s;
1062 goto W_checksum;
1063 }
1064 if (utf8) {
e1b825c1 1065 SSize_t l;
f7fe979e 1066 const char *hop;
08ca2aa3
TH
1067 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1068 if (hop >= strend) {
1069 if (hop > strend)
1070 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1071 break;
fc241834 1072 }
a6ec74c1 1073 }
08ca2aa3
TH
1074 if (hop > strend)
1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1076 len = hop - s;
1077 } else if (len > strend - s)
1078 len = strend - s;
1079
1080 if (datumtype == 'Z') {
1081 /* 'Z' strips stuff after first null */
f7fe979e 1082 const char *ptr, *end;
f337b084
TH
1083 end = s + len;
1084 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1085 sv = newSVpvn(s, ptr-s);
1086 if (howlen == e_star) /* exact for 'Z*' */
1087 len = ptr-s + (ptr != strend ? 1 : 0);
1088 } else if (datumtype == 'A') {
1089 /* 'A' strips both nulls and spaces */
f7fe979e 1090 const char *ptr;
18bdf90a 1091 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
7a207065
KW
1092 for (ptr = s+len-1; ptr >= s; ptr--) {
1093 if ( *ptr != 0
1094 && !UTF8_IS_CONTINUATION(*ptr)
1095 && !isSPACE_utf8_safe(ptr, strend))
1096 {
1097 break;
1098 }
1099 }
18bdf90a
TH
1100 if (ptr >= s) ptr += UTF8SKIP(ptr);
1101 else ptr++;
28be1210 1102 if (ptr > s+len)
18bdf90a
TH
1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1104 } else {
1105 for (ptr = s+len-1; ptr >= s; ptr--)
1106 if (*ptr != 0 && !isSPACE(*ptr)) break;
1107 ptr++;
1108 }
08ca2aa3
TH
1109 sv = newSVpvn(s, ptr-s);
1110 } else sv = newSVpvn(s, len);
1111
1112 if (utf8) {
1113 SvUTF8_on(sv);
1114 /* Undo any upgrade done due to need_utf8() */
f337b084 1115 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1116 sv_utf8_downgrade(sv, 0);
a6ec74c1 1117 }
6e449a3a 1118 mXPUSHs(sv);
08ca2aa3 1119 s += len;
a6ec74c1
JH
1120 break;
1121 case 'B':
08ca2aa3
TH
1122 case 'b': {
1123 char *str;
49704364 1124 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1125 len = (strend - s) * 8;
1126 if (checksum) {
f337b084 1127 if (utf8)
08ca2aa3 1128 while (len >= 8 && s < strend) {
9df874cd 1129 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1130 len -= 8;
1131 }
f337b084 1132 else
fc241834 1133 while (len >= 8) {
08ca2aa3 1134 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1135 len -= 8;
1136 }
08ca2aa3
TH
1137 if (len && s < strend) {
1138 U8 bits;
f337b084
TH
1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 if (datumtype == 'b')
a6ec74c1 1141 while (len-- > 0) {
92d41999 1142 if (bits & 1) cuv++;
a6ec74c1
JH
1143 bits >>= 1;
1144 }
f337b084 1145 else
a6ec74c1 1146 while (len-- > 0) {
08ca2aa3 1147 if (bits & 0x80) cuv++;
a6ec74c1
JH
1148 bits <<= 1;
1149 }
fc241834 1150 }
a6ec74c1
JH
1151 break;
1152 }
08ca2aa3 1153
561b68a9 1154 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1155 SvPOK_on(sv);
1156 str = SvPVX(sv);
1157 if (datumtype == 'b') {
f337b084 1158 U8 bits = 0;
e1b825c1 1159 const SSize_t ai32 = len;
08ca2aa3
TH
1160 for (len = 0; len < ai32; len++) {
1161 if (len & 7) bits >>= 1;
1162 else if (utf8) {
1163 if (s >= strend) break;
9df874cd 1164 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1165 } else bits = *(U8 *) s++;
1166 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1167 }
08ca2aa3 1168 } else {
f337b084 1169 U8 bits = 0;
e1b825c1 1170 const SSize_t ai32 = len;
08ca2aa3
TH
1171 for (len = 0; len < ai32; len++) {
1172 if (len & 7) bits <<= 1;
1173 else if (utf8) {
1174 if (s >= strend) break;
9df874cd 1175 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1176 } else bits = *(U8 *) s++;
1177 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1178 }
1179 }
1180 *str = '\0';
aa07b2f6 1181 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1182 XPUSHs(sv);
a6ec74c1 1183 break;
08ca2aa3 1184 }
a6ec74c1 1185 case 'H':
08ca2aa3 1186 case 'h': {
3297d27d 1187 char *str = NULL;
fc241834 1188 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1189 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1190 len = (strend - s) * 2;
858fe5e1
TC
1191 if (!checksum) {
1192 sv = sv_2mortal(newSV(len ? len : 1));
1193 SvPOK_on(sv);
1194 str = SvPVX(sv);
1195 }
a6ec74c1 1196 if (datumtype == 'h') {
f337b084 1197 U8 bits = 0;
e1b825c1 1198 SSize_t ai32 = len;
fc241834
RGS
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 1) bits >>= 4;
1201 else if (utf8) {
1202 if (s >= strend) break;
9df874cd 1203 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1204 } else bits = * (U8 *) s++;
858fe5e1
TC
1205 if (!checksum)
1206 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1207 }
08ca2aa3 1208 } else {
f337b084 1209 U8 bits = 0;
e1b825c1 1210 const SSize_t ai32 = len;
08ca2aa3
TH
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits <<= 4;
1213 else if (utf8) {
1214 if (s >= strend) break;
9df874cd 1215 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1216 } else bits = *(U8 *) s++;
858fe5e1
TC
1217 if (!checksum)
1218 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1219 }
1220 }
858fe5e1
TC
1221 if (!checksum) {
1222 *str = '\0';
1223 SvCUR_set(sv, str - SvPVX_const(sv));
1224 XPUSHs(sv);
1225 }
a6ec74c1 1226 break;
08ca2aa3 1227 }
1651fc44
ML
1228 case 'C':
1229 if (len == 0) {
1230 if (explicit_length)
1231 /* Switch to "character" mode */
1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1233 break;
1234 }
924ba076 1235 /* FALLTHROUGH */
a6ec74c1 1236 case 'c':
1651fc44
ML
1237 while (len-- > 0 && s < strend) {
1238 int aint;
1239 if (utf8)
1240 {
1241 STRLEN retlen;
1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1244 if (retlen == (STRLEN) -1)
1651fc44
ML
1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1246 s += retlen;
1247 }
1248 else
1249 aint = *(U8 *)(s)++;
1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1251 aint -= 256;
08ca2aa3 1252 if (!checksum)
6e449a3a 1253 mPUSHi(aint);
73cb7263
NC
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)aint;
1256 else
1257 cuv += aint;
a6ec74c1
JH
1258 }
1259 break;
08ca2aa3
TH
1260 case 'W':
1261 W_checksum:
1651fc44 1262 if (utf8) {
08ca2aa3 1263 while (len-- > 0 && s < strend) {
08ca2aa3 1264 STRLEN retlen;
f7fe979e 1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
df337714 1267 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1269 s += retlen;
1270 if (!checksum)
6e449a3a 1271 mPUSHu(val);
08ca2aa3
TH
1272 else if (checksum > bits_in_uv)
1273 cdouble += (NV) val;
d6d3e8bd 1274 else
08ca2aa3 1275 cuv += val;
fc241834 1276 }
08ca2aa3 1277 } else if (!checksum)
a6ec74c1 1278 while (len-- > 0) {
f7fe979e 1279 const U8 ch = *(U8 *) s++;
6e449a3a 1280 mPUSHu(ch);
a6ec74c1 1281 }
08ca2aa3
TH
1282 else if (checksum > bits_in_uv)
1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1284 else
1285 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1286 break;
1287 case 'U':
35bcd338 1288 if (len == 0) {
c5333953 1289 if (explicit_length && howlen != e_star) {
08ca2aa3 1290 /* Switch to "bytes in UTF-8" mode */
f337b084 1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1292 else
1293 /* Should be impossible due to the need_utf8() test */
1294 Perl_croak(aTHX_ "U0 mode on a byte string");
1295 }
35bcd338
JH
1296 break;
1297 }
08ca2aa3 1298 if (len > strend - s) len = strend - s;
fc241834 1299 if (!checksum) {
08ca2aa3
TH
1300 if (len && unpack_only_one) len = 1;
1301 EXTEND(SP, len);
1302 EXTEND_MORTAL(len);
fc241834 1303 }
08ca2aa3
TH
1304 while (len-- > 0 && s < strend) {
1305 STRLEN retlen;
1306 UV auv;
1307 if (utf8) {
81b3f640 1308 U8 result[UTF8_MAXLEN+1];
f7fe979e 1309 const char *ptr = s;
08ca2aa3 1310 STRLEN len;
08ca2aa3
TH
1311 /* Bug: warns about bad utf8 even if we are short on bytes
1312 and will break out of the loop */
9df874cd 1313 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
230e1fce 1314 'U'))
08ca2aa3
TH
1315 break;
1316 len = UTF8SKIP(result);
9df874cd 1317 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
230e1fce 1318 (char *) &result[1], len-1, 'U')) break;
3ece276e
KW
1319 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1320 len,
1321 &retlen,
1322 UTF8_ALLOW_DEFAULT));
08ca2aa3
TH
1323 s = ptr;
1324 } else {
3ece276e
KW
1325 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1326 strend - s,
1327 &retlen,
1328 UTF8_ALLOW_DEFAULT));
df337714 1329 if (retlen == (STRLEN) -1)
08ca2aa3
TH
1330 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1331 s += retlen;
1332 }
1333 if (!checksum)
6e449a3a 1334 mPUSHu(auv);
73cb7263 1335 else if (checksum > bits_in_uv)
08ca2aa3 1336 cdouble += (NV) auv;
73cb7263 1337 else
08ca2aa3 1338 cuv += auv;
a6ec74c1
JH
1339 }
1340 break;
49704364
WL
1341 case 's' | TYPE_IS_SHRIEKING:
1342#if SHORTSIZE != SIZE16
73cb7263 1343 while (len-- > 0) {
08ca2aa3 1344 short ashort;
aaec8192 1345 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
08ca2aa3 1346 if (!checksum)
6e449a3a 1347 mPUSHi(ashort);
73cb7263
NC
1348 else if (checksum > bits_in_uv)
1349 cdouble += (NV)ashort;
1350 else
1351 cuv += ashort;
49704364
WL
1352 }
1353 break;
1354#else
924ba076 1355 /* FALLTHROUGH */
a6ec74c1 1356#endif
49704364 1357 case 's':
73cb7263 1358 while (len-- > 0) {
08ca2aa3
TH
1359 I16 ai16;
1360
1361#if U16SIZE > SIZE16
1362 ai16 = 0;
1363#endif
aaec8192 1364 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1109a392 1365#if U16SIZE > SIZE16
73cb7263
NC
1366 if (ai16 > 32767)
1367 ai16 -= 65536;
a6ec74c1 1368#endif
08ca2aa3 1369 if (!checksum)
6e449a3a 1370 mPUSHi(ai16);
73cb7263
NC
1371 else if (checksum > bits_in_uv)
1372 cdouble += (NV)ai16;
1373 else
1374 cuv += ai16;
a6ec74c1
JH
1375 }
1376 break;
49704364
WL
1377 case 'S' | TYPE_IS_SHRIEKING:
1378#if SHORTSIZE != SIZE16
73cb7263 1379 while (len-- > 0) {
08ca2aa3 1380 unsigned short aushort;
aaec8192
NC
1381 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1382 needs_swap);
08ca2aa3 1383 if (!checksum)
6e449a3a 1384 mPUSHu(aushort);
73cb7263
NC
1385 else if (checksum > bits_in_uv)
1386 cdouble += (NV)aushort;
1387 else
1388 cuv += aushort;
49704364
WL
1389 }
1390 break;
1391#else
924ba076 1392 /* FALLTHROUGH */
49704364 1393#endif
a6ec74c1
JH
1394 case 'v':
1395 case 'n':
1396 case 'S':
73cb7263 1397 while (len-- > 0) {
08ca2aa3
TH
1398 U16 au16;
1399#if U16SIZE > SIZE16
1400 au16 = 0;
1401#endif
aaec8192 1402 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
73cb7263
NC
1403 if (datumtype == 'n')
1404 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1405 if (datumtype == 'v')
1406 au16 = vtohs(au16);
08ca2aa3 1407 if (!checksum)
6e449a3a 1408 mPUSHu(au16);
73cb7263 1409 else if (checksum > bits_in_uv)
f337b084 1410 cdouble += (NV) au16;
73cb7263
NC
1411 else
1412 cuv += au16;
a6ec74c1
JH
1413 }
1414 break;
068bd2e7
MHM
1415 case 'v' | TYPE_IS_SHRIEKING:
1416 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1417 while (len-- > 0) {
08ca2aa3
TH
1418 I16 ai16;
1419# if U16SIZE > SIZE16
1420 ai16 = 0;
1421# endif
aaec8192 1422 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
e396d235
NC
1423 /* There should never be any byte-swapping here. */
1424 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1425 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1426 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1427 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1428 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1429 if (!checksum)
6e449a3a 1430 mPUSHi(ai16);
73cb7263 1431 else if (checksum > bits_in_uv)
08ca2aa3 1432 cdouble += (NV) ai16;
73cb7263
NC
1433 else
1434 cuv += ai16;
068bd2e7
MHM
1435 }
1436 break;
a6ec74c1 1437 case 'i':
49704364 1438 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1439 while (len-- > 0) {
08ca2aa3 1440 int aint;
aaec8192 1441 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
08ca2aa3 1442 if (!checksum)
6e449a3a 1443 mPUSHi(aint);
73cb7263
NC
1444 else if (checksum > bits_in_uv)
1445 cdouble += (NV)aint;
1446 else
1447 cuv += aint;
a6ec74c1
JH
1448 }
1449 break;
1450 case 'I':
49704364 1451 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1452 while (len-- > 0) {
08ca2aa3 1453 unsigned int auint;
aaec8192 1454 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
08ca2aa3 1455 if (!checksum)
6e449a3a 1456 mPUSHu(auint);
73cb7263
NC
1457 else if (checksum > bits_in_uv)
1458 cdouble += (NV)auint;
1459 else
1460 cuv += auint;
a6ec74c1
JH
1461 }
1462 break;
92d41999 1463 case 'j':
73cb7263 1464 while (len-- > 0) {
08ca2aa3 1465 IV aiv;
aaec8192 1466 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
08ca2aa3 1467 if (!checksum)
6e449a3a 1468 mPUSHi(aiv);
73cb7263
NC
1469 else if (checksum > bits_in_uv)
1470 cdouble += (NV)aiv;
1471 else
1472 cuv += aiv;
92d41999
JH
1473 }
1474 break;
1475 case 'J':
73cb7263 1476 while (len-- > 0) {
08ca2aa3 1477 UV auv;
aaec8192 1478 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
08ca2aa3 1479 if (!checksum)
6e449a3a 1480 mPUSHu(auv);
73cb7263
NC
1481 else if (checksum > bits_in_uv)
1482 cdouble += (NV)auv;
1483 else
1484 cuv += auv;
92d41999
JH
1485 }
1486 break;
49704364
WL
1487 case 'l' | TYPE_IS_SHRIEKING:
1488#if LONGSIZE != SIZE32
73cb7263 1489 while (len-- > 0) {
08ca2aa3 1490 long along;
aaec8192 1491 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
08ca2aa3 1492 if (!checksum)
6e449a3a 1493 mPUSHi(along);
73cb7263
NC
1494 else if (checksum > bits_in_uv)
1495 cdouble += (NV)along;
1496 else
1497 cuv += along;
49704364
WL
1498 }
1499 break;
1500#else
924ba076 1501 /* FALLTHROUGH */
a6ec74c1 1502#endif
49704364 1503 case 'l':
73cb7263 1504 while (len-- > 0) {
08ca2aa3
TH
1505 I32 ai32;
1506#if U32SIZE > SIZE32
1507 ai32 = 0;
1508#endif
aaec8192 1509 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
25a9bd2a 1510#if U32SIZE > SIZE32
08ca2aa3 1511 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1512#endif
08ca2aa3 1513 if (!checksum)
6e449a3a 1514 mPUSHi(ai32);
73cb7263
NC
1515 else if (checksum > bits_in_uv)
1516 cdouble += (NV)ai32;
1517 else
1518 cuv += ai32;
a6ec74c1
JH
1519 }
1520 break;
49704364
WL
1521 case 'L' | TYPE_IS_SHRIEKING:
1522#if LONGSIZE != SIZE32
73cb7263 1523 while (len-- > 0) {
08ca2aa3 1524 unsigned long aulong;
aaec8192 1525 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
08ca2aa3 1526 if (!checksum)
6e449a3a 1527 mPUSHu(aulong);
73cb7263
NC
1528 else if (checksum > bits_in_uv)
1529 cdouble += (NV)aulong;
1530 else
1531 cuv += aulong;
49704364
WL
1532 }
1533 break;
1534#else
924ba076 1535 /* FALLTHROUGH */
49704364 1536#endif
a6ec74c1
JH
1537 case 'V':
1538 case 'N':
1539 case 'L':
73cb7263 1540 while (len-- > 0) {
08ca2aa3
TH
1541 U32 au32;
1542#if U32SIZE > SIZE32
1543 au32 = 0;
1544#endif
aaec8192 1545 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
73cb7263
NC
1546 if (datumtype == 'N')
1547 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1548 if (datumtype == 'V')
1549 au32 = vtohl(au32);
08ca2aa3 1550 if (!checksum)
6e449a3a 1551 mPUSHu(au32);
fc241834
RGS
1552 else if (checksum > bits_in_uv)
1553 cdouble += (NV)au32;
1554 else
1555 cuv += au32;
a6ec74c1
JH
1556 }
1557 break;
068bd2e7
MHM
1558 case 'V' | TYPE_IS_SHRIEKING:
1559 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1560 while (len-- > 0) {
08ca2aa3 1561 I32 ai32;
f8e5a5db 1562#if U32SIZE > SIZE32
08ca2aa3 1563 ai32 = 0;
f8e5a5db 1564#endif
aaec8192 1565 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
e396d235
NC
1566 /* There should never be any byte swapping here. */
1567 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1568 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1569 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1570 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1571 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1572 if (!checksum)
6e449a3a 1573 mPUSHi(ai32);
73cb7263
NC
1574 else if (checksum > bits_in_uv)
1575 cdouble += (NV)ai32;
1576 else
1577 cuv += ai32;
068bd2e7
MHM
1578 }
1579 break;
a6ec74c1 1580 case 'p':
a6ec74c1 1581 while (len-- > 0) {
f7fe979e 1582 const char *aptr;
aaec8192 1583 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
c4c5f44a 1584 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1585 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1586 }
1587 break;
1588 case 'w':
a6ec74c1
JH
1589 {
1590 UV auv = 0;
e1b825c1 1591 size_t bytes = 0;
fc241834 1592
08ca2aa3
TH
1593 while (len > 0 && s < strend) {
1594 U8 ch;
f337b084 1595 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1596 auv = (auv << 7) | (ch & 0x7f);
47b08251
KW
1597 /* UTF8_IS_XXXXX not right here because this is a BER, not
1598 * UTF-8 format - using constant 0x80 */
08ca2aa3 1599 if (ch < 0x80) {
a6ec74c1 1600 bytes = 0;
6e449a3a 1601 mPUSHu(auv);
a6ec74c1
JH
1602 len--;
1603 auv = 0;
08ca2aa3 1604 continue;
a6ec74c1 1605 }
08ca2aa3 1606 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1607 const char *t;
a6ec74c1 1608
147e3846
KW
1609 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1610 (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1611 while (s < strend) {
f337b084 1612 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1613 sv = mul128(sv, (U8)(ch & 0x7f));
1614 if (!(ch & 0x80)) {
a6ec74c1
JH
1615 bytes = 0;
1616 break;
1617 }
1618 }
10516c54 1619 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1620 while (*t == '0')
1621 t++;
1622 sv_chop(sv, t);
6e449a3a 1623 mPUSHs(sv);
a6ec74c1
JH
1624 len--;
1625 auv = 0;
1626 }
1627 }
1628 if ((s >= strend) && bytes)
49704364 1629 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1630 }
1631 break;
1632 case 'P':
49704364
WL
1633 if (symptr->howlen == e_star)
1634 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1635 EXTEND(SP, 1);
2d3e0934 1636 if (s + sizeof(char*) <= strend) {
08ca2aa3 1637 char *aptr;
aaec8192 1638 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
fc241834 1639 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1640 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1641 }
a6ec74c1 1642 break;
c174bf3b 1643#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1 1644 case 'q':
73cb7263 1645 while (len-- > 0) {
08ca2aa3 1646 Quad_t aquad;
aaec8192 1647 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
08ca2aa3 1648 if (!checksum)
c174bf3b 1649 mPUSHs(newSViv((IV)aquad));
73cb7263
NC
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)aquad;
1652 else
1653 cuv += aquad;
1654 }
a6ec74c1
JH
1655 break;
1656 case 'Q':
73cb7263 1657 while (len-- > 0) {
08ca2aa3 1658 Uquad_t auquad;
aaec8192 1659 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
08ca2aa3 1660 if (!checksum)
c174bf3b 1661 mPUSHs(newSVuv((UV)auquad));
73cb7263
NC
1662 else if (checksum > bits_in_uv)
1663 cdouble += (NV)auquad;
1664 else
1665 cuv += auquad;
a6ec74c1
JH
1666 }
1667 break;
1640b983 1668#endif
a6ec74c1
JH
1669 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1670 case 'f':
73cb7263 1671 while (len-- > 0) {
08ca2aa3 1672 float afloat;
aaec8192 1673 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
08ca2aa3 1674 if (!checksum)
6e449a3a 1675 mPUSHn(afloat);
08ca2aa3 1676 else
73cb7263 1677 cdouble += afloat;
fc241834 1678 }
a6ec74c1
JH
1679 break;
1680 case 'd':
73cb7263 1681 while (len-- > 0) {
08ca2aa3 1682 double adouble;
aaec8192 1683 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
08ca2aa3 1684 if (!checksum)
6e449a3a 1685 mPUSHn(adouble);
08ca2aa3 1686 else
73cb7263 1687 cdouble += adouble;
fc241834 1688 }
a6ec74c1 1689 break;
92d41999 1690 case 'F':
73cb7263 1691 while (len-- > 0) {
275663fa 1692 NV_bytes anv;
aaec8192
NC
1693 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1694 datumtype, needs_swap);
08ca2aa3 1695 if (!checksum)
275663fa 1696 mPUSHn(anv.nv);
08ca2aa3 1697 else
275663fa 1698 cdouble += anv.nv;
fc241834 1699 }
92d41999
JH
1700 break;
1701#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1702 case 'D':
73cb7263 1703 while (len-- > 0) {
275663fa 1704 ld_bytes aldouble;
aaec8192
NC
1705 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1706 sizeof(aldouble.bytes), datumtype, needs_swap);
42262fd3
JH
1707 /* The most common long double format, the x86 80-bit
1708 * extended precision, has either 2 or 6 unused bytes,
1709 * which may contain garbage, which may contain
1710 * unintentional data. While we do zero the bytes of
1711 * the long double data in pack(), here in unpack() we
1712 * don't, because it's really hard to envision that
1713 * reading the long double off aldouble would be
e075ae47 1714 * affected by the unused bytes.
42262fd3
JH
1715 *
1716 * Note that trying to unpack 'long doubles' of 'long
1717 * doubles' packed in another system is in the general
1718 * case doomed without having more detail. */
08ca2aa3 1719 if (!checksum)
275663fa 1720 mPUSHn(aldouble.ld);
08ca2aa3 1721 else
275663fa 1722 cdouble += aldouble.ld;
92d41999
JH
1723 }
1724 break;
1725#endif
a6ec74c1 1726 case 'u':
858fe5e1 1727 if (!checksum) {
f7fe979e 1728 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1729 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1730 if (l) SvPOK_on(sv);
1731 }
08ca2aa3 1732
bee81d36
KW
1733 /* Note that all legal uuencoded strings are ASCII printables, so
1734 * have the same representation under UTF-8 vs not. This means we
1735 * can ignore UTF8ness on legal input. For illegal we stop at the
1736 * first failure, and don't report where/what that is, so again we
1737 * can ignore UTF8ness */
1738
9233b56b
KW
1739 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1740 I32 a, b, c, d;
1741 char hunk[3];
1742
1743 len = PL_uudmap[*(U8*)s++] & 077;
1744 while (len > 0) {
1745 if (s < strend && ISUUCHAR(*s))
1746 a = PL_uudmap[*(U8*)s++] & 077;
1747 else
1748 a = 0;
1749 if (s < strend && ISUUCHAR(*s))
1750 b = PL_uudmap[*(U8*)s++] & 077;
1751 else
1752 b = 0;
1753 if (s < strend && ISUUCHAR(*s))
1754 c = PL_uudmap[*(U8*)s++] & 077;
1755 else
1756 c = 0;
1757 if (s < strend && ISUUCHAR(*s))
1758 d = PL_uudmap[*(U8*)s++] & 077;
1759 else
1760 d = 0;
1761 hunk[0] = (char)((a << 2) | (b >> 4));
1762 hunk[1] = (char)((b << 4) | (c >> 2));
1763 hunk[2] = (char)((c << 6) | d);
1764 if (!checksum)
1765 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1766 len -= 3;
1767 }
1768 if (*s == '\n')
1769 s++;
1770 else /* possible checksum byte */
1771 if (s + 1 < strend && s[1] == '\n')
1772 s += 2;
1773 }
858fe5e1
TC
1774 if (!checksum)
1775 XPUSHs(sv);
a6ec74c1 1776 break;
99f862a1 1777 } /* End of switch */
49704364 1778
a6ec74c1 1779 if (checksum) {
1109a392 1780 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1781 (checksum > bits_in_uv &&
08ca2aa3
TH
1782 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1783 NV trouble, anv;
a6ec74c1 1784
08ca2aa3 1785 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1786 while (checksum >= 16) {
1787 checksum -= 16;
08ca2aa3 1788 anv *= 65536.0;
a6ec74c1 1789 }
a6ec74c1 1790 while (cdouble < 0.0)
08ca2aa3 1791 cdouble += anv;
09b94b1f
S
1792 cdouble = Perl_modf(cdouble / anv, &trouble);
1793#ifdef LONGDOUBLE_DOUBLEDOUBLE
1794 /* Workaround for powerpc doubledouble modfl bug:
1795 * close to 1.0L and -1.0L cdouble is 0, and trouble
1796 * is cdouble / anv. */
1797 if (trouble != Perl_ceil(trouble)) {
1798 cdouble = trouble;
1799 if (cdouble > 1.0L) cdouble -= 1.0L;
1800 if (cdouble < -1.0L) cdouble += 1.0L;
1801 }
1802#endif
1803 cdouble *= anv;
c4c5f44a 1804 sv = newSVnv(cdouble);
a6ec74c1
JH
1805 }
1806 else {
fa8ec7c1
NC
1807 if (checksum < bits_in_uv) {
1808 UV mask = ((UV)1 << checksum) - 1;
92d41999 1809 cuv &= mask;
a6ec74c1 1810 }
c4c5f44a 1811 sv = newSVuv(cuv);
a6ec74c1 1812 }
6e449a3a 1813 mXPUSHs(sv);
a6ec74c1
JH
1814 checksum = 0;
1815 }
fc241834 1816
49704364
WL
1817 if (symptr->flags & FLAG_SLASH){
1818 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1819 break;
49704364
WL
1820 if( next_symbol(symptr) ){
1821 if( symptr->howlen == e_number )
1822 Perl_croak(aTHX_ "Count after length/code in unpack" );
1823 if( beyond ){
1824 /* ...end of char buffer then no decent length available */
1825 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1826 } else {
1827 /* take top of stack (hope it's numeric) */
1828 len = POPi;
1829 if( len < 0 )
1830 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1831 }
1832 } else {
1833 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1834 }
1835 datumtype = symptr->code;
21c16052 1836 explicit_length = FALSE;
49704364
WL
1837 goto redo_switch;
1838 }
a6ec74c1 1839 }
49704364 1840
18529408
IZ
1841 if (new_s)
1842 *new_s = s;
1843 PUTBACK;
1844 return SP - PL_stack_base - start_sp_offset;
1845}
1846
1847PP(pp_unpack)
1848{
1849 dSP;
bab9c0ac 1850 dPOPPOPssrl;
1c23e2bd 1851 U8 gimme = GIMME_V;
18529408
IZ
1852 STRLEN llen;
1853 STRLEN rlen;
5c144d81
NC
1854 const char *pat = SvPV_const(left, llen);
1855 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1856 const char *strend = s + rlen;
1857 const char *patend = pat + llen;
e1b825c1 1858 SSize_t cnt;
18529408
IZ
1859
1860 PUTBACK;
7accc089 1861 cnt = unpackstring(pat, patend, s, strend,
49704364 1862 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1863 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1864
18529408
IZ
1865 SPAGAIN;
1866 if ( !cnt && gimme == G_SCALAR )
1867 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1868 RETURN;
1869}
1870
f337b084 1871STATIC U8 *
e1b825c1 1872doencodes(U8 *h, const U8 *s, SSize_t len)
a6ec74c1 1873{
f337b084 1874 *h++ = PL_uuemap[len];
a6ec74c1 1875 while (len > 2) {
f337b084
TH
1876 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1877 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1878 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1879 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1880 s += 3;
1881 len -= 3;
1882 }
1883 if (len > 0) {
e68aed92 1884 const U8 r = (len > 1 ? s[1] : '\0');
f337b084
TH
1885 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1886 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1887 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1888 *h++ = PL_uuemap[0];
a6ec74c1 1889 }
f337b084
TH
1890 *h++ = '\n';
1891 return h;
a6ec74c1
JH
1892}
1893
1894STATIC SV *
f7fe979e 1895S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1896{
8b6e33c7
AL
1897 SV *result = newSVpvn(s, l);
1898 char *const result_c = SvPV_nolen(result); /* convenience */
1899 char *out = result_c;
1900 bool skip = 1;
1901 bool ignore = 0;
a6ec74c1 1902
7918f24d
NC
1903 PERL_ARGS_ASSERT_IS_AN_INT;
1904
a6ec74c1
JH
1905 while (*s) {
1906 switch (*s) {
1907 case ' ':
1908 break;
1909 case '+':
1910 if (!skip) {
1911 SvREFCNT_dec(result);
1912 return (NULL);
1913 }
1914 break;
1915 case '0':
1916 case '1':
1917 case '2':
1918 case '3':
1919 case '4':
1920 case '5':
1921 case '6':
1922 case '7':
1923 case '8':
1924 case '9':
1925 skip = 0;
1926 if (!ignore) {
1927 *(out++) = *s;
1928 }
1929 break;
1930 case '.':
1931 ignore = 1;
1932 break;
1933 default:
1934 SvREFCNT_dec(result);
1935 return (NULL);
1936 }
1937 s++;
1938 }
1939 *(out++) = '\0';
1940 SvCUR_set(result, out - result_c);
1941 return (result);
1942}
1943
1944/* pnum must be '\0' terminated */
1945STATIC int
1946S_div128(pTHX_ SV *pnum, bool *done)
1947{
8b6e33c7
AL
1948 STRLEN len;
1949 char * const s = SvPV(pnum, len);
1950 char *t = s;
1951 int m = 0;
1952
7918f24d
NC
1953 PERL_ARGS_ASSERT_DIV128;
1954
8b6e33c7
AL
1955 *done = 1;
1956 while (*t) {
1957 const int i = m * 10 + (*t - '0');
1958 const int r = (i >> 7); /* r < 10 */
1959 m = i & 0x7F;
1960 if (r) {
1961 *done = 0;
1962 }
1963 *(t++) = '0' + r;
a6ec74c1 1964 }
8b6e33c7
AL
1965 *(t++) = '\0';
1966 SvCUR_set(pnum, (STRLEN) (t - s));
1967 return (m);
a6ec74c1
JH
1968}
1969
18529408 1970/*
7accc089
JH
1971=for apidoc packlist
1972
796b6530 1973The engine implementing C<pack()> Perl function.
7accc089 1974
bfce84ec
AL
1975=cut
1976*/
7accc089
JH
1977
1978void
5aaab254 1979Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1980{
aadb217d
JH
1981 tempsym_t sym;
1982
7918f24d
NC
1983 PERL_ARGS_ASSERT_PACKLIST;
1984
f7fe979e 1985 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1986
f337b084
TH
1987 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1988 Also make sure any UTF8 flag is loaded */
56eb0262 1989 SvPV_force_nolen(cat);
bfce84ec
AL
1990 if (DO_UTF8(cat))
1991 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1992
49704364
WL
1993 (void)pack_rec( cat, &sym, beglist, endlist );
1994}
1995
f337b084
TH
1996/* like sv_utf8_upgrade, but also repoint the group start markers */
1997STATIC void
1998marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1999 STRLEN len;
2000 tempsym_t *group;
f7fe979e
AL
2001 const char *from_ptr, *from_start, *from_end, **marks, **m;
2002 char *to_start, *to_ptr;
f337b084
TH
2003
2004 if (SvUTF8(sv)) return;
2005
aa07b2f6 2006 from_start = SvPVX_const(sv);
f337b084
TH
2007 from_end = from_start + SvCUR(sv);
2008 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
6f2d5cbc 2009 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
f337b084
TH
2010 if (from_ptr == from_end) {
2011 /* Simple case: no character needs to be changed */
2012 SvUTF8_on(sv);
2013 return;
2014 }
2015
3473cf63 2016 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2017 Newx(to_start, len, char);
f337b084
TH
2018 Copy(from_start, to_start, from_ptr-from_start, char);
2019 to_ptr = to_start + (from_ptr-from_start);
2020
a02a5408 2021 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2022 for (group=sym_ptr; group; group = group->previous)
2023 marks[group->level] = from_start + group->strbeg;
2024 marks[sym_ptr->level+1] = from_end+1;
2025 for (m = marks; *m < from_ptr; m++)
2026 *m = to_start + (*m-from_start);
2027
2028 for (;from_ptr < from_end; from_ptr++) {
2029 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2030 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2031 }
2032 *to_ptr = 0;
2033
2034 while (*m == from_ptr) *m++ = to_ptr;
2035 if (m != marks + sym_ptr->level+1) {
2036 Safefree(marks);
2037 Safefree(to_start);
5637ef5b
NC
2038 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2039 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2040 }
2041 for (group=sym_ptr; group; group = group->previous)
2042 group->strbeg = marks[group->level] - to_start;
2043 Safefree(marks);
2044
2045 if (SvOOK(sv)) {
2046 if (SvIVX(sv)) {
b162af07 2047 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2048 from_start -= SvIVX(sv);
2049 SvIV_set(sv, 0);
2050 }
2051 SvFLAGS(sv) &= ~SVf_OOK;
2052 }
2053 if (SvLEN(sv) != 0)
2054 Safefree(from_start);
f880fe2f 2055 SvPV_set(sv, to_start);
b162af07
SP
2056 SvCUR_set(sv, to_ptr - to_start);
2057 SvLEN_set(sv, len);
f337b084
TH
2058 SvUTF8_on(sv);
2059}
2060
2061/* Exponential string grower. Makes string extension effectively O(n)
2062 needed says how many extra bytes we need (not counting the final '\0')
2063 Only grows the string if there is an actual lack of space
2064*/
2065STATIC char *
0bd48802 2066S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2067 const STRLEN cur = SvCUR(sv);
2068 const STRLEN len = SvLEN(sv);
f337b084 2069 STRLEN extend;
7918f24d
NC
2070
2071 PERL_ARGS_ASSERT_SV_EXP_GROW;
2072
f337b084
TH
2073 if (len - cur > needed) return SvPVX(sv);
2074 extend = needed > len ? needed : len;
2075 return SvGROW(sv, len+extend+1);
2076}
49704364 2077
93f6e112 2078static SV *
b197e565 2079S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
354b74ae
FC
2080{
2081 SvGETMAGIC(sv);
93f6e112
FC
2082 if (UNLIKELY(SvAMAGIC(sv)))
2083 sv = sv_2num(sv);
99f450cc 2084 if (UNLIKELY(isinfnansv(sv))) {
354b74ae
FC
2085 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2086 const NV nv = SvNV_nomg(sv);
2087 if (c == 'w')
147e3846 2088 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
354b74ae 2089 else
147e3846 2090 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
354b74ae 2091 }
93f6e112 2092 return sv;
354b74ae
FC
2093}
2094
93f6e112
FC
2095#define SvIV_no_inf(sv,d) \
2096 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2097#define SvUV_no_inf(sv,d) \
2098 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
354b74ae 2099
49704364
WL
2100STATIC
2101SV **
f337b084 2102S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2103{
49704364 2104 tempsym_t lookahead;
e1b825c1 2105 SSize_t items = endlist - beglist;
f337b084
TH
2106 bool found = next_symbol(symptr);
2107 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2108 bool warn_utf8 = ckWARN(WARN_UTF8);
81d52ecd 2109 char* from;
f337b084 2110
7918f24d
NC
2111 PERL_ARGS_ASSERT_PACK_REC;
2112
f337b084
TH
2113 if (symptr->level == 0 && found && symptr->code == 'U') {
2114 marked_upgrade(aTHX_ cat, symptr);
2115 symptr->flags |= FLAG_DO_UTF8;
2116 utf8 = 0;
49704364 2117 }
f337b084 2118 symptr->strbeg = SvCUR(cat);
49704364
WL
2119
2120 while (found) {
f337b084
TH
2121 SV *fromstr;
2122 STRLEN fromlen;
e1b825c1 2123 SSize_t len;
a0714e2c 2124 SV *lengthcode = NULL;
49704364 2125 I32 datumtype = symptr->code;
f337b084
TH
2126 howlen_t howlen = symptr->howlen;
2127 char *start = SvPVX(cat);
2128 char *cur = start + SvCUR(cat);
a1219b5e 2129 bool needs_swap;
49704364 2130
fc1bb3f2 2131#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
0c7df902 2132#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
f337b084
TH
2133
2134 switch (howlen) {
fc241834 2135 case e_star:
f337b084
TH
2136 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2137 0 : items;
2138 break;
2139 default:
2140 /* e_no_len and e_number */
2141 len = symptr->length;
49704364
WL
2142 break;
2143 }
2144
f337b084 2145 if (len) {
a7a3cfaa 2146 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2147
a7a3cfaa
TH
2148 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2149 /* We can process this letter. */
2150 STRLEN size = props & PACK_SIZE_MASK;
f17fed50 2151 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
a7a3cfaa 2152 }
f337b084
TH
2153 }
2154
49704364
WL
2155 /* Look ahead for next symbol. Do we have code/code? */
2156 lookahead = *symptr;
2157 found = next_symbol(&lookahead);
246f24af
TH
2158 if (symptr->flags & FLAG_SLASH) {
2159 IV count;
f337b084 2160 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2161 if (strchr("aAZ", lookahead.code)) {
2162 if (lookahead.howlen == e_number) count = lookahead.length;
2163 else {
ce399ba6 2164 if (items > 0) {
48a5da33 2165 count = sv_len_utf8(*beglist);
ce399ba6 2166 }
246f24af
TH
2167 else count = 0;
2168 if (lookahead.code == 'Z') count++;
2169 }
2170 } else {
2171 if (lookahead.howlen == e_number && lookahead.length < items)
2172 count = lookahead.length;
2173 else count = items;
2174 }
2175 lookahead.howlen = e_number;
2176 lookahead.length = count;
2177 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2178 }
49704364 2179
a1219b5e
NC
2180 needs_swap = NEEDS_SWAP(datumtype);
2181
fc241834
RGS
2182 /* Code inside the switch must take care to properly update
2183 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2184 doesn't simply leave using break */
0c7df902 2185 switch (TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2186 default:
f337b084
TH
2187 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2188 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2189 case '%':
49704364 2190 Perl_croak(aTHX_ "'%%' may not be used in pack");
81d52ecd 2191
28be1210 2192 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2193 case '.':
2194 if (howlen == e_star) from = start;
2195 else if (len == 0) from = cur;
2196 else {
2197 tempsym_t *group = symptr;
2198
2199 while (--len && group) group = group->previous;
2200 from = group ? start + group->strbeg : start;
2201 }
2202 fromstr = NEXTFROM;
354b74ae 2203 len = SvIV_no_inf(fromstr, datumtype);
28be1210 2204 goto resize;
28be1210 2205 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2206 case '@':
28be1210
TH
2207 from = start + symptr->strbeg;
2208 resize:
28be1210 2209 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2210 if (len >= 0) {
2211 while (len && from < cur) {
2212 from += UTF8SKIP(from);
2213 len--;
2214 }
2215 if (from > cur)
2216 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2217 if (len) {
2218 /* Here we know from == cur */
2219 grow:
2220 GROWING(0, cat, start, cur, len);
2221 Zero(cur, len, char);
2222 cur += len;
2223 } else if (from < cur) {
2224 len = cur - from;
2225 goto shrink;
2226 } else goto no_change;
2227 } else {
2228 cur = from;
2229 len = -len;
2230 goto utf8_shrink;
f337b084 2231 }
28be1210
TH
2232 else {
2233 len -= cur - from;
f337b084 2234 if (len > 0) goto grow;
28be1210 2235 if (len == 0) goto no_change;
fc241834 2236 len = -len;
28be1210 2237 goto shrink;
f337b084 2238 }
a6ec74c1 2239 break;
81d52ecd 2240
fc241834 2241 case '(': {
49704364 2242 tempsym_t savsym = *symptr;
66c611c5
MHM
2243 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2244 symptr->flags |= group_modifiers;
49704364
WL
2245 symptr->patend = savsym.grpend;
2246 symptr->level++;
f337b084 2247 symptr->previous = &lookahead;
18529408 2248 while (len--) {
f337b084
TH
2249 U32 was_utf8;
2250 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2251 else symptr->flags &= ~FLAG_PARSE_UTF8;
2252 was_utf8 = SvUTF8(cat);
49704364 2253 symptr->patptr = savsym.grpbeg;
f337b084
TH
2254 beglist = pack_rec(cat, symptr, beglist, endlist);
2255 if (SvUTF8(cat) != was_utf8)
2256 /* This had better be an upgrade while in utf8==0 mode */
2257 utf8 = 1;
2258
49704364 2259 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2260 break; /* No way to continue */
2261 }
ee790063 2262 items = endlist - beglist;
f337b084
TH
2263 lookahead.flags = symptr->flags & ~group_modifiers;
2264 goto no_change;
18529408 2265 }
62f95557
IZ
2266 case 'X' | TYPE_IS_SHRIEKING:
2267 if (!len) /* Avoid division by 0 */
2268 len = 1;
f337b084
TH
2269 if (utf8) {
2270 char *hop, *last;
e1b825c1 2271 SSize_t l = len;
f337b084
TH
2272 hop = last = start;
2273 while (hop < cur) {
2274 hop += UTF8SKIP(hop);
2275 if (--l == 0) {
2276 last = hop;
2277 l = len;
2278 }
2279 }
2280 if (last > cur)
2281 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2282 cur = last;
2283 break;
2284 }
2285 len = (cur-start) % len;
924ba076 2286 /* FALLTHROUGH */
a6ec74c1 2287 case 'X':
f337b084
TH
2288 if (utf8) {
2289 if (len < 1) goto no_change;
28be1210 2290 utf8_shrink:
f337b084
TH
2291 while (len > 0) {
2292 if (cur <= start)
28be1210
TH
2293 Perl_croak(aTHX_ "'%c' outside of string in pack",
2294 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2295 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2296 if (cur <= start)
28be1210
TH
2297 Perl_croak(aTHX_ "'%c' outside of string in pack",
2298 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2299 }
2300 len--;
2301 }
2302 } else {
fc241834 2303 shrink:
f337b084 2304 if (cur - start < len)
28be1210
TH
2305 Perl_croak(aTHX_ "'%c' outside of string in pack",
2306 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2307 cur -= len;
2308 }
2309 if (cur < start+symptr->strbeg) {
2310 /* Make sure group starts don't point into the void */
2311 tempsym_t *group;
9e27e96a 2312 const STRLEN length = cur-start;
f337b084
TH
2313 for (group = symptr;
2314 group && length < group->strbeg;
2315 group = group->previous) group->strbeg = length;
2316 lookahead.strbeg = length;
2317 }
a6ec74c1 2318 break;
fc241834 2319 case 'x' | TYPE_IS_SHRIEKING: {
e1b825c1 2320 SSize_t ai32;
62f95557
IZ
2321 if (!len) /* Avoid division by 0 */
2322 len = 1;
230e1fce 2323 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2324 else ai32 = (cur - start) % len;
2325 if (ai32 == 0) goto no_change;
2326 len -= ai32;
2327 }
924ba076 2328 /* FALLTHROUGH */
a6ec74c1 2329 case 'x':
f337b084 2330 goto grow;
a6ec74c1
JH
2331 case 'A':
2332 case 'Z':
f337b084 2333 case 'a': {
f7fe979e 2334 const char *aptr;
f337b084 2335
a6ec74c1 2336 fromstr = NEXTFROM;
e62f0680 2337 aptr = SvPV_const(fromstr, fromlen);
f337b084 2338 if (DO_UTF8(fromstr)) {
f7fe979e 2339 const char *end, *s;
f337b084
TH
2340
2341 if (!utf8 && !SvUTF8(cat)) {
2342 marked_upgrade(aTHX_ cat, symptr);
2343 lookahead.flags |= FLAG_DO_UTF8;
2344 lookahead.strbeg = symptr->strbeg;
2345 utf8 = 1;
2346 start = SvPVX(cat);
2347 cur = start + SvCUR(cat);
2348 }
fc241834 2349 if (howlen == e_star) {
f337b084
TH
2350 if (utf8) goto string_copy;
2351 len = fromlen+1;
2352 }
2353 s = aptr;
2354 end = aptr + fromlen;
2355 fromlen = datumtype == 'Z' ? len-1 : len;
e1b825c1 2356 while ((SSize_t) fromlen > 0 && s < end) {
f337b084
TH
2357 s += UTF8SKIP(s);
2358 fromlen--;
2359 }
2360 if (s > end)
2361 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2362 if (utf8) {
fc241834 2363 len = fromlen;
f337b084
TH
2364 if (datumtype == 'Z') len++;
2365 fromlen = s-aptr;
2366 len += fromlen;
fc241834 2367
f337b084 2368 goto string_copy;
fc241834 2369 }
f337b084
TH
2370 fromlen = len - fromlen;
2371 if (datumtype == 'Z') fromlen--;
2372 if (howlen == e_star) {
2373 len = fromlen;
2374 if (datumtype == 'Z') len++;
fc241834 2375 }
f337b084 2376 GROWING(0, cat, start, cur, len);
9df874cd 2377 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2378 datumtype | TYPE_IS_PACK))
5637ef5b 2379 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
e1b825c1
Z
2380 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2381 (int)datumtype, aptr, end, cur, fromlen);
f337b084 2382 cur += fromlen;
a6ec74c1 2383 len -= fromlen;
f337b084
TH
2384 } else if (utf8) {
2385 if (howlen == e_star) {
2386 len = fromlen;
2387 if (datumtype == 'Z') len++;
a6ec74c1 2388 }
e1b825c1 2389 if (len <= (SSize_t) fromlen) {
f337b084
TH
2390 fromlen = len;
2391 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2392 }
fc241834 2393 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2394 upgrade, so:
2395 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2396 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2397 len -= fromlen;
2398 while (fromlen > 0) {
230e1fce 2399 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2400 aptr++;
2401 fromlen--;
fc241834 2402 }
f337b084
TH
2403 } else {
2404 string_copy:
2405 if (howlen == e_star) {
2406 len = fromlen;
2407 if (datumtype == 'Z') len++;
2408 }
e1b825c1 2409 if (len <= (SSize_t) fromlen) {
f337b084
TH
2410 fromlen = len;
2411 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2412 }
f337b084
TH
2413 GROWING(0, cat, start, cur, len);
2414 Copy(aptr, cur, fromlen, char);
2415 cur += fromlen;
2416 len -= fromlen;
a6ec74c1 2417 }
f337b084
TH
2418 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2419 cur += len;
3c4fb04a 2420 SvTAINT(cat);
a6ec74c1 2421 break;
f337b084 2422 }
a6ec74c1 2423 case 'B':
f337b084 2424 case 'b': {
b83604b4 2425 const char *str, *end;
e1b825c1 2426 SSize_t l, field_len;
f337b084
TH
2427 U8 bits;
2428 bool utf8_source;
2429 U32 utf8_flags;
a6ec74c1 2430
fc241834 2431 fromstr = NEXTFROM;
b83604b4 2432 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2433 end = str + fromlen;
2434 if (DO_UTF8(fromstr)) {
2435 utf8_source = TRUE;
041457d9 2436 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2437 } else {
2438 utf8_source = FALSE;
2439 utf8_flags = 0; /* Unused, but keep compilers happy */
2440 }
2441 if (howlen == e_star) len = fromlen;
2442 field_len = (len+7)/8;
2443 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2444 if (len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2445 bits = 0;
2446 l = 0;
2447 if (datumtype == 'B')
2448 while (l++ < len) {
2449 if (utf8_source) {
95b63a38 2450 UV val = 0;
f337b084
TH
2451 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2452 bits |= val & 1;
2453 } else bits |= *str++ & 1;
2454 if (l & 7) bits <<= 1;
fc241834 2455 else {
f337b084
TH
2456 PUSH_BYTE(utf8, cur, bits);
2457 bits = 0;
a6ec74c1
JH
2458 }
2459 }
f337b084
TH
2460 else
2461 /* datumtype == 'b' */
2462 while (l++ < len) {
2463 if (utf8_source) {
95b63a38 2464 UV val = 0;
f337b084
TH
2465 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2466 if (val & 1) bits |= 0x80;
2467 } else if (*str++ & 1)
2468 bits |= 0x80;
2469 if (l & 7) bits >>= 1;
fc241834 2470 else {
f337b084
TH
2471 PUSH_BYTE(utf8, cur, bits);
2472 bits = 0;
a6ec74c1
JH
2473 }
2474 }
f337b084
TH
2475 l--;
2476 if (l & 7) {
fc241834 2477 if (datumtype == 'B')
f337b084 2478 bits <<= 7 - (l & 7);
fc241834 2479 else
f337b084
TH
2480 bits >>= 7 - (l & 7);
2481 PUSH_BYTE(utf8, cur, bits);
2482 l += 7;
a6ec74c1 2483 }
f337b084
TH
2484 /* Determine how many chars are left in the requested field */
2485 l /= 8;
2486 if (howlen == e_star) field_len = 0;
2487 else field_len -= l;
2488 Zero(cur, field_len, char);
2489 cur += field_len;
a6ec74c1 2490 break;
f337b084 2491 }
a6ec74c1 2492 case 'H':
f337b084 2493 case 'h': {
10516c54 2494 const char *str, *end;
e1b825c1 2495 SSize_t l, field_len;
f337b084
TH
2496 U8 bits;
2497 bool utf8_source;
2498 U32 utf8_flags;
a6ec74c1 2499
fc241834 2500 fromstr = NEXTFROM;
10516c54 2501 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2502 end = str + fromlen;
2503 if (DO_UTF8(fromstr)) {
2504 utf8_source = TRUE;
041457d9 2505 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2506 } else {
2507 utf8_source = FALSE;
2508 utf8_flags = 0; /* Unused, but keep compilers happy */
2509 }
2510 if (howlen == e_star) len = fromlen;
2511 field_len = (len+1)/2;
2512 GROWING(utf8, cat, start, cur, field_len);
e1b825c1 2513 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
f337b084
TH
2514 bits = 0;
2515 l = 0;
2516 if (datumtype == 'H')
2517 while (l++ < len) {
2518 if (utf8_source) {
95b63a38 2519 UV val = 0;
f337b084
TH
2520 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2521 if (val < 256 && isALPHA(val))
2522 bits |= (val + 9) & 0xf;
a6ec74c1 2523 else
f337b084
TH
2524 bits |= val & 0xf;
2525 } else if (isALPHA(*str))
2526 bits |= (*str++ + 9) & 0xf;
2527 else
2528 bits |= *str++ & 0xf;
2529 if (l & 1) bits <<= 4;
fc241834 2530 else {
f337b084
TH
2531 PUSH_BYTE(utf8, cur, bits);
2532 bits = 0;
a6ec74c1
JH
2533 }
2534 }
f337b084
TH
2535 else
2536 while (l++ < len) {
2537 if (utf8_source) {
95b63a38 2538 UV val = 0;
f337b084
TH
2539 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2540 if (val < 256 && isALPHA(val))
2541 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2542 else
f337b084
TH
2543 bits |= (val & 0xf) << 4;
2544 } else if (isALPHA(*str))
2545 bits |= ((*str++ + 9) & 0xf) << 4;
2546 else
2547 bits |= (*str++ & 0xf) << 4;
2548 if (l & 1) bits >>= 4;
fc241834 2549 else {
f337b084
TH
2550 PUSH_BYTE(utf8, cur, bits);
2551 bits = 0;
a6ec74c1 2552 }
fc241834 2553 }
f337b084
TH
2554 l--;
2555 if (l & 1) {
2556 PUSH_BYTE(utf8, cur, bits);
2557 l++;
2558 }
2559 /* Determine how many chars are left in the requested field */
2560 l /= 2;
2561 if (howlen == e_star) field_len = 0;
2562 else field_len -= l;
2563 Zero(cur, field_len, char);
2564 cur += field_len;
2565 break;
fc241834
RGS
2566 }
2567 case 'c':
f337b084
TH
2568 while (len-- > 0) {
2569 IV aiv;
2570 fromstr = NEXTFROM;
354b74ae 2571 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2572 if ((-128 > aiv || aiv > 127))
2573 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2574 "Character in 'c' format wrapped in pack");
585ec06d 2575 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2576 }
2577 break;
2578 case 'C':
f337b084
TH
2579 if (len == 0) {
2580 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2581 break;
2582 }
a6ec74c1 2583 while (len-- > 0) {
f337b084 2584 IV aiv;
a6ec74c1 2585 fromstr = NEXTFROM;
354b74ae 2586 aiv = SvIV_no_inf(fromstr, datumtype);
a2a5de95
NC
2587 if ((0 > aiv || aiv > 0xff))
2588 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2589 "Character in 'C' format wrapped in pack");
1651fc44 2590 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2591 }
fc241834
RGS
2592 break;
2593 case 'W': {
2594 char *end;
670f1322 2595 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2596
2597 end = start+SvLEN(cat)-1;
2598 if (utf8) end -= UTF8_MAXLEN-1;
2599 while (len-- > 0) {
2600 UV auv;
2601 fromstr = NEXTFROM;
354b74ae 2602 auv = SvUV_no_inf(fromstr, datumtype);
fc241834
RGS
2603 if (in_bytes) auv = auv % 0x100;
2604 if (utf8) {
2605 W_utf8:
bf4a926a 2606 if (cur >= end) {
fc241834 2607 *cur = '\0';
b162af07 2608 SvCUR_set(cat, cur - start);
fc241834
RGS
2609
2610 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2611 end = start+SvLEN(cat)-UTF8_MAXLEN;
2612 }
05fefba9 2613 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
fc241834
RGS
2614 } else {
2615 if (auv >= 0x100) {
2616 if (!SvUTF8(cat)) {
2617 *cur = '\0';
b162af07 2618 SvCUR_set(cat, cur - start);
fc241834
RGS
2619 marked_upgrade(aTHX_ cat, symptr);
2620 lookahead.flags |= FLAG_DO_UTF8;
2621 lookahead.strbeg = symptr->strbeg;
2622 utf8 = 1;
2623 start = SvPVX(cat);
2624 cur = start + SvCUR(cat);
2625 end = start+SvLEN(cat)-UTF8_MAXLEN;
2626 goto W_utf8;
2627 }
a2a5de95
NC
2628 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2629 "Character in 'W' format wrapped in pack");
fc241834
RGS
2630 auv &= 0xff;
2631 }
2632 if (cur >= end) {
2633 *cur = '\0';
b162af07 2634 SvCUR_set(cat, cur - start);
fc241834
RGS
2635 GROWING(0, cat, start, cur, len+1);
2636 end = start+SvLEN(cat)-1;
2637 }
fe2774ed 2638 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2639 }
2640 }
2641 break;
fc241834
RGS
2642 }
2643 case 'U': {
2644 char *end;
2645
2646 if (len == 0) {
2647 if (!(symptr->flags & FLAG_DO_UTF8)) {
2648 marked_upgrade(aTHX_ cat, symptr);
2649 lookahead.flags |= FLAG_DO_UTF8;
2650 lookahead.strbeg = symptr->strbeg;
2651 }
2652 utf8 = 0;
2653 goto no_change;
2654 }
2655
2656 end = start+SvLEN(cat);
2657 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2658 while (len-- > 0) {
fc241834 2659 UV auv;
a6ec74c1 2660 fromstr = NEXTFROM;
354b74ae 2661 auv = SvUV_no_inf(fromstr, datumtype);
fc241834 2662 if (utf8) {
81b3f640 2663 U8 buffer[UTF8_MAXLEN+1], *endb;
05fefba9 2664 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
fc241834
RGS
2665 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2666 *cur = '\0';
b162af07 2667 SvCUR_set(cat, cur - start);
fc241834
RGS
2668 GROWING(0, cat, start, cur,
2669 len+(endb-buffer)*UTF8_EXPAND);
2670 end = start+SvLEN(cat);
2671 }
d21d7215 2672 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
fc241834
RGS
2673 } else {
2674 if (cur >= end) {
2675 *cur = '\0';
b162af07 2676 SvCUR_set(cat, cur - start);
fc241834
RGS
2677 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2678 end = start+SvLEN(cat)-UTF8_MAXLEN;
2679 }
05fefba9
KW
2680 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2681 UNI_TO_NATIVE(auv),
2682 0);
fc241834 2683 }
a6ec74c1 2684 }
a6ec74c1 2685 break;
fc241834 2686 }
a6ec74c1
JH
2687 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2688 case 'f':
a6ec74c1 2689 while (len-- > 0) {
f337b084
TH
2690 float afloat;
2691 NV anv;
a6ec74c1 2692 fromstr = NEXTFROM;
f337b084 2693 anv = SvNV(fromstr);
a7157111 2694# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2695 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2696 * on Alpha; fake it if we don't have them.
2697 */
f337b084 2698 if (anv > FLT_MAX)
fc241834 2699 afloat = FLT_MAX;
f337b084 2700 else if (anv < -FLT_MAX)
fc241834 2701 afloat = -FLT_MAX;
f337b084 2702 else afloat = (float)anv;
baf3cf9c 2703# else
a7157111 2704# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
d22fb6ca
DD
2705 if(Perl_isnan(anv))
2706 afloat = (float)NV_NAN;
2707 else
a7157111
JH
2708# endif
2709# ifdef NV_INF
919894b7
DM
2710 /* a simple cast to float is undefined if outside
2711 * the range of values that can be represented */
2712 afloat = (float)(anv > FLT_MAX ? NV_INF :
2713 anv < -FLT_MAX ? -NV_INF : anv);
a7157111 2714# endif
baf3cf9c 2715# endif
3a88beaa 2716 PUSH_VAR(utf8, cur, afloat, needs_swap);
a6ec74c1
JH
2717 }
2718 break;
2719 case 'd':
a6ec74c1 2720 while (len-- > 0) {
f337b084
TH
2721 double adouble;
2722 NV anv;
a6ec74c1 2723 fromstr = NEXTFROM;
f337b084 2724 anv = SvNV(fromstr);
a7157111 2725# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
f337b084 2726 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2727 * on Alpha; fake it if we don't have them.
2728 */
f337b084 2729 if (anv > DBL_MAX)
fc241834 2730 adouble = DBL_MAX;
f337b084 2731 else if (anv < -DBL_MAX)
fc241834 2732 adouble = -DBL_MAX;
f337b084 2733 else adouble = (double)anv;
baf3cf9c 2734# else
f337b084 2735 adouble = (double)anv;
baf3cf9c 2736# endif
3a88beaa 2737 PUSH_VAR(utf8, cur, adouble, needs_swap);
a6ec74c1
JH
2738 }
2739 break;
fc241834 2740 case 'F': {
275663fa 2741 NV_bytes anv;
1109a392 2742 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2743 while (len-- > 0) {
2744 fromstr = NEXTFROM;
cd07c537
DM
2745#ifdef __GNUC__
2746 /* to work round a gcc/x86 bug; don't use SvNV */
2747 anv.nv = sv_2nv(fromstr);
070e2677
TC
2748# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2749 && LONG_DOUBLESIZE > 10
2750 /* GCC sometimes overwrites the padding in the
2751 assignment above */
2752 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2753# endif
cd07c537 2754#else
275663fa 2755 anv.nv = SvNV(fromstr);
cd07c537 2756#endif
3a88beaa 2757 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
92d41999
JH
2758 }
2759 break;
fc241834 2760 }
92d41999 2761#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2762 case 'D': {
275663fa 2763 ld_bytes aldouble;
1109a392
MHM
2764 /* long doubles can have unused bits, which may be nonzero */
2765 Zero(&aldouble, 1, long double);
92d41999
JH
2766 while (len-- > 0) {
2767 fromstr = NEXTFROM;
cd07c537
DM
2768# ifdef __GNUC__
2769 /* to work round a gcc/x86 bug; don't use SvNV */
2770 aldouble.ld = (long double)sv_2nv(fromstr);
070e2677
TC
2771# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2772 /* GCC sometimes overwrites the padding in the
2773 assignment above */
2774 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2775# endif
cd07c537 2776# else
275663fa 2777 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2778# endif
3a88beaa
NC
2779 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2780 needs_swap);
92d41999
JH
2781 }
2782 break;
fc241834 2783 }
92d41999 2784#endif
068bd2e7 2785 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2786 case 'n':
2787 while (len-- > 0) {
f337b084 2788 I16 ai16;
a6ec74c1 2789 fromstr = NEXTFROM;
354b74ae 2790 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2791 ai16 = PerlSock_htons(ai16);
3a88beaa 2792 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2793 }
2794 break;
068bd2e7 2795 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2796 case 'v':
2797 while (len-- > 0) {
f337b084 2798 I16 ai16;
a6ec74c1 2799 fromstr = NEXTFROM;
354b74ae 2800 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
ef108786 2801 ai16 = htovs(ai16);
3a88beaa 2802 PUSH16(utf8, cur, &ai16, FALSE);
a6ec74c1
JH
2803 }
2804 break;
49704364 2805 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2806#if SHORTSIZE != SIZE16
fc241834 2807 while (len-- > 0) {
f337b084 2808 unsigned short aushort;
fc241834 2809 fromstr = NEXTFROM;
354b74ae 2810 aushort = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2811 PUSH_VAR(utf8, cur, aushort, needs_swap);
fc241834 2812 }
49704364
WL
2813 break;
2814#else
924ba076 2815 /* FALLTHROUGH */
a6ec74c1 2816#endif
49704364 2817 case 'S':
fc241834 2818 while (len-- > 0) {
f337b084 2819 U16 au16;
fc241834 2820 fromstr = NEXTFROM;
354b74ae 2821 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
3a88beaa 2822 PUSH16(utf8, cur, &au16, needs_swap);
a6ec74c1
JH
2823 }
2824 break;
49704364 2825 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2826#if SHORTSIZE != SIZE16
fc241834 2827 while (len-- > 0) {
f337b084 2828 short ashort;
fc241834 2829 fromstr = NEXTFROM;
354b74ae 2830 ashort = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2831 PUSH_VAR(utf8, cur, ashort, needs_swap);
a6ec74c1 2832 }
49704364
WL
2833 break;
2834#else
924ba076 2835 /* FALLTHROUGH */
a6ec74c1 2836#endif
49704364
WL
2837 case 's':
2838 while (len-- > 0) {
f337b084 2839 I16 ai16;
49704364 2840 fromstr = NEXTFROM;
354b74ae 2841 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
3a88beaa 2842 PUSH16(utf8, cur, &ai16, needs_swap);
a6ec74c1
JH
2843 }
2844 break;
2845 case 'I':
49704364 2846 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2847 while (len-- > 0) {
f337b084 2848 unsigned int auint;
a6ec74c1 2849 fromstr = NEXTFROM;
354b74ae 2850 auint = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2851 PUSH_VAR(utf8, cur, auint, needs_swap);
a6ec74c1
JH
2852 }
2853 break;
92d41999
JH
2854 case 'j':
2855 while (len-- > 0) {
f337b084 2856 IV aiv;
92d41999 2857 fromstr = NEXTFROM;
354b74ae 2858 aiv = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2859 PUSH_VAR(utf8, cur, aiv, needs_swap);
92d41999
JH
2860 }
2861 break;
2862 case 'J':
2863 while (len-- > 0) {
f337b084 2864 UV auv;
92d41999 2865 fromstr = NEXTFROM;
354b74ae 2866 auv = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2867 PUSH_VAR(utf8, cur, auv, needs_swap);
92d41999
JH
2868 }
2869 break;
a6ec74c1
JH
2870 case 'w':
2871 while (len-- > 0) {
f337b084 2872 NV anv;
a6ec74c1 2873 fromstr = NEXTFROM;
83388ae2 2874 S_sv_check_infnan(aTHX_ fromstr, datumtype);
354b74ae 2875 anv = SvNV_nomg(fromstr);
a6ec74c1 2876
f337b084
TH
2877 if (anv < 0) {
2878 *cur = '\0';
b162af07 2879 SvCUR_set(cat, cur - start);
49704364 2880 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2881 }
a6ec74c1 2882
196b62db
NC
2883 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2884 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2885 any negative IVs will have already been got by the croak()
2886 above. IOK is untrue for fractions, so we test them
2887 against UV_MAX_P1. */
f337b084
TH
2888 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2889 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2890 char *in = buf + sizeof(buf);
1e9a122e 2891 UV auv = SvUV_nomg(fromstr);
a6ec74c1
JH
2892
2893 do {
eb160463 2894 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2895 auv >>= 7;
2896 } while (auv);
2897 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2898 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2899 in, (buf + sizeof(buf)) - in);
2900 } else if (SvPOKp(fromstr))
2901 goto w_string;
a6ec74c1 2902 else if (SvNOKp(fromstr)) {
0258719b 2903 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2904 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2905 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2906 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2907 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2908 Some C compilers are strict about integral constant
2909 expressions so we conservatively divide by a slightly
2910 smaller integer instead of multiplying by the exact
2911 floating-point value.
0258719b
NC
2912 */
2913#ifdef NV_MAX_10_EXP
f337b084 2914 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2915 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2916#else
f337b084 2917 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2918 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2919#endif
a6ec74c1
JH
2920 char *in = buf + sizeof(buf);
2921
8b6e33c7 2922 anv = Perl_floor(anv);
a6ec74c1 2923 do {
8b6e33c7 2924 const NV next = Perl_floor(anv / 128);
a6ec74c1 2925 if (in <= buf) /* this cannot happen ;-) */
0c7df902 2926 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2927 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2928 anv = next;
2929 } while (anv > 0);
a6ec74c1 2930 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2931 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2932 in, (buf + sizeof(buf)) - in);
2933 } else {
8b6e33c7
AL
2934 const char *from;
2935 char *result, *in;
735b914b
JH
2936 SV *norm;
2937 STRLEN len;
2938 bool done;
2939
f337b084 2940 w_string:
735b914b 2941 /* Copy string and check for compliance */
1e9a122e 2942 from = SvPV_nomg_const(fromstr, len);
735b914b 2943 if ((norm = is_an_int(from, len)) == NULL)
49704364 2944 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2945
a02a5408 2946 Newx(result, len, char);
735b914b
JH
2947 in = result + len;
2948 done = FALSE;
f337b084 2949 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2950 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2951 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2952 in, (result + len) - in);
735b914b
JH
2953 Safefree(result);
2954 SvREFCNT_dec(norm); /* free norm */
fc241834 2955 }
a6ec74c1
JH
2956 }
2957 break;
2958 case 'i':
49704364 2959 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2960 while (len-- > 0) {
f337b084 2961 int aint;
a6ec74c1 2962 fromstr = NEXTFROM;
354b74ae 2963 aint = SvIV_no_inf(fromstr, datumtype);
3a88beaa 2964 PUSH_VAR(utf8, cur, aint, needs_swap);
a6ec74c1
JH
2965 }
2966 break;
068bd2e7 2967 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2968 case 'N':
2969 while (len-- > 0) {
f337b084 2970 U32 au32;
a6ec74c1 2971 fromstr = NEXTFROM;
354b74ae 2972 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2973 au32 = PerlSock_htonl(au32);
3a88beaa 2974 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2975 }
2976 break;
068bd2e7 2977 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2978 case 'V':
2979 while (len-- > 0) {
f337b084 2980 U32 au32;
a6ec74c1 2981 fromstr = NEXTFROM;
354b74ae 2982 au32 = SvUV_no_inf(fromstr, datumtype);
ef108786 2983 au32 = htovl(au32);
3a88beaa 2984 PUSH32(utf8, cur, &au32, FALSE);
a6ec74c1
JH
2985 }
2986 break;
49704364 2987 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2988#if LONGSIZE != SIZE32
fc241834 2989 while (len-- > 0) {
f337b084 2990 unsigned long aulong;
fc241834 2991 fromstr = NEXTFROM;
354b74ae 2992 aulong = SvUV_no_inf(fromstr, datumtype);
3a88beaa 2993 PUSH_VAR(utf8, cur, aulong, needs_swap);
a6ec74c1 2994 }
49704364
WL
2995 break;
2996#else
2997 /* Fall though! */
a6ec74c1 2998#endif
49704364 2999 case 'L':
fc241834 3000 while (len-- > 0) {
f337b084 3001 U32 au32;
fc241834 3002 fromstr = NEXTFROM;
354b74ae 3003 au32 = SvUV_no_inf(fromstr, datumtype);
3a88beaa 3004 PUSH32(utf8, cur, &au32, needs_swap);
a6ec74c1
JH
3005 }
3006 break;
49704364 3007 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3008#if LONGSIZE != SIZE32
fc241834 3009 while (len-- > 0) {
f337b084 3010 long along;
fc241834 3011 fromstr = NEXTFROM;
354b74ae 3012 along = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3013 PUSH_VAR(utf8, cur, along, needs_swap);
a6ec74c1 3014 }
49704364
WL
3015 break;
3016#else
3017 /* Fall though! */
a6ec74c1 3018#endif
49704364
WL
3019 case 'l':
3020 while (len-- > 0) {
f337b084 3021 I32 ai32;
49704364 3022 fromstr = NEXTFROM;
354b74ae 3023 ai32 = SvIV_no_inf(fromstr, datumtype);
3a88beaa 3024 PUSH32(utf8, cur, &ai32, needs_swap);
a6ec74c1
JH
3025 }
3026 break;
c174bf3b 3027#if defined(HAS_QUAD) && IVSIZE >= 8
a6ec74c1
JH
3028 case 'Q':
3029 while (len-- > 0) {
f337b084 3030 Uquad_t auquad;
a6ec74c1 3031 fromstr = NEXTFROM;
354b74ae 3032 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3a88beaa 3033 PUSH_VAR(utf8, cur, auquad, needs_swap);
a6ec74c1
JH
3034 }
3035 break;
3036 case 'q':
3037 while (len-- > 0) {
f337b084 3038 Quad_t aquad;
a6ec74c1 3039 fromstr = NEXTFROM;
354b74ae 3040 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3a88beaa 3041 PUSH_VAR(utf8, cur, aquad, needs_swap);
a6ec74c1
JH
3042 }
3043 break;
1640b983 3044#endif
a6ec74c1
JH
3045 case 'P':
3046 len = 1; /* assume SV is correct length */
f337b084 3047 GROWING(utf8, cat, start, cur, sizeof(char *));
924ba076 3048 /* FALLTHROUGH */
a6ec74c1
JH
3049 case 'p':
3050 while (len-- > 0) {
83003860 3051 const char *aptr;
f337b084 3052
a6ec74c1 3053 fromstr = NEXTFROM;
28a4f200
TH
3054 SvGETMAGIC(fromstr);
3055 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3056 else {
a6ec74c1
JH
3057 /* XXX better yet, could spirit away the string to
3058 * a safe spot and hang on to it until the result
3059 * of pack() (and all copies of the result) are
3060 * gone.
3061 */
b199a398
FC
3062 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3063 || (SvPADTMP(fromstr) &&
a2a5de95
NC
3064 !SvREADONLY(fromstr)))) {
3065 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3066 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3067 }
3068 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3069 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3070 else
2596d9fe 3071 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3072 }
3a88beaa 3073 PUSH_VAR(utf8, cur, aptr, needs_swap);
a6ec74c1
JH
3074 }
3075 break;
fc241834 3076 case 'u': {
f7fe979e 3077 const char *aptr, *aend;
fc241834 3078 bool from_utf8;
f337b084 3079
a6ec74c1 3080 fromstr = NEXTFROM;
fc241834
RGS
3081 if (len <= 2) len = 45;
3082 else len = len / 3 * 3;
3083 if (len >= 64) {
a2a5de95
NC
3084 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3085 "Field too wide in 'u' format in pack");
fc241834
RGS
3086 len = 63;
3087 }
83003860 3088 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3089 from_utf8 = DO_UTF8(fromstr);
3090 if (from_utf8) {
3091 aend = aptr + fromlen;
3f63b0e5 3092 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3093 } else aend = NULL; /* Unused, but keep compilers happy */
3094 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3095 while (fromlen > 0) {
fc241834 3096 U8 *end;
e1b825c1 3097 SSize_t todo;
fc241834 3098 U8 hunk[1+63/3*4+1];
a6ec74c1 3099
e1b825c1 3100 if ((SSize_t)fromlen > len)
a6ec74c1
JH
3101 todo = len;
3102 else
3103 todo = fromlen;
fc241834
RGS
3104 if (from_utf8) {
3105 char buffer[64];
9df874cd 3106 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
fc241834
RGS
3107 'u' | TYPE_IS_PACK)) {
3108 *cur = '\0';
b162af07 3109 SvCUR_set(cat, cur - start);
5637ef5b 3110 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
e1b825c1
Z
3111 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3112 aptr, aend, buffer, todo);
fc241834 3113 }
e68aed92 3114 end = doencodes(hunk, (const U8 *)buffer, todo);
fc241834 3115 } else {
e68aed92 3116 end = doencodes(hunk, (const U8 *)aptr, todo);
fc241834
RGS
3117 aptr += todo;
3118 }
3a88beaa 3119 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
fc241834
RGS
3120 fromlen -= todo;
3121 }
a6ec74c1
JH
3122 break;
3123 }
f337b084
TH
3124 }
3125 *cur = '\0';
b162af07 3126 SvCUR_set(cat, cur - start);
f337b084 3127 no_change:
49704364 3128 *symptr = lookahead;
a6ec74c1 3129 }
49704364 3130 return beglist;
18529408
IZ
3131}
3132#undef NEXTFROM
3133
3134
3135PP(pp_pack)
3136{
20b7effb 3137 dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3138 SV *cat = TARG;
18529408 3139 STRLEN fromlen;
349d4f2f 3140 SV *pat_sv = *++MARK;
eb578fdb
KW
3141 const char *pat = SvPV_const(pat_sv, fromlen);
3142 const char *patend = pat + fromlen;
18529408
IZ
3143
3144 MARK++;
e8f7c79f 3145 SvPVCLEAR(cat);
f337b084 3146 SvUTF8_off(cat);
18529408 3147
7accc089 3148 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3149
a6ec74c1
JH
3150 SvSETMAGIC(cat);
3151 SP = ORIGMARK;
3152 PUSHs(cat);
3153 RETURN;
3154}
a6ec74c1 3155
73cb7263 3156/*
14d04a33 3157 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3158 */