This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
DO_BO_{UN,}PACK can use my_swabn() directly.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
f337b084
TH
132/* Only to be used inside a loop (see the break) */
133#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
134 if (utf8) { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
136 } else { \
137 Copy(s, OFF16(p), SIZE16, char); \
138 (s) += SIZE16; \
139 } \
140} STMT_END
141
142/* Only to be used inside a loop (see the break) */
143#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
144 if (utf8) { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
146 } else { \
147 Copy(s, OFF32(p), SIZE32, char); \
148 (s) += SIZE32; \
149 } \
150} STMT_END
151
152#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
154
155/* Only to be used inside a loop (see the break) */
275663fa 156#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
157STMT_START { \
158 if (utf8) { \
f337b084 159 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 160 (char *) (buf), len, datumtype)) break; \
08ca2aa3 161 } else { \
275663fa
TC
162 Copy(s, (char *) (buf), len, char); \
163 s += len; \
08ca2aa3 164 } \
08ca2aa3
TH
165} STMT_END
166
275663fa
TC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
169
f337b084 170#define PUSH_VAR(utf8, aptr, var) \
230e1fce 171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 172
49704364
LW
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
a6ec74c1 194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
215#if 'I' == 73 && 'J' == 74
216/* On an ASCII/ISO kind of system */
217#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
218#else
219/*
220 Some other sort of character set - use memchr() so we don't match
221 the null byte.
222 */
223#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
224#endif
225
66c611c5 226/* type modifiers */
62f95557 227#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
228#define TYPE_IS_BIG_ENDIAN 0x200
229#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 230#define TYPE_IS_PACK 0x800
1109a392 231#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 232#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
233#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
234
7212898e
NC
235# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
237
238# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
239
3cc90e2e
NC
240#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
241
7212898e 242# define DO_BO_UNPACK(var, type) \
1109a392 243 STMT_START { \
3cc90e2e 244 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
e8fda8c4 245 my_swabn(&var, sizeof(var)); \
1109a392
MHM
246 } \
247 } STMT_END
248
7212898e 249# define DO_BO_PACK(var, type) \
1109a392 250 STMT_START { \
3cc90e2e 251 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
e8fda8c4 252 my_swabn(&var, sizeof(var)); \
1109a392
MHM
253 } \
254 } STMT_END
255
3cc90e2e
NC
256# elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
257
258# define DO_BO_UNPACK(var, type) \
259 STMT_START { \
260 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
e8fda8c4 261 my_swabn(&var, sizeof(var)); \
3cc90e2e
NC
262 } \
263 } STMT_END
264
265# define DO_BO_PACK(var, type) \
266 STMT_START { \
267 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
e8fda8c4 268 my_swabn(&var, sizeof(var)); \
3cc90e2e
NC
269 } \
270 } STMT_END
271
3cc90e2e
NC
272#else
273# define DO_BO_UNPACK(var, type) BO_CANT_DOIT(unpack, type)
274# define DO_BO_PACK(var, type) BO_CANT_DOIT(pack, type)
3cc90e2e
NC
275#endif
276
7212898e 277# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
278 STMT_START { \
279 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
280 case TYPE_IS_BIG_ENDIAN: \
281 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
282 "platform", #action, #type); \
283 break; \
284 case TYPE_IS_LITTLE_ENDIAN: \
285 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
286 "platform", #action, #type); \
287 break; \
288 default: \
289 break; \
290 } \
291 } STMT_END
292
78d46eaa 293#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 294#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
295#define PACK_SIZE_MASK 0x3F
296
298bc19c 297#include "packsizetables.c"
78d46eaa 298
08ca2aa3 299STATIC U8
f7fe979e 300uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 301{
08ca2aa3 302 STRLEN retlen;
0bcc34c2 303 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 304 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 305 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
306 warnings), but these two mean we make no progress in the string and
307 might enter an infinite loop */
308 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
309 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
310 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 311 if (val >= 0x100) {
a2a5de95
NC
312 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
313 "Character in '%c' format wrapped in unpack",
314 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
315 val &= 0xff;
316 }
317 *s += retlen;
fe2774ed 318 return (U8)val;
08ca2aa3
TH
319}
320
f337b084
TH
321#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
322 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
323 *(U8 *)(s)++)
324
325STATIC bool
f7fe979e 326uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
327{
328 UV val;
329 STRLEN retlen;
f7fe979e 330 const char *from = *s;
08ca2aa3 331 int bad = 0;
f7fe979e 332 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
333 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
334 for (;buf_len > 0; buf_len--) {
335 if (from >= end) return FALSE;
f337b084 336 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
337 if (retlen == (STRLEN) -1 || retlen == 0) {
338 from += UTF8SKIP(from);
339 bad |= 1;
340 } else from += retlen;
341 if (val >= 0x100) {
342 bad |= 2;
343 val &= 0xff;
344 }
fe2774ed 345 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
346 }
347 /* We have enough characters for the buffer. Did we have problems ? */
348 if (bad) {
349 if (bad & 1) {
350 /* Rewalk the string fragment while warning */
f7fe979e 351 const char *ptr;
9e27e96a 352 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
353 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
354 if (ptr >= end) break;
f337b084 355 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
356 }
357 if (from > end) from = end;
358 }
a2a5de95
NC
359 if ((bad & 2))
360 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 361 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
362 "Character(s) in '%c' format wrapped in %s",
363 (int) TYPE_NO_MODIFIERS(datumtype),
364 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
365 }
366 *s = from;
367 return TRUE;
368}
369
370STATIC bool
f7fe979e 371next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 372{
97aff369 373 dVAR;
08ca2aa3 374 STRLEN retlen;
0bcc34c2 375 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
376 if (val >= 0x100 || !ISUUCHAR(val) ||
377 retlen == (STRLEN) -1 || retlen == 0) {
378 *out = 0;
379 return FALSE;
380 }
381 *out = PL_uudmap[val] & 077;
f337b084 382 *s += retlen;
08ca2aa3
TH
383 return TRUE;
384}
78d46eaa 385
64844641 386STATIC char *
14333449 387S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 388 const U8 * const end = start + len;
64844641 389
7918f24d
NC
390 PERL_ARGS_ASSERT_BYTES_TO_UNI;
391
f337b084 392 while (start < end) {
48fa4626
ST
393 const UV uv = NATIVE_TO_ASCII(*start);
394 if (UNI_IS_INVARIANT(uv))
395 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
396 else {
397 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
398 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
399 }
400 start++;
401 }
64844641 402 return dest;
f337b084
TH
403}
404
230e1fce
NC
405#define PUSH_BYTES(utf8, cur, buf, len) \
406STMT_START { \
64844641
AL
407 if (utf8) \
408 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
409 else { \
410 Copy(buf, cur, len, char); \
411 (cur) += (len); \
412 } \
f337b084
TH
413} STMT_END
414
415#define GROWING(utf8, cat, start, cur, in_len) \
416STMT_START { \
417 STRLEN glen = (in_len); \
3473cf63 418 if (utf8) glen *= UTF8_EXPAND; \
f337b084 419 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 420 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
421 (cur) = (start) + SvCUR(cat); \
422 } \
423} STMT_END
424
425#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
426STMT_START { \
f7fe979e 427 const STRLEN glen = (in_len); \
f337b084 428 STRLEN gl = glen; \
3473cf63 429 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
430 if ((cur) + gl >= (start) + SvLEN(cat)) { \
431 *cur = '\0'; \
b162af07 432 SvCUR_set((cat), (cur) - (start)); \
0bd48802 433 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
434 (cur) = (start) + SvCUR(cat); \
435 } \
436 PUSH_BYTES(utf8, cur, buf, glen); \
437} STMT_END
438
439#define PUSH_BYTE(utf8, s, byte) \
440STMT_START { \
441 if (utf8) { \
f7fe979e 442 const U8 au8 = (byte); \
64844641 443 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
444 } else *(U8 *)(s)++ = (byte); \
445} STMT_END
446
447/* Only to be used inside a loop (see the break) */
448#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
449STMT_START { \
450 STRLEN retlen; \
451 if (str >= end) break; \
452 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
453 if (retlen == (STRLEN) -1 || retlen == 0) { \
454 *cur = '\0'; \
455 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
456 } \
457 str += retlen; \
458} STMT_END
459
f7fe979e
AL
460static const char *_action( const tempsym_t* symptr )
461{
10edeb5d 462 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
463}
464
206947d2 465/* Returns the sizeof() struct described by pat */
028d1f6d 466STATIC I32
f337b084 467S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 468{
f337b084 469 I32 total = 0;
206947d2 470
7918f24d
NC
471 PERL_ARGS_ASSERT_MEASURE_STRUCT;
472
49704364 473 while (next_symbol(symptr)) {
f337b084 474 I32 len;
f7fe979e 475 int size;
f337b084
TH
476
477 switch (symptr->howlen) {
fc241834 478 case e_star:
49704364 479 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 480 _action( symptr ) );
49704364 481 break;
f337b084
TH
482 default:
483 /* e_no_len and e_number */
484 len = symptr->length;
485 break;
49704364
LW
486 }
487
a7a3cfaa 488 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 489 if (!size) {
f7fe979e 490 int star;
80a13697
NC
491 /* endianness doesn't influence the size of a type */
492 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
493 default:
494 Perl_croak(aTHX_ "Invalid type '%c' in %s",
495 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 496 _action( symptr ) );
28be1210
TH
497 case '.' | TYPE_IS_SHRIEKING:
498 case '@' | TYPE_IS_SHRIEKING:
80a13697 499 case '@':
28be1210 500 case '.':
80a13697
NC
501 case '/':
502 case 'U': /* XXXX Is it correct? */
503 case 'w':
504 case 'u':
505 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 506 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 507 _action( symptr ) );
80a13697
NC
508 case '%':
509 size = 0;
510 break;
511 case '(':
fc241834
RGS
512 {
513 tempsym_t savsym = *symptr;
514 symptr->patptr = savsym.grpbeg;
515 symptr->patend = savsym.grpend;
516 /* XXXX Theoretically, we need to measure many times at
517 different positions, since the subexpression may contain
518 alignment commands, but be not of aligned length.
519 Need to detect this and croak(). */
520 size = measure_struct(symptr);
521 *symptr = savsym;
522 break;
523 }
80a13697
NC
524 case 'X' | TYPE_IS_SHRIEKING:
525 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
526 */
527 if (!len) /* Avoid division by 0 */
528 len = 1;
529 len = total % len; /* Assumed: the start is aligned. */
530 /* FALL THROUGH */
531 case 'X':
532 size = -1;
533 if (total < len)
f7fe979e 534 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
535 break;
536 case 'x' | TYPE_IS_SHRIEKING:
537 if (!len) /* Avoid division by 0 */
538 len = 1;
539 star = total % len; /* Assumed: the start is aligned. */
540 if (star) /* Other portable ways? */
541 len = len - star;
542 else
543 len = 0;
544 /* FALL THROUGH */
545 case 'x':
546 case 'A':
547 case 'Z':
548 case 'a':
80a13697
NC
549 size = 1;
550 break;
551 case 'B':
552 case 'b':
553 len = (len + 7)/8;
554 size = 1;
555 break;
556 case 'H':
557 case 'h':
558 len = (len + 1)/2;
559 size = 1;
560 break;
78d46eaa 561
80a13697
NC
562 case 'P':
563 len = 1;
564 size = sizeof(char*);
78d46eaa
NC
565 break;
566 }
206947d2
IZ
567 }
568 total += len * size;
569 }
570 return total;
571}
572
49704364
LW
573
574/* locate matching closing parenthesis or bracket
575 * returns char pointer to char after match, or NULL
576 */
f7fe979e 577STATIC const char *
5aaab254 578S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 579{
7918f24d
NC
580 PERL_ARGS_ASSERT_GROUP_END;
581
49704364 582 while (patptr < patend) {
f7fe979e 583 const char c = *patptr++;
49704364
LW
584
585 if (isSPACE(c))
586 continue;
587 else if (c == ender)
588 return patptr-1;
589 else if (c == '#') {
590 while (patptr < patend && *patptr != '\n')
591 patptr++;
592 continue;
593 } else if (c == '(')
594 patptr = group_end(patptr, patend, ')') + 1;
595 else if (c == '[')
596 patptr = group_end(patptr, patend, ']') + 1;
18529408 597 }
49704364
LW
598 Perl_croak(aTHX_ "No group ending character '%c' found in template",
599 ender);
600 return 0;
18529408
IZ
601}
602
49704364
LW
603
604/* Convert unsigned decimal number to binary.
605 * Expects a pointer to the first digit and address of length variable
606 * Advances char pointer to 1st non-digit char and returns number
fc241834 607 */
f7fe979e 608STATIC const char *
5aaab254 609S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
610{
611 I32 len = *patptr++ - '0';
7918f24d
NC
612
613 PERL_ARGS_ASSERT_GET_NUM;
614
49704364
LW
615 while (isDIGIT(*patptr)) {
616 if (len >= 0x7FFFFFFF/10)
617 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
618 len = (len * 10) + (*patptr++ - '0');
619 }
620 *lenptr = len;
621 return patptr;
622}
623
624/* The marvellous template parsing routine: Using state stored in *symptr,
625 * locates next template code and count
626 */
627STATIC bool
f337b084 628S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 629{
f7fe979e 630 const char* patptr = symptr->patptr;
0bcc34c2 631 const char* const patend = symptr->patend;
49704364 632
7918f24d
NC
633 PERL_ARGS_ASSERT_NEXT_SYMBOL;
634
49704364
LW
635 symptr->flags &= ~FLAG_SLASH;
636
637 while (patptr < patend) {
638 if (isSPACE(*patptr))
639 patptr++;
640 else if (*patptr == '#') {
641 patptr++;
642 while (patptr < patend && *patptr != '\n')
643 patptr++;
644 if (patptr < patend)
645 patptr++;
646 } else {
fc241834 647 /* We should have found a template code */
49704364 648 I32 code = *patptr++ & 0xFF;
66c611c5 649 U32 inherited_modifiers = 0;
49704364
LW
650
651 if (code == ','){ /* grandfather in commas but with a warning */
652 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
653 symptr->flags |= FLAG_COMMA;
654 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 655 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
656 }
657 continue;
658 }
fc241834 659
49704364 660 /* for '(', skip to ')' */
fc241834 661 if (code == '(') {
49704364
LW
662 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
663 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 664 _action( symptr ) );
49704364
LW
665 symptr->grpbeg = patptr;
666 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
667 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
668 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 669 _action( symptr ) );
49704364
LW
670 }
671
66c611c5
MHM
672 /* look for group modifiers to inherit */
673 if (TYPE_ENDIANNESS(symptr->flags)) {
674 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
675 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
676 }
677
1109a392
MHM
678 /* look for modifiers */
679 while (patptr < patend) {
b7787f18
AL
680 const char *allowed;
681 I32 modifier;
1109a392
MHM
682 switch (*patptr) {
683 case '!':
684 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 685 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
686 break;
687 case '>':
688 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 689 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
690 break;
691 case '<':
692 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 693 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
694 break;
695 default:
b7787f18
AL
696 allowed = "";
697 modifier = 0;
1109a392
MHM
698 break;
699 }
66c611c5 700
1109a392
MHM
701 if (modifier == 0)
702 break;
66c611c5 703
1109a392
MHM
704 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
705 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 706 allowed, _action( symptr ) );
66c611c5
MHM
707
708 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 709 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 710 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
711 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
712 TYPE_ENDIANNESS_MASK)
713 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 714 *patptr, _action( symptr ) );
66c611c5 715
a2a5de95
NC
716 if ((code & modifier)) {
717 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
718 "Duplicate modifier '%c' after '%c' in %s",
719 *patptr, (int) TYPE_NO_MODIFIERS(code),
720 _action( symptr ) );
1109a392 721 }
66c611c5 722
1109a392
MHM
723 code |= modifier;
724 patptr++;
49704364
LW
725 }
726
66c611c5
MHM
727 /* inherit modifiers */
728 code |= inherited_modifiers;
729
fc241834 730 /* look for count and/or / */
49704364
LW
731 if (patptr < patend) {
732 if (isDIGIT(*patptr)) {
733 patptr = get_num( patptr, &symptr->length );
734 symptr->howlen = e_number;
735
736 } else if (*patptr == '*') {
737 patptr++;
738 symptr->howlen = e_star;
739
740 } else if (*patptr == '[') {
f7fe979e 741 const char* lenptr = ++patptr;
49704364
LW
742 symptr->howlen = e_number;
743 patptr = group_end( patptr, patend, ']' ) + 1;
744 /* what kind of [] is it? */
745 if (isDIGIT(*lenptr)) {
746 lenptr = get_num( lenptr, &symptr->length );
747 if( *lenptr != ']' )
748 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 749 _action( symptr ) );
49704364
LW
750 } else {
751 tempsym_t savsym = *symptr;
752 symptr->patend = patptr-1;
753 symptr->patptr = lenptr;
754 savsym.length = measure_struct(symptr);
755 *symptr = savsym;
756 }
757 } else {
758 symptr->howlen = e_no_len;
759 symptr->length = 1;
760 }
761
762 /* try to find / */
763 while (patptr < patend) {
764 if (isSPACE(*patptr))
765 patptr++;
766 else if (*patptr == '#') {
767 patptr++;
768 while (patptr < patend && *patptr != '\n')
769 patptr++;
770 if (patptr < patend)
771 patptr++;
772 } else {
66c611c5 773 if (*patptr == '/') {
49704364
LW
774 symptr->flags |= FLAG_SLASH;
775 patptr++;
66c611c5
MHM
776 if (patptr < patend &&
777 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 778 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 779 _action( symptr ) );
49704364
LW
780 }
781 break;
782 }
18529408 783 }
49704364
LW
784 } else {
785 /* at end - no count, no / */
786 symptr->howlen = e_no_len;
787 symptr->length = 1;
788 }
789
790 symptr->code = code;
fc241834 791 symptr->patptr = patptr;
49704364 792 return TRUE;
18529408 793 }
49704364 794 }
fc241834 795 symptr->patptr = patptr;
49704364 796 return FALSE;
18529408
IZ
797}
798
18529408 799/*
fc241834 800 There is no way to cleanly handle the case where we should process the
08ca2aa3 801 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
802 (e.g. estimates like strend-s as an upper bound for the number of
803 characters left wouldn't work). So if we foresee the need of this
804 (pattern starts with U or contains U0), we want to work on the encoded
805 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
806 themselves if they need to do a lot of unpacks like this on it
807*/
fc241834 808STATIC bool
08ca2aa3
TH
809need_utf8(const char *pat, const char *patend)
810{
811 bool first = TRUE;
7918f24d
NC
812
813 PERL_ARGS_ASSERT_NEED_UTF8;
814
08ca2aa3
TH
815 while (pat < patend) {
816 if (pat[0] == '#') {
817 pat++;
f7fe979e 818 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
819 if (!pat) return FALSE;
820 } else if (pat[0] == 'U') {
821 if (first || pat[1] == '0') return TRUE;
822 } else first = FALSE;
823 pat++;
824 }
825 return FALSE;
826}
827
828STATIC char
829first_symbol(const char *pat, const char *patend) {
7918f24d
NC
830 PERL_ARGS_ASSERT_FIRST_SYMBOL;
831
08ca2aa3
TH
832 while (pat < patend) {
833 if (pat[0] != '#') return pat[0];
834 pat++;
f7fe979e 835 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
836 if (!pat) return 0;
837 pat++;
838 }
839 return 0;
840}
841
842/*
7accc089
JH
843=for apidoc unpackstring
844
21ebfc7a
DM
845The engine implementing the unpack() Perl function.
846
847Using the template pat..patend, this function unpacks the string
848s..strend into a number of mortal SVs, which it pushes onto the perl
849argument (@_) stack (so you will need to issue a C<PUTBACK> before and
850C<SPAGAIN> after the call to this function). It returns the number of
851pushed elements.
852
853The strend and patend pointers should point to the byte following the last
854character of each string.
855
856Although this function returns its values on the perl argument stack, it
857doesn't take any parameters from that stack (and thus in particular
858there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
859example).
7accc089
JH
860
861=cut */
862
863I32
f7fe979e 864Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 865{
f7fe979e 866 tempsym_t sym;
08ca2aa3 867
7918f24d
NC
868 PERL_ARGS_ASSERT_UNPACKSTRING;
869
f337b084 870 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
871 else if (need_utf8(pat, patend)) {
872 /* We probably should try to avoid this in case a scalar context call
873 wouldn't get to the "U0" */
874 STRLEN len = strend - s;
230e1fce 875 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
876 SAVEFREEPV(s);
877 strend = s + len;
f337b084 878 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
879 }
880
f337b084
TH
881 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
882 flags |= FLAG_PARSE_UTF8;
08ca2aa3 883
f7fe979e 884 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
885
886 return unpack_rec(&sym, s, s, strend, NULL );
887}
888
4136a0f7 889STATIC I32
f7fe979e 890S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 891{
27da23d5 892 dVAR; dSP;
3297d27d 893 SV *sv = NULL;
f7fe979e 894 const I32 start_sp_offset = SP - PL_stack_base;
49704364 895 howlen_t howlen;
a6ec74c1 896 I32 checksum = 0;
92d41999 897 UV cuv = 0;
a6ec74c1 898 NV cdouble = 0.0;
f337b084 899 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 900 bool beyond = FALSE;
21c16052 901 bool explicit_length;
9e27e96a 902 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 903 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
904
905 PERL_ARGS_ASSERT_UNPACK_REC;
906
28be1210 907 symptr->strbeg = s - strbeg;
49704364 908
49704364 909 while (next_symbol(symptr)) {
a7a3cfaa 910 packprops_t props;
9e27e96a 911 I32 len;
f337b084 912 I32 datumtype = symptr->code;
206947d2 913 /* do first one only unless in list context
08ca2aa3 914 / is implemented by unpacking the count, then popping it from the
206947d2 915 stack, so must check that we're not in the middle of a / */
49704364 916 if ( unpack_only_one
206947d2 917 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 918 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 919 break;
49704364 920
f337b084 921 switch (howlen = symptr->howlen) {
fc241834
RGS
922 case e_star:
923 len = strend - strbeg; /* long enough */
49704364 924 break;
f337b084
TH
925 default:
926 /* e_no_len and e_number */
927 len = symptr->length;
928 break;
49704364 929 }
18529408 930
21c16052 931 explicit_length = TRUE;
a6ec74c1 932 redo_switch:
49704364 933 beyond = s >= strend;
a7a3cfaa
TH
934
935 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
936 if (props) {
937 /* props nonzero means we can process this letter. */
9e27e96a
AL
938 const long size = props & PACK_SIZE_MASK;
939 const long howmany = (strend - s) / size;
a7a3cfaa
TH
940 if (len > howmany)
941 len = howmany;
942
943 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
944 if (len && unpack_only_one) len = 1;
945 EXTEND(SP, len);
946 EXTEND_MORTAL(len);
78d46eaa
NC
947 }
948 }
a7a3cfaa 949
1109a392 950 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 951 default:
1109a392 952 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 953
a6ec74c1 954 case '%':
49704364 955 if (howlen == e_no_len)
18529408 956 len = 16; /* len is not specified */
a6ec74c1 957 checksum = len;
92d41999 958 cuv = 0;
a6ec74c1 959 cdouble = 0;
18529408 960 continue;
a6ec74c1 961 break;
18529408
IZ
962 case '(':
963 {
49704364 964 tempsym_t savsym = *symptr;
9e27e96a 965 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 966 symptr->flags |= group_modifiers;
49704364 967 symptr->patend = savsym.grpend;
28be1210 968 symptr->previous = &savsym;
49704364 969 symptr->level++;
18529408 970 PUTBACK;
c6f750d1 971 if (len && unpack_only_one) len = 1;
18529408 972 while (len--) {
49704364 973 symptr->patptr = savsym.grpbeg;
f337b084
TH
974 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
975 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
976 unpack_rec(symptr, s, strbeg, strend, &s);
977 if (s == strend && savsym.howlen == e_star)
49704364 978 break; /* No way to continue */
18529408
IZ
979 }
980 SPAGAIN;
28be1210 981 savsym.flags = symptr->flags & ~group_modifiers;
49704364 982 *symptr = savsym;
18529408
IZ
983 break;
984 }
28be1210 985 case '.' | TYPE_IS_SHRIEKING:
28be1210 986 case '.': {
9e27e96a 987 const char *from;
28be1210 988 SV *sv;
9e27e96a 989 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
990 if (howlen == e_star) from = strbeg;
991 else if (len <= 0) from = s;
992 else {
993 tempsym_t *group = symptr;
994
995 while (--len && group) group = group->previous;
996 from = group ? strbeg + group->strbeg : strbeg;
997 }
998 sv = from <= s ?
00646304
CB
999 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1000 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1001 mXPUSHs(sv);
28be1210
TH
1002 break;
1003 }
28be1210 1004 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 1005 case '@':
28be1210 1006 s = strbeg + symptr->strbeg;
28be1210 1007 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 1008 {
08ca2aa3
TH
1009 while (len > 0) {
1010 if (s >= strend)
1011 Perl_croak(aTHX_ "'@' outside of string in unpack");
1012 s += UTF8SKIP(s);
1013 len--;
1014 }
1015 if (s > strend)
1016 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1017 } else {
28be1210 1018 if (strend-s < len)
fc241834 1019 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1020 s += len;
08ca2aa3 1021 }
a6ec74c1 1022 break;
62f95557
IZ
1023 case 'X' | TYPE_IS_SHRIEKING:
1024 if (!len) /* Avoid division by 0 */
1025 len = 1;
08ca2aa3 1026 if (utf8) {
f7fe979e 1027 const char *hop, *last;
f337b084
TH
1028 I32 l = len;
1029 hop = last = strbeg;
1030 while (hop < s) {
1031 hop += UTF8SKIP(hop);
1032 if (--l == 0) {
08ca2aa3 1033 last = hop;
f337b084
TH
1034 l = len;
1035 }
fc241834 1036 }
f337b084
TH
1037 if (last > s)
1038 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1039 s = last;
1040 break;
f337b084
TH
1041 }
1042 len = (s - strbeg) % len;
62f95557 1043 /* FALL THROUGH */
a6ec74c1 1044 case 'X':
08ca2aa3
TH
1045 if (utf8) {
1046 while (len > 0) {
1047 if (s <= strbeg)
1048 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1049 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1050 if (s <= strbeg)
1051 Perl_croak(aTHX_ "'X' outside of string in unpack");
1052 }
1053 len--;
1054 }
1055 } else {
fc241834
RGS
1056 if (len > s - strbeg)
1057 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1058 s -= len;
08ca2aa3 1059 }
a6ec74c1 1060 break;
9e27e96a
AL
1061 case 'x' | TYPE_IS_SHRIEKING: {
1062 I32 ai32;
62f95557
IZ
1063 if (!len) /* Avoid division by 0 */
1064 len = 1;
230e1fce
NC
1065 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1066 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1067 if (ai32 == 0) break;
1068 len -= ai32;
9e27e96a 1069 }
62f95557 1070 /* FALL THROUGH */
a6ec74c1 1071 case 'x':
08ca2aa3
TH
1072 if (utf8) {
1073 while (len>0) {
1074 if (s >= strend)
1075 Perl_croak(aTHX_ "'x' outside of string in unpack");
1076 s += UTF8SKIP(s);
1077 len--;
1078 }
1079 } else {
fc241834
RGS
1080 if (len > strend - s)
1081 Perl_croak(aTHX_ "'x' outside of string in unpack");
1082 s += len;
f337b084 1083 }
a6ec74c1
JH
1084 break;
1085 case '/':
49704364
LW
1086 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1087 break;
a6ec74c1
JH
1088 case 'A':
1089 case 'Z':
1090 case 'a':
08ca2aa3
TH
1091 if (checksum) {
1092 /* Preliminary length estimate is assumed done in 'W' */
1093 if (len > strend - s) len = strend - s;
1094 goto W_checksum;
1095 }
1096 if (utf8) {
1097 I32 l;
f7fe979e 1098 const char *hop;
08ca2aa3
TH
1099 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1100 if (hop >= strend) {
1101 if (hop > strend)
1102 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1103 break;
fc241834 1104 }
a6ec74c1 1105 }
08ca2aa3
TH
1106 if (hop > strend)
1107 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1108 len = hop - s;
1109 } else if (len > strend - s)
1110 len = strend - s;
1111
1112 if (datumtype == 'Z') {
1113 /* 'Z' strips stuff after first null */
f7fe979e 1114 const char *ptr, *end;
f337b084
TH
1115 end = s + len;
1116 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1117 sv = newSVpvn(s, ptr-s);
1118 if (howlen == e_star) /* exact for 'Z*' */
1119 len = ptr-s + (ptr != strend ? 1 : 0);
1120 } else if (datumtype == 'A') {
1121 /* 'A' strips both nulls and spaces */
f7fe979e 1122 const char *ptr;
18bdf90a
TH
1123 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1124 for (ptr = s+len-1; ptr >= s; ptr--)
1125 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1126 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1127 if (ptr >= s) ptr += UTF8SKIP(ptr);
1128 else ptr++;
28be1210 1129 if (ptr > s+len)
18bdf90a
TH
1130 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1131 } else {
1132 for (ptr = s+len-1; ptr >= s; ptr--)
1133 if (*ptr != 0 && !isSPACE(*ptr)) break;
1134 ptr++;
1135 }
08ca2aa3
TH
1136 sv = newSVpvn(s, ptr-s);
1137 } else sv = newSVpvn(s, len);
1138
1139 if (utf8) {
1140 SvUTF8_on(sv);
1141 /* Undo any upgrade done due to need_utf8() */
f337b084 1142 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1143 sv_utf8_downgrade(sv, 0);
a6ec74c1 1144 }
6e449a3a 1145 mXPUSHs(sv);
08ca2aa3 1146 s += len;
a6ec74c1
JH
1147 break;
1148 case 'B':
08ca2aa3
TH
1149 case 'b': {
1150 char *str;
49704364 1151 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1152 len = (strend - s) * 8;
1153 if (checksum) {
f337b084 1154 if (utf8)
08ca2aa3 1155 while (len >= 8 && s < strend) {
f337b084 1156 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1157 len -= 8;
1158 }
f337b084 1159 else
fc241834 1160 while (len >= 8) {
08ca2aa3 1161 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1162 len -= 8;
1163 }
08ca2aa3
TH
1164 if (len && s < strend) {
1165 U8 bits;
f337b084
TH
1166 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1167 if (datumtype == 'b')
a6ec74c1 1168 while (len-- > 0) {
92d41999 1169 if (bits & 1) cuv++;
a6ec74c1
JH
1170 bits >>= 1;
1171 }
f337b084 1172 else
a6ec74c1 1173 while (len-- > 0) {
08ca2aa3 1174 if (bits & 0x80) cuv++;
a6ec74c1
JH
1175 bits <<= 1;
1176 }
fc241834 1177 }
a6ec74c1
JH
1178 break;
1179 }
08ca2aa3 1180
561b68a9 1181 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1182 SvPOK_on(sv);
1183 str = SvPVX(sv);
1184 if (datumtype == 'b') {
f337b084 1185 U8 bits = 0;
f7fe979e 1186 const I32 ai32 = len;
08ca2aa3
TH
1187 for (len = 0; len < ai32; len++) {
1188 if (len & 7) bits >>= 1;
1189 else if (utf8) {
1190 if (s >= strend) break;
f337b084 1191 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1192 } else bits = *(U8 *) s++;
1193 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1194 }
08ca2aa3 1195 } else {
f337b084 1196 U8 bits = 0;
f7fe979e 1197 const I32 ai32 = len;
08ca2aa3
TH
1198 for (len = 0; len < ai32; len++) {
1199 if (len & 7) bits <<= 1;
1200 else if (utf8) {
1201 if (s >= strend) break;
f337b084 1202 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1203 } else bits = *(U8 *) s++;
1204 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1205 }
1206 }
1207 *str = '\0';
aa07b2f6 1208 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1209 XPUSHs(sv);
a6ec74c1 1210 break;
08ca2aa3 1211 }
a6ec74c1 1212 case 'H':
08ca2aa3 1213 case 'h': {
3297d27d 1214 char *str = NULL;
fc241834 1215 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1216 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1217 len = (strend - s) * 2;
858fe5e1
TC
1218 if (!checksum) {
1219 sv = sv_2mortal(newSV(len ? len : 1));
1220 SvPOK_on(sv);
1221 str = SvPVX(sv);
1222 }
a6ec74c1 1223 if (datumtype == 'h') {
f337b084 1224 U8 bits = 0;
9e27e96a 1225 I32 ai32 = len;
fc241834
RGS
1226 for (len = 0; len < ai32; len++) {
1227 if (len & 1) bits >>= 4;
1228 else if (utf8) {
1229 if (s >= strend) break;
f337b084 1230 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1231 } else bits = * (U8 *) s++;
858fe5e1
TC
1232 if (!checksum)
1233 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1234 }
08ca2aa3 1235 } else {
f337b084 1236 U8 bits = 0;
f7fe979e 1237 const I32 ai32 = len;
08ca2aa3
TH
1238 for (len = 0; len < ai32; len++) {
1239 if (len & 1) bits <<= 4;
1240 else if (utf8) {
1241 if (s >= strend) break;
f337b084 1242 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1243 } else bits = *(U8 *) s++;
858fe5e1
TC
1244 if (!checksum)
1245 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1246 }
1247 }
858fe5e1
TC
1248 if (!checksum) {
1249 *str = '\0';
1250 SvCUR_set(sv, str - SvPVX_const(sv));
1251 XPUSHs(sv);
1252 }
a6ec74c1 1253 break;
08ca2aa3 1254 }
1651fc44
ML
1255 case 'C':
1256 if (len == 0) {
1257 if (explicit_length)
1258 /* Switch to "character" mode */
1259 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1260 break;
1261 }
1262 /* FALL THROUGH */
a6ec74c1 1263 case 'c':
1651fc44
ML
1264 while (len-- > 0 && s < strend) {
1265 int aint;
1266 if (utf8)
1267 {
1268 STRLEN retlen;
1269 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1270 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1271 if (retlen == (STRLEN) -1 || retlen == 0)
1272 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1273 s += retlen;
1274 }
1275 else
1276 aint = *(U8 *)(s)++;
1277 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1278 aint -= 256;
08ca2aa3 1279 if (!checksum)
6e449a3a 1280 mPUSHi(aint);
73cb7263
NC
1281 else if (checksum > bits_in_uv)
1282 cdouble += (NV)aint;
1283 else
1284 cuv += aint;
a6ec74c1
JH
1285 }
1286 break;
08ca2aa3
TH
1287 case 'W':
1288 W_checksum:
1651fc44 1289 if (utf8) {
08ca2aa3 1290 while (len-- > 0 && s < strend) {
08ca2aa3 1291 STRLEN retlen;
f7fe979e 1292 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1293 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1294 if (retlen == (STRLEN) -1 || retlen == 0)
1295 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1296 s += retlen;
1297 if (!checksum)
6e449a3a 1298 mPUSHu(val);
08ca2aa3
TH
1299 else if (checksum > bits_in_uv)
1300 cdouble += (NV) val;
d6d3e8bd 1301 else
08ca2aa3 1302 cuv += val;
fc241834 1303 }
08ca2aa3 1304 } else if (!checksum)
a6ec74c1 1305 while (len-- > 0) {
f7fe979e 1306 const U8 ch = *(U8 *) s++;
6e449a3a 1307 mPUSHu(ch);
a6ec74c1 1308 }
08ca2aa3
TH
1309 else if (checksum > bits_in_uv)
1310 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1311 else
1312 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1313 break;
1314 case 'U':
35bcd338 1315 if (len == 0) {
c5333953 1316 if (explicit_length && howlen != e_star) {
08ca2aa3 1317 /* Switch to "bytes in UTF-8" mode */
f337b084 1318 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1319 else
1320 /* Should be impossible due to the need_utf8() test */
1321 Perl_croak(aTHX_ "U0 mode on a byte string");
1322 }
35bcd338
JH
1323 break;
1324 }
08ca2aa3 1325 if (len > strend - s) len = strend - s;
fc241834 1326 if (!checksum) {
08ca2aa3
TH
1327 if (len && unpack_only_one) len = 1;
1328 EXTEND(SP, len);
1329 EXTEND_MORTAL(len);
fc241834 1330 }
08ca2aa3
TH
1331 while (len-- > 0 && s < strend) {
1332 STRLEN retlen;
1333 UV auv;
1334 if (utf8) {
1335 U8 result[UTF8_MAXLEN];
f7fe979e 1336 const char *ptr = s;
08ca2aa3 1337 STRLEN len;
08ca2aa3
TH
1338 /* Bug: warns about bad utf8 even if we are short on bytes
1339 and will break out of the loop */
230e1fce
NC
1340 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1341 'U'))
08ca2aa3
TH
1342 break;
1343 len = UTF8SKIP(result);
fc241834 1344 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1345 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1346 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1347 s = ptr;
1348 } else {
cde84f2a 1349 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1350 if (retlen == (STRLEN) -1 || retlen == 0)
1351 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1352 s += retlen;
1353 }
1354 if (!checksum)
6e449a3a 1355 mPUSHu(auv);
73cb7263 1356 else if (checksum > bits_in_uv)
08ca2aa3 1357 cdouble += (NV) auv;
73cb7263 1358 else
08ca2aa3 1359 cuv += auv;
a6ec74c1
JH
1360 }
1361 break;
49704364
LW
1362 case 's' | TYPE_IS_SHRIEKING:
1363#if SHORTSIZE != SIZE16
73cb7263 1364 while (len-- > 0) {
08ca2aa3 1365 short ashort;
f337b084
TH
1366 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1367 DO_BO_UNPACK(ashort, s);
08ca2aa3 1368 if (!checksum)
6e449a3a 1369 mPUSHi(ashort);
73cb7263
NC
1370 else if (checksum > bits_in_uv)
1371 cdouble += (NV)ashort;
1372 else
1373 cuv += ashort;
49704364
LW
1374 }
1375 break;
1376#else
1377 /* Fallthrough! */
a6ec74c1 1378#endif
49704364 1379 case 's':
73cb7263 1380 while (len-- > 0) {
08ca2aa3
TH
1381 I16 ai16;
1382
1383#if U16SIZE > SIZE16
1384 ai16 = 0;
1385#endif
f337b084 1386 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1387 DO_BO_UNPACK(ai16, 16);
1109a392 1388#if U16SIZE > SIZE16
73cb7263
NC
1389 if (ai16 > 32767)
1390 ai16 -= 65536;
a6ec74c1 1391#endif
08ca2aa3 1392 if (!checksum)
6e449a3a 1393 mPUSHi(ai16);
73cb7263
NC
1394 else if (checksum > bits_in_uv)
1395 cdouble += (NV)ai16;
1396 else
1397 cuv += ai16;
a6ec74c1
JH
1398 }
1399 break;
49704364
LW
1400 case 'S' | TYPE_IS_SHRIEKING:
1401#if SHORTSIZE != SIZE16
73cb7263 1402 while (len-- > 0) {
08ca2aa3 1403 unsigned short aushort;
f337b084
TH
1404 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1405 DO_BO_UNPACK(aushort, s);
08ca2aa3 1406 if (!checksum)
6e449a3a 1407 mPUSHu(aushort);
73cb7263
NC
1408 else if (checksum > bits_in_uv)
1409 cdouble += (NV)aushort;
1410 else
1411 cuv += aushort;
49704364
LW
1412 }
1413 break;
1414#else
486ec47a 1415 /* Fallthrough! */
49704364 1416#endif
a6ec74c1
JH
1417 case 'v':
1418 case 'n':
1419 case 'S':
73cb7263 1420 while (len-- > 0) {
08ca2aa3
TH
1421 U16 au16;
1422#if U16SIZE > SIZE16
1423 au16 = 0;
1424#endif
f337b084 1425 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1426 DO_BO_UNPACK(au16, 16);
73cb7263
NC
1427 if (datumtype == 'n')
1428 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1429 if (datumtype == 'v')
1430 au16 = vtohs(au16);
08ca2aa3 1431 if (!checksum)
6e449a3a 1432 mPUSHu(au16);
73cb7263 1433 else if (checksum > bits_in_uv)
f337b084 1434 cdouble += (NV) au16;
73cb7263
NC
1435 else
1436 cuv += au16;
a6ec74c1
JH
1437 }
1438 break;
068bd2e7
MHM
1439 case 'v' | TYPE_IS_SHRIEKING:
1440 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1441 while (len-- > 0) {
08ca2aa3
TH
1442 I16 ai16;
1443# if U16SIZE > SIZE16
1444 ai16 = 0;
1445# endif
f337b084 1446 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1447 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1448 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1449 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1450 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1451 if (!checksum)
6e449a3a 1452 mPUSHi(ai16);
73cb7263 1453 else if (checksum > bits_in_uv)
08ca2aa3 1454 cdouble += (NV) ai16;
73cb7263
NC
1455 else
1456 cuv += ai16;
068bd2e7
MHM
1457 }
1458 break;
a6ec74c1 1459 case 'i':
49704364 1460 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1461 while (len-- > 0) {
08ca2aa3 1462 int aint;
f337b084
TH
1463 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1464 DO_BO_UNPACK(aint, i);
08ca2aa3 1465 if (!checksum)
6e449a3a 1466 mPUSHi(aint);
73cb7263
NC
1467 else if (checksum > bits_in_uv)
1468 cdouble += (NV)aint;
1469 else
1470 cuv += aint;
a6ec74c1
JH
1471 }
1472 break;
1473 case 'I':
49704364 1474 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1475 while (len-- > 0) {
08ca2aa3 1476 unsigned int auint;
f337b084
TH
1477 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1478 DO_BO_UNPACK(auint, i);
08ca2aa3 1479 if (!checksum)
6e449a3a 1480 mPUSHu(auint);
73cb7263
NC
1481 else if (checksum > bits_in_uv)
1482 cdouble += (NV)auint;
1483 else
1484 cuv += auint;
a6ec74c1
JH
1485 }
1486 break;
92d41999 1487 case 'j':
73cb7263 1488 while (len-- > 0) {
08ca2aa3 1489 IV aiv;
f337b084 1490 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1491#if IVSIZE == INTSIZE
f337b084 1492 DO_BO_UNPACK(aiv, i);
1109a392 1493#elif IVSIZE == LONGSIZE
f337b084 1494 DO_BO_UNPACK(aiv, l);
1109a392 1495#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1496 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1497#else
1498 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1499#endif
08ca2aa3 1500 if (!checksum)
6e449a3a 1501 mPUSHi(aiv);
73cb7263
NC
1502 else if (checksum > bits_in_uv)
1503 cdouble += (NV)aiv;
1504 else
1505 cuv += aiv;
92d41999
JH
1506 }
1507 break;
1508 case 'J':
73cb7263 1509 while (len-- > 0) {
08ca2aa3 1510 UV auv;
f337b084 1511 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1512#if IVSIZE == INTSIZE
f337b084 1513 DO_BO_UNPACK(auv, i);
08ca2aa3 1514#elif IVSIZE == LONGSIZE
f337b084 1515 DO_BO_UNPACK(auv, l);
08ca2aa3 1516#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1517 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1518#else
1519 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1520#endif
08ca2aa3 1521 if (!checksum)
6e449a3a 1522 mPUSHu(auv);
73cb7263
NC
1523 else if (checksum > bits_in_uv)
1524 cdouble += (NV)auv;
1525 else
1526 cuv += auv;
92d41999
JH
1527 }
1528 break;
49704364
LW
1529 case 'l' | TYPE_IS_SHRIEKING:
1530#if LONGSIZE != SIZE32
73cb7263 1531 while (len-- > 0) {
08ca2aa3 1532 long along;
f337b084
TH
1533 SHIFT_VAR(utf8, s, strend, along, datumtype);
1534 DO_BO_UNPACK(along, l);
08ca2aa3 1535 if (!checksum)
6e449a3a 1536 mPUSHi(along);
73cb7263
NC
1537 else if (checksum > bits_in_uv)
1538 cdouble += (NV)along;
1539 else
1540 cuv += along;
49704364
LW
1541 }
1542 break;
1543#else
1544 /* Fallthrough! */
a6ec74c1 1545#endif
49704364 1546 case 'l':
73cb7263 1547 while (len-- > 0) {
08ca2aa3
TH
1548 I32 ai32;
1549#if U32SIZE > SIZE32
1550 ai32 = 0;
1551#endif
f337b084 1552 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1553 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1554#if U32SIZE > SIZE32
08ca2aa3 1555 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1556#endif
08ca2aa3 1557 if (!checksum)
6e449a3a 1558 mPUSHi(ai32);
73cb7263
NC
1559 else if (checksum > bits_in_uv)
1560 cdouble += (NV)ai32;
1561 else
1562 cuv += ai32;
a6ec74c1
JH
1563 }
1564 break;
49704364
LW
1565 case 'L' | TYPE_IS_SHRIEKING:
1566#if LONGSIZE != SIZE32
73cb7263 1567 while (len-- > 0) {
08ca2aa3 1568 unsigned long aulong;
f337b084
TH
1569 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1570 DO_BO_UNPACK(aulong, l);
08ca2aa3 1571 if (!checksum)
6e449a3a 1572 mPUSHu(aulong);
73cb7263
NC
1573 else if (checksum > bits_in_uv)
1574 cdouble += (NV)aulong;
1575 else
1576 cuv += aulong;
49704364
LW
1577 }
1578 break;
1579#else
1580 /* Fall through! */
1581#endif
a6ec74c1
JH
1582 case 'V':
1583 case 'N':
1584 case 'L':
73cb7263 1585 while (len-- > 0) {
08ca2aa3
TH
1586 U32 au32;
1587#if U32SIZE > SIZE32
1588 au32 = 0;
1589#endif
f337b084 1590 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1591 DO_BO_UNPACK(au32, 32);
73cb7263
NC
1592 if (datumtype == 'N')
1593 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1594 if (datumtype == 'V')
1595 au32 = vtohl(au32);
08ca2aa3 1596 if (!checksum)
6e449a3a 1597 mPUSHu(au32);
fc241834
RGS
1598 else if (checksum > bits_in_uv)
1599 cdouble += (NV)au32;
1600 else
1601 cuv += au32;
a6ec74c1
JH
1602 }
1603 break;
068bd2e7
MHM
1604 case 'V' | TYPE_IS_SHRIEKING:
1605 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1606 while (len-- > 0) {
08ca2aa3 1607 I32 ai32;
f8e5a5db 1608#if U32SIZE > SIZE32
08ca2aa3 1609 ai32 = 0;
f8e5a5db 1610#endif
f337b084 1611 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263
NC
1612 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1613 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1614 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1615 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1616 if (!checksum)
6e449a3a 1617 mPUSHi(ai32);
73cb7263
NC
1618 else if (checksum > bits_in_uv)
1619 cdouble += (NV)ai32;
1620 else
1621 cuv += ai32;
068bd2e7
MHM
1622 }
1623 break;
a6ec74c1 1624 case 'p':
a6ec74c1 1625 while (len-- > 0) {
f7fe979e 1626 const char *aptr;
f337b084 1627 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
7e38f754 1628 DO_BO_UNPACK(aptr, pointer);
c4c5f44a 1629 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1630 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1631 }
1632 break;
1633 case 'w':
a6ec74c1
JH
1634 {
1635 UV auv = 0;
1636 U32 bytes = 0;
fc241834 1637
08ca2aa3
TH
1638 while (len > 0 && s < strend) {
1639 U8 ch;
f337b084 1640 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1641 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1642 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1643 if (ch < 0x80) {
a6ec74c1 1644 bytes = 0;
6e449a3a 1645 mPUSHu(auv);
a6ec74c1
JH
1646 len--;
1647 auv = 0;
08ca2aa3 1648 continue;
a6ec74c1 1649 }
08ca2aa3 1650 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1651 const char *t;
a6ec74c1 1652
f5992bc4 1653 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1654 while (s < strend) {
f337b084 1655 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1656 sv = mul128(sv, (U8)(ch & 0x7f));
1657 if (!(ch & 0x80)) {
a6ec74c1
JH
1658 bytes = 0;
1659 break;
1660 }
1661 }
10516c54 1662 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1663 while (*t == '0')
1664 t++;
1665 sv_chop(sv, t);
6e449a3a 1666 mPUSHs(sv);
a6ec74c1
JH
1667 len--;
1668 auv = 0;
1669 }
1670 }
1671 if ((s >= strend) && bytes)
49704364 1672 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1673 }
1674 break;
1675 case 'P':
49704364
LW
1676 if (symptr->howlen == e_star)
1677 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1678 EXTEND(SP, 1);
2d3e0934 1679 if (s + sizeof(char*) <= strend) {
08ca2aa3 1680 char *aptr;
f337b084 1681 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
7e38f754 1682 DO_BO_UNPACK(aptr, pointer);
fc241834 1683 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1684 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1685 }
a6ec74c1
JH
1686 break;
1687#ifdef HAS_QUAD
1688 case 'q':
73cb7263 1689 while (len-- > 0) {
08ca2aa3 1690 Quad_t aquad;
f337b084
TH
1691 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1692 DO_BO_UNPACK(aquad, 64);
08ca2aa3 1693 if (!checksum)
6e449a3a
MHM
1694 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1695 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1696 else if (checksum > bits_in_uv)
1697 cdouble += (NV)aquad;
1698 else
1699 cuv += aquad;
1700 }
a6ec74c1
JH
1701 break;
1702 case 'Q':
73cb7263 1703 while (len-- > 0) {
08ca2aa3 1704 Uquad_t auquad;
f337b084
TH
1705 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1706 DO_BO_UNPACK(auquad, 64);
08ca2aa3 1707 if (!checksum)
6e449a3a
MHM
1708 mPUSHs(auquad <= UV_MAX ?
1709 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1710 else if (checksum > bits_in_uv)
1711 cdouble += (NV)auquad;
1712 else
1713 cuv += auquad;
a6ec74c1
JH
1714 }
1715 break;
08ca2aa3 1716#endif /* HAS_QUAD */
a6ec74c1
JH
1717 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1718 case 'f':
73cb7263 1719 while (len-- > 0) {
08ca2aa3 1720 float afloat;
f337b084 1721 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
7e38f754 1722 DO_BO_UNPACK(afloat, float);
08ca2aa3 1723 if (!checksum)
6e449a3a 1724 mPUSHn(afloat);
08ca2aa3 1725 else
73cb7263 1726 cdouble += afloat;
fc241834 1727 }
a6ec74c1
JH
1728 break;
1729 case 'd':
73cb7263 1730 while (len-- > 0) {
08ca2aa3 1731 double adouble;
f337b084 1732 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
7e38f754 1733 DO_BO_UNPACK(adouble, double);
08ca2aa3 1734 if (!checksum)
6e449a3a 1735 mPUSHn(adouble);
08ca2aa3 1736 else
73cb7263 1737 cdouble += adouble;
fc241834 1738 }
a6ec74c1 1739 break;
92d41999 1740 case 'F':
73cb7263 1741 while (len-- > 0) {
275663fa
TC
1742 NV_bytes anv;
1743 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
7e38f754 1744 DO_BO_UNPACK(anv.nv, NV);
08ca2aa3 1745 if (!checksum)
275663fa 1746 mPUSHn(anv.nv);
08ca2aa3 1747 else
275663fa 1748 cdouble += anv.nv;
fc241834 1749 }
92d41999
JH
1750 break;
1751#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1752 case 'D':
73cb7263 1753 while (len-- > 0) {
275663fa
TC
1754 ld_bytes aldouble;
1755 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
7e38f754 1756 DO_BO_UNPACK(aldouble.ld, long double);
08ca2aa3 1757 if (!checksum)
275663fa 1758 mPUSHn(aldouble.ld);
08ca2aa3 1759 else
275663fa 1760 cdouble += aldouble.ld;
92d41999
JH
1761 }
1762 break;
1763#endif
a6ec74c1 1764 case 'u':
858fe5e1 1765 if (!checksum) {
f7fe979e 1766 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1767 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1768 if (l) SvPOK_on(sv);
1769 }
1770 if (utf8) {
1771 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1772 I32 a, b, c, d;
db187877 1773 char hunk[3];
08ca2aa3 1774
08ca2aa3
TH
1775 while (len > 0) {
1776 next_uni_uu(aTHX_ &s, strend, &a);
1777 next_uni_uu(aTHX_ &s, strend, &b);
1778 next_uni_uu(aTHX_ &s, strend, &c);
1779 next_uni_uu(aTHX_ &s, strend, &d);
1780 hunk[0] = (char)((a << 2) | (b >> 4));
1781 hunk[1] = (char)((b << 4) | (c >> 2));
1782 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1783 if (!checksum)
1784 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1785 len -= 3;
1786 }
1787 if (s < strend) {
f7fe979e
AL
1788 if (*s == '\n') {
1789 s++;
1790 }
08ca2aa3
TH
1791 else {
1792 /* possible checksum byte */
f7fe979e
AL
1793 const char *skip = s+UTF8SKIP(s);
1794 if (skip < strend && *skip == '\n')
1795 s = skip+1;
08ca2aa3
TH
1796 }
1797 }
1798 }
1799 } else {
fc241834
RGS
1800 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1801 I32 a, b, c, d;
db187877 1802 char hunk[3];
a6ec74c1 1803
fc241834
RGS
1804 len = PL_uudmap[*(U8*)s++] & 077;
1805 while (len > 0) {
1806 if (s < strend && ISUUCHAR(*s))
1807 a = PL_uudmap[*(U8*)s++] & 077;
1808 else
1809 a = 0;
1810 if (s < strend && ISUUCHAR(*s))
1811 b = PL_uudmap[*(U8*)s++] & 077;
1812 else
1813 b = 0;
1814 if (s < strend && ISUUCHAR(*s))
1815 c = PL_uudmap[*(U8*)s++] & 077;
1816 else
1817 c = 0;
1818 if (s < strend && ISUUCHAR(*s))
1819 d = PL_uudmap[*(U8*)s++] & 077;
1820 else
1821 d = 0;
1822 hunk[0] = (char)((a << 2) | (b >> 4));
1823 hunk[1] = (char)((b << 4) | (c >> 2));
1824 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1825 if (!checksum)
1826 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1827 len -= 3;
1828 }
1829 if (*s == '\n')
1830 s++;
1831 else /* possible checksum byte */
1832 if (s + 1 < strend && s[1] == '\n')
1833 s += 2;
a6ec74c1 1834 }
08ca2aa3 1835 }
858fe5e1
TC
1836 if (!checksum)
1837 XPUSHs(sv);
a6ec74c1
JH
1838 break;
1839 }
49704364 1840
a6ec74c1 1841 if (checksum) {
1109a392 1842 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1843 (checksum > bits_in_uv &&
08ca2aa3
TH
1844 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1845 NV trouble, anv;
a6ec74c1 1846
08ca2aa3 1847 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1848 while (checksum >= 16) {
1849 checksum -= 16;
08ca2aa3 1850 anv *= 65536.0;
a6ec74c1 1851 }
a6ec74c1 1852 while (cdouble < 0.0)
08ca2aa3
TH
1853 cdouble += anv;
1854 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1855 sv = newSVnv(cdouble);
a6ec74c1
JH
1856 }
1857 else {
fa8ec7c1
NC
1858 if (checksum < bits_in_uv) {
1859 UV mask = ((UV)1 << checksum) - 1;
92d41999 1860 cuv &= mask;
a6ec74c1 1861 }
c4c5f44a 1862 sv = newSVuv(cuv);
a6ec74c1 1863 }
6e449a3a 1864 mXPUSHs(sv);
a6ec74c1
JH
1865 checksum = 0;
1866 }
fc241834 1867
49704364
LW
1868 if (symptr->flags & FLAG_SLASH){
1869 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1870 break;
49704364
LW
1871 if( next_symbol(symptr) ){
1872 if( symptr->howlen == e_number )
1873 Perl_croak(aTHX_ "Count after length/code in unpack" );
1874 if( beyond ){
1875 /* ...end of char buffer then no decent length available */
1876 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1877 } else {
1878 /* take top of stack (hope it's numeric) */
1879 len = POPi;
1880 if( len < 0 )
1881 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1882 }
1883 } else {
1884 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1885 }
1886 datumtype = symptr->code;
21c16052 1887 explicit_length = FALSE;
49704364
LW
1888 goto redo_switch;
1889 }
a6ec74c1 1890 }
49704364 1891
18529408
IZ
1892 if (new_s)
1893 *new_s = s;
1894 PUTBACK;
1895 return SP - PL_stack_base - start_sp_offset;
1896}
1897
1898PP(pp_unpack)
1899{
97aff369 1900 dVAR;
18529408 1901 dSP;
bab9c0ac 1902 dPOPPOPssrl;
18529408
IZ
1903 I32 gimme = GIMME_V;
1904 STRLEN llen;
1905 STRLEN rlen;
5c144d81
NC
1906 const char *pat = SvPV_const(left, llen);
1907 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1908 const char *strend = s + rlen;
1909 const char *patend = pat + llen;
08ca2aa3 1910 I32 cnt;
18529408
IZ
1911
1912 PUTBACK;
7accc089 1913 cnt = unpackstring(pat, patend, s, strend,
49704364 1914 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1915 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1916
18529408
IZ
1917 SPAGAIN;
1918 if ( !cnt && gimme == G_SCALAR )
1919 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1920 RETURN;
1921}
1922
f337b084 1923STATIC U8 *
f7fe979e 1924doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1925{
f337b084 1926 *h++ = PL_uuemap[len];
a6ec74c1 1927 while (len > 2) {
f337b084
TH
1928 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1929 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1930 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1931 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1932 s += 3;
1933 len -= 3;
1934 }
1935 if (len > 0) {
f7fe979e 1936 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1937 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1938 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1939 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1940 *h++ = PL_uuemap[0];
a6ec74c1 1941 }
f337b084
TH
1942 *h++ = '\n';
1943 return h;
a6ec74c1
JH
1944}
1945
1946STATIC SV *
f7fe979e 1947S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1948{
8b6e33c7
AL
1949 SV *result = newSVpvn(s, l);
1950 char *const result_c = SvPV_nolen(result); /* convenience */
1951 char *out = result_c;
1952 bool skip = 1;
1953 bool ignore = 0;
a6ec74c1 1954
7918f24d
NC
1955 PERL_ARGS_ASSERT_IS_AN_INT;
1956
a6ec74c1
JH
1957 while (*s) {
1958 switch (*s) {
1959 case ' ':
1960 break;
1961 case '+':
1962 if (!skip) {
1963 SvREFCNT_dec(result);
1964 return (NULL);
1965 }
1966 break;
1967 case '0':
1968 case '1':
1969 case '2':
1970 case '3':
1971 case '4':
1972 case '5':
1973 case '6':
1974 case '7':
1975 case '8':
1976 case '9':
1977 skip = 0;
1978 if (!ignore) {
1979 *(out++) = *s;
1980 }
1981 break;
1982 case '.':
1983 ignore = 1;
1984 break;
1985 default:
1986 SvREFCNT_dec(result);
1987 return (NULL);
1988 }
1989 s++;
1990 }
1991 *(out++) = '\0';
1992 SvCUR_set(result, out - result_c);
1993 return (result);
1994}
1995
1996/* pnum must be '\0' terminated */
1997STATIC int
1998S_div128(pTHX_ SV *pnum, bool *done)
1999{
8b6e33c7
AL
2000 STRLEN len;
2001 char * const s = SvPV(pnum, len);
2002 char *t = s;
2003 int m = 0;
2004
7918f24d
NC
2005 PERL_ARGS_ASSERT_DIV128;
2006
8b6e33c7
AL
2007 *done = 1;
2008 while (*t) {
2009 const int i = m * 10 + (*t - '0');
2010 const int r = (i >> 7); /* r < 10 */
2011 m = i & 0x7F;
2012 if (r) {
2013 *done = 0;
2014 }
2015 *(t++) = '0' + r;
a6ec74c1 2016 }
8b6e33c7
AL
2017 *(t++) = '\0';
2018 SvCUR_set(pnum, (STRLEN) (t - s));
2019 return (m);
a6ec74c1
JH
2020}
2021
18529408 2022/*
7accc089
JH
2023=for apidoc packlist
2024
2025The engine implementing pack() Perl function.
2026
bfce84ec
AL
2027=cut
2028*/
7accc089
JH
2029
2030void
5aaab254 2031Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2032{
97aff369 2033 dVAR;
aadb217d
JH
2034 tempsym_t sym;
2035
7918f24d
NC
2036 PERL_ARGS_ASSERT_PACKLIST;
2037
f7fe979e 2038 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2039
f337b084
TH
2040 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2041 Also make sure any UTF8 flag is loaded */
56eb0262 2042 SvPV_force_nolen(cat);
bfce84ec
AL
2043 if (DO_UTF8(cat))
2044 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2045
49704364
LW
2046 (void)pack_rec( cat, &sym, beglist, endlist );
2047}
2048
f337b084
TH
2049/* like sv_utf8_upgrade, but also repoint the group start markers */
2050STATIC void
2051marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2052 STRLEN len;
2053 tempsym_t *group;
f7fe979e
AL
2054 const char *from_ptr, *from_start, *from_end, **marks, **m;
2055 char *to_start, *to_ptr;
f337b084
TH
2056
2057 if (SvUTF8(sv)) return;
2058
aa07b2f6 2059 from_start = SvPVX_const(sv);
f337b084
TH
2060 from_end = from_start + SvCUR(sv);
2061 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2062 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2063 if (from_ptr == from_end) {
2064 /* Simple case: no character needs to be changed */
2065 SvUTF8_on(sv);
2066 return;
2067 }
2068
3473cf63 2069 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2070 Newx(to_start, len, char);
f337b084
TH
2071 Copy(from_start, to_start, from_ptr-from_start, char);
2072 to_ptr = to_start + (from_ptr-from_start);
2073
a02a5408 2074 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2075 for (group=sym_ptr; group; group = group->previous)
2076 marks[group->level] = from_start + group->strbeg;
2077 marks[sym_ptr->level+1] = from_end+1;
2078 for (m = marks; *m < from_ptr; m++)
2079 *m = to_start + (*m-from_start);
2080
2081 for (;from_ptr < from_end; from_ptr++) {
2082 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2083 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2084 }
2085 *to_ptr = 0;
2086
2087 while (*m == from_ptr) *m++ = to_ptr;
2088 if (m != marks + sym_ptr->level+1) {
2089 Safefree(marks);
2090 Safefree(to_start);
5637ef5b
NC
2091 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2092 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2093 }
2094 for (group=sym_ptr; group; group = group->previous)
2095 group->strbeg = marks[group->level] - to_start;
2096 Safefree(marks);
2097
2098 if (SvOOK(sv)) {
2099 if (SvIVX(sv)) {
b162af07 2100 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2101 from_start -= SvIVX(sv);
2102 SvIV_set(sv, 0);
2103 }
2104 SvFLAGS(sv) &= ~SVf_OOK;
2105 }
2106 if (SvLEN(sv) != 0)
2107 Safefree(from_start);
f880fe2f 2108 SvPV_set(sv, to_start);
b162af07
SP
2109 SvCUR_set(sv, to_ptr - to_start);
2110 SvLEN_set(sv, len);
f337b084
TH
2111 SvUTF8_on(sv);
2112}
2113
2114/* Exponential string grower. Makes string extension effectively O(n)
2115 needed says how many extra bytes we need (not counting the final '\0')
2116 Only grows the string if there is an actual lack of space
2117*/
2118STATIC char *
0bd48802 2119S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2120 const STRLEN cur = SvCUR(sv);
2121 const STRLEN len = SvLEN(sv);
f337b084 2122 STRLEN extend;
7918f24d
NC
2123
2124 PERL_ARGS_ASSERT_SV_EXP_GROW;
2125
f337b084
TH
2126 if (len - cur > needed) return SvPVX(sv);
2127 extend = needed > len ? needed : len;
2128 return SvGROW(sv, len+extend+1);
2129}
49704364
LW
2130
2131STATIC
2132SV **
f337b084 2133S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2134{
97aff369 2135 dVAR;
49704364 2136 tempsym_t lookahead;
f337b084
TH
2137 I32 items = endlist - beglist;
2138 bool found = next_symbol(symptr);
2139 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2140 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2141
7918f24d
NC
2142 PERL_ARGS_ASSERT_PACK_REC;
2143
f337b084
TH
2144 if (symptr->level == 0 && found && symptr->code == 'U') {
2145 marked_upgrade(aTHX_ cat, symptr);
2146 symptr->flags |= FLAG_DO_UTF8;
2147 utf8 = 0;
49704364 2148 }
f337b084 2149 symptr->strbeg = SvCUR(cat);
49704364
LW
2150
2151 while (found) {
f337b084
TH
2152 SV *fromstr;
2153 STRLEN fromlen;
2154 I32 len;
a0714e2c 2155 SV *lengthcode = NULL;
49704364 2156 I32 datumtype = symptr->code;
f337b084
TH
2157 howlen_t howlen = symptr->howlen;
2158 char *start = SvPVX(cat);
2159 char *cur = start + SvCUR(cat);
49704364 2160
f337b084
TH
2161#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2162
2163 switch (howlen) {
fc241834 2164 case e_star:
f337b084
TH
2165 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2166 0 : items;
2167 break;
2168 default:
2169 /* e_no_len and e_number */
2170 len = symptr->length;
49704364
LW
2171 break;
2172 }
2173
f337b084 2174 if (len) {
a7a3cfaa 2175 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2176
a7a3cfaa
TH
2177 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2178 /* We can process this letter. */
2179 STRLEN size = props & PACK_SIZE_MASK;
2180 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2181 }
f337b084
TH
2182 }
2183
49704364
LW
2184 /* Look ahead for next symbol. Do we have code/code? */
2185 lookahead = *symptr;
2186 found = next_symbol(&lookahead);
246f24af
TH
2187 if (symptr->flags & FLAG_SLASH) {
2188 IV count;
f337b084 2189 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2190 if (strchr("aAZ", lookahead.code)) {
2191 if (lookahead.howlen == e_number) count = lookahead.length;
2192 else {
ce399ba6 2193 if (items > 0) {
48a5da33 2194 count = sv_len_utf8(*beglist);
ce399ba6 2195 }
246f24af
TH
2196 else count = 0;
2197 if (lookahead.code == 'Z') count++;
2198 }
2199 } else {
2200 if (lookahead.howlen == e_number && lookahead.length < items)
2201 count = lookahead.length;
2202 else count = items;
2203 }
2204 lookahead.howlen = e_number;
2205 lookahead.length = count;
2206 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2207 }
49704364 2208
fc241834
RGS
2209 /* Code inside the switch must take care to properly update
2210 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2211 doesn't simply leave using break */
1109a392 2212 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2213 default:
f337b084
TH
2214 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2215 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2216 case '%':
49704364 2217 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2218 {
2219 char *from;
28be1210 2220 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2221 case '.':
2222 if (howlen == e_star) from = start;
2223 else if (len == 0) from = cur;
2224 else {
2225 tempsym_t *group = symptr;
2226
2227 while (--len && group) group = group->previous;
2228 from = group ? start + group->strbeg : start;
2229 }
2230 fromstr = NEXTFROM;
2231 len = SvIV(fromstr);
2232 goto resize;
28be1210 2233 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2234 case '@':
28be1210
TH
2235 from = start + symptr->strbeg;
2236 resize:
28be1210 2237 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2238 if (len >= 0) {
2239 while (len && from < cur) {
2240 from += UTF8SKIP(from);
2241 len--;
2242 }
2243 if (from > cur)
2244 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2245 if (len) {
2246 /* Here we know from == cur */
2247 grow:
2248 GROWING(0, cat, start, cur, len);
2249 Zero(cur, len, char);
2250 cur += len;
2251 } else if (from < cur) {
2252 len = cur - from;
2253 goto shrink;
2254 } else goto no_change;
2255 } else {
2256 cur = from;
2257 len = -len;
2258 goto utf8_shrink;
f337b084 2259 }
28be1210
TH
2260 else {
2261 len -= cur - from;
f337b084 2262 if (len > 0) goto grow;
28be1210 2263 if (len == 0) goto no_change;
fc241834 2264 len = -len;
28be1210 2265 goto shrink;
f337b084 2266 }
a6ec74c1 2267 break;
28be1210 2268 }
fc241834 2269 case '(': {
49704364 2270 tempsym_t savsym = *symptr;
66c611c5
MHM
2271 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2272 symptr->flags |= group_modifiers;
49704364
LW
2273 symptr->patend = savsym.grpend;
2274 symptr->level++;
f337b084 2275 symptr->previous = &lookahead;
18529408 2276 while (len--) {
f337b084
TH
2277 U32 was_utf8;
2278 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2279 else symptr->flags &= ~FLAG_PARSE_UTF8;
2280 was_utf8 = SvUTF8(cat);
49704364 2281 symptr->patptr = savsym.grpbeg;
f337b084
TH
2282 beglist = pack_rec(cat, symptr, beglist, endlist);
2283 if (SvUTF8(cat) != was_utf8)
2284 /* This had better be an upgrade while in utf8==0 mode */
2285 utf8 = 1;
2286
49704364 2287 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2288 break; /* No way to continue */
2289 }
ee790063 2290 items = endlist - beglist;
f337b084
TH
2291 lookahead.flags = symptr->flags & ~group_modifiers;
2292 goto no_change;
18529408 2293 }
62f95557
IZ
2294 case 'X' | TYPE_IS_SHRIEKING:
2295 if (!len) /* Avoid division by 0 */
2296 len = 1;
f337b084
TH
2297 if (utf8) {
2298 char *hop, *last;
2299 I32 l = len;
2300 hop = last = start;
2301 while (hop < cur) {
2302 hop += UTF8SKIP(hop);
2303 if (--l == 0) {
2304 last = hop;
2305 l = len;
2306 }
2307 }
2308 if (last > cur)
2309 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2310 cur = last;
2311 break;
2312 }
2313 len = (cur-start) % len;
62f95557 2314 /* FALL THROUGH */
a6ec74c1 2315 case 'X':
f337b084
TH
2316 if (utf8) {
2317 if (len < 1) goto no_change;
28be1210 2318 utf8_shrink:
f337b084
TH
2319 while (len > 0) {
2320 if (cur <= start)
28be1210
TH
2321 Perl_croak(aTHX_ "'%c' outside of string in pack",
2322 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2323 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2324 if (cur <= start)
28be1210
TH
2325 Perl_croak(aTHX_ "'%c' outside of string in pack",
2326 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2327 }
2328 len--;
2329 }
2330 } else {
fc241834 2331 shrink:
f337b084 2332 if (cur - start < len)
28be1210
TH
2333 Perl_croak(aTHX_ "'%c' outside of string in pack",
2334 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2335 cur -= len;
2336 }
2337 if (cur < start+symptr->strbeg) {
2338 /* Make sure group starts don't point into the void */
2339 tempsym_t *group;
9e27e96a 2340 const STRLEN length = cur-start;
f337b084
TH
2341 for (group = symptr;
2342 group && length < group->strbeg;
2343 group = group->previous) group->strbeg = length;
2344 lookahead.strbeg = length;
2345 }
a6ec74c1 2346 break;
fc241834
RGS
2347 case 'x' | TYPE_IS_SHRIEKING: {
2348 I32 ai32;
62f95557
IZ
2349 if (!len) /* Avoid division by 0 */
2350 len = 1;
230e1fce 2351 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2352 else ai32 = (cur - start) % len;
2353 if (ai32 == 0) goto no_change;
2354 len -= ai32;
2355 }
2356 /* FALL THROUGH */
a6ec74c1 2357 case 'x':
f337b084 2358 goto grow;
a6ec74c1
JH
2359 case 'A':
2360 case 'Z':
f337b084 2361 case 'a': {
f7fe979e 2362 const char *aptr;
f337b084 2363
a6ec74c1 2364 fromstr = NEXTFROM;
e62f0680 2365 aptr = SvPV_const(fromstr, fromlen);
f337b084 2366 if (DO_UTF8(fromstr)) {
f7fe979e 2367 const char *end, *s;
f337b084
TH
2368
2369 if (!utf8 && !SvUTF8(cat)) {
2370 marked_upgrade(aTHX_ cat, symptr);
2371 lookahead.flags |= FLAG_DO_UTF8;
2372 lookahead.strbeg = symptr->strbeg;
2373 utf8 = 1;
2374 start = SvPVX(cat);
2375 cur = start + SvCUR(cat);
2376 }
fc241834 2377 if (howlen == e_star) {
f337b084
TH
2378 if (utf8) goto string_copy;
2379 len = fromlen+1;
2380 }
2381 s = aptr;
2382 end = aptr + fromlen;
2383 fromlen = datumtype == 'Z' ? len-1 : len;
2384 while ((I32) fromlen > 0 && s < end) {
2385 s += UTF8SKIP(s);
2386 fromlen--;
2387 }
2388 if (s > end)
2389 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2390 if (utf8) {
fc241834 2391 len = fromlen;
f337b084
TH
2392 if (datumtype == 'Z') len++;
2393 fromlen = s-aptr;
2394 len += fromlen;
fc241834 2395
f337b084 2396 goto string_copy;
fc241834 2397 }
f337b084
TH
2398 fromlen = len - fromlen;
2399 if (datumtype == 'Z') fromlen--;
2400 if (howlen == e_star) {
2401 len = fromlen;
2402 if (datumtype == 'Z') len++;
fc241834 2403 }
f337b084 2404 GROWING(0, cat, start, cur, len);
fc241834 2405 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2406 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2407 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2408 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2409 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2410 cur += fromlen;
a6ec74c1 2411 len -= fromlen;
f337b084
TH
2412 } else if (utf8) {
2413 if (howlen == e_star) {
2414 len = fromlen;
2415 if (datumtype == 'Z') len++;
a6ec74c1 2416 }
f337b084
TH
2417 if (len <= (I32) fromlen) {
2418 fromlen = len;
2419 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2420 }
fc241834 2421 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2422 upgrade, so:
2423 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2424 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2425 len -= fromlen;
2426 while (fromlen > 0) {
230e1fce 2427 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2428 aptr++;
2429 fromlen--;
fc241834 2430 }
f337b084
TH
2431 } else {
2432 string_copy:
2433 if (howlen == e_star) {
2434 len = fromlen;
2435 if (datumtype == 'Z') len++;
2436 }
2437 if (len <= (I32) fromlen) {
2438 fromlen = len;
2439 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2440 }
f337b084
TH
2441 GROWING(0, cat, start, cur, len);
2442 Copy(aptr, cur, fromlen, char);
2443 cur += fromlen;
2444 len -= fromlen;
a6ec74c1 2445 }
f337b084
TH
2446 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2447 cur += len;
3c4fb04a 2448 SvTAINT(cat);
a6ec74c1 2449 break;
f337b084 2450 }
a6ec74c1 2451 case 'B':
f337b084 2452 case 'b': {
b83604b4 2453 const char *str, *end;
f337b084
TH
2454 I32 l, field_len;
2455 U8 bits;
2456 bool utf8_source;
2457 U32 utf8_flags;
a6ec74c1 2458
fc241834 2459 fromstr = NEXTFROM;
b83604b4 2460 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2461 end = str + fromlen;
2462 if (DO_UTF8(fromstr)) {
2463 utf8_source = TRUE;
041457d9 2464 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2465 } else {
2466 utf8_source = FALSE;
2467 utf8_flags = 0; /* Unused, but keep compilers happy */
2468 }
2469 if (howlen == e_star) len = fromlen;
2470 field_len = (len+7)/8;
2471 GROWING(utf8, cat, start, cur, field_len);
2472 if (len > (I32)fromlen) len = fromlen;
2473 bits = 0;
2474 l = 0;
2475 if (datumtype == 'B')
2476 while (l++ < len) {
2477 if (utf8_source) {
95b63a38 2478 UV val = 0;
f337b084
TH
2479 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2480 bits |= val & 1;
2481 } else bits |= *str++ & 1;
2482 if (l & 7) bits <<= 1;
fc241834 2483 else {
f337b084
TH
2484 PUSH_BYTE(utf8, cur, bits);
2485 bits = 0;
a6ec74c1
JH
2486 }
2487 }
f337b084
TH
2488 else
2489 /* datumtype == 'b' */
2490 while (l++ < len) {
2491 if (utf8_source) {
95b63a38 2492 UV val = 0;
f337b084
TH
2493 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2494 if (val & 1) bits |= 0x80;
2495 } else if (*str++ & 1)
2496 bits |= 0x80;
2497 if (l & 7) bits >>= 1;
fc241834 2498 else {
f337b084
TH
2499 PUSH_BYTE(utf8, cur, bits);
2500 bits = 0;
a6ec74c1
JH
2501 }
2502 }
f337b084
TH
2503 l--;
2504 if (l & 7) {
fc241834 2505 if (datumtype == 'B')
f337b084 2506 bits <<= 7 - (l & 7);
fc241834 2507 else
f337b084
TH
2508 bits >>= 7 - (l & 7);
2509 PUSH_BYTE(utf8, cur, bits);
2510 l += 7;
a6ec74c1 2511 }
f337b084
TH
2512 /* Determine how many chars are left in the requested field */
2513 l /= 8;
2514 if (howlen == e_star) field_len = 0;
2515 else field_len -= l;
2516 Zero(cur, field_len, char);
2517 cur += field_len;
a6ec74c1 2518 break;
f337b084 2519 }
a6ec74c1 2520 case 'H':
f337b084 2521 case 'h': {
10516c54 2522 const char *str, *end;
f337b084
TH
2523 I32 l, field_len;
2524 U8 bits;
2525 bool utf8_source;
2526 U32 utf8_flags;
a6ec74c1 2527
fc241834 2528 fromstr = NEXTFROM;
10516c54 2529 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2530 end = str + fromlen;
2531 if (DO_UTF8(fromstr)) {
2532 utf8_source = TRUE;
041457d9 2533 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2534 } else {
2535 utf8_source = FALSE;
2536 utf8_flags = 0; /* Unused, but keep compilers happy */
2537 }
2538 if (howlen == e_star) len = fromlen;
2539 field_len = (len+1)/2;
2540 GROWING(utf8, cat, start, cur, field_len);
2541 if (!utf8 && len > (I32)fromlen) len = fromlen;
2542 bits = 0;
2543 l = 0;
2544 if (datumtype == 'H')
2545 while (l++ < len) {
2546 if (utf8_source) {
95b63a38 2547 UV val = 0;
f337b084
TH
2548 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2549 if (val < 256 && isALPHA(val))
2550 bits |= (val + 9) & 0xf;
a6ec74c1 2551 else
f337b084
TH
2552 bits |= val & 0xf;
2553 } else if (isALPHA(*str))
2554 bits |= (*str++ + 9) & 0xf;
2555 else
2556 bits |= *str++ & 0xf;
2557 if (l & 1) bits <<= 4;
fc241834 2558 else {
f337b084
TH
2559 PUSH_BYTE(utf8, cur, bits);
2560 bits = 0;
a6ec74c1
JH
2561 }
2562 }
f337b084
TH
2563 else
2564 while (l++ < len) {
2565 if (utf8_source) {
95b63a38 2566 UV val = 0;
f337b084
TH
2567 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2568 if (val < 256 && isALPHA(val))
2569 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2570 else
f337b084
TH
2571 bits |= (val & 0xf) << 4;
2572 } else if (isALPHA(*str))
2573 bits |= ((*str++ + 9) & 0xf) << 4;
2574 else
2575 bits |= (*str++ & 0xf) << 4;
2576 if (l & 1) bits >>= 4;
fc241834 2577 else {
f337b084
TH
2578 PUSH_BYTE(utf8, cur, bits);
2579 bits = 0;
a6ec74c1 2580 }
fc241834 2581 }
f337b084
TH
2582 l--;
2583 if (l & 1) {
2584 PUSH_BYTE(utf8, cur, bits);
2585 l++;
2586 }
2587 /* Determine how many chars are left in the requested field */
2588 l /= 2;
2589 if (howlen == e_star) field_len = 0;
2590 else field_len -= l;
2591 Zero(cur, field_len, char);
2592 cur += field_len;
2593 break;
fc241834
RGS
2594 }
2595 case 'c':
f337b084
TH
2596 while (len-- > 0) {
2597 IV aiv;
2598 fromstr = NEXTFROM;
2599 aiv = SvIV(fromstr);
a2a5de95
NC
2600 if ((-128 > aiv || aiv > 127))
2601 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2602 "Character in 'c' format wrapped in pack");
585ec06d 2603 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2604 }
2605 break;
2606 case 'C':
f337b084
TH
2607 if (len == 0) {
2608 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2609 break;
2610 }
a6ec74c1 2611 while (len-- > 0) {
f337b084 2612 IV aiv;
a6ec74c1 2613 fromstr = NEXTFROM;
f337b084 2614 aiv = SvIV(fromstr);
a2a5de95
NC
2615 if ((0 > aiv || aiv > 0xff))
2616 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2617 "Character in 'C' format wrapped in pack");
1651fc44 2618 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2619 }
fc241834
RGS
2620 break;
2621 case 'W': {
2622 char *end;
670f1322 2623 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2624
2625 end = start+SvLEN(cat)-1;
2626 if (utf8) end -= UTF8_MAXLEN-1;
2627 while (len-- > 0) {
2628 UV auv;
2629 fromstr = NEXTFROM;
2630 auv = SvUV(fromstr);
2631 if (in_bytes) auv = auv % 0x100;
2632 if (utf8) {
2633 W_utf8:
2634 if (cur > end) {
2635 *cur = '\0';
b162af07 2636 SvCUR_set(cat, cur - start);
fc241834
RGS
2637
2638 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2639 end = start+SvLEN(cat)-UTF8_MAXLEN;
2640 }
230e1fce
NC
2641 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2642 NATIVE_TO_UNI(auv),
041457d9 2643 warn_utf8 ?
230e1fce 2644 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2645 } else {
2646 if (auv >= 0x100) {
2647 if (!SvUTF8(cat)) {
2648 *cur = '\0';
b162af07 2649 SvCUR_set(cat, cur - start);
fc241834
RGS
2650 marked_upgrade(aTHX_ cat, symptr);
2651 lookahead.flags |= FLAG_DO_UTF8;
2652 lookahead.strbeg = symptr->strbeg;
2653 utf8 = 1;
2654 start = SvPVX(cat);
2655 cur = start + SvCUR(cat);
2656 end = start+SvLEN(cat)-UTF8_MAXLEN;
2657 goto W_utf8;
2658 }
a2a5de95
NC
2659 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2660 "Character in 'W' format wrapped in pack");
fc241834
RGS
2661 auv &= 0xff;
2662 }
2663 if (cur >= end) {
2664 *cur = '\0';
b162af07 2665 SvCUR_set(cat, cur - start);
fc241834
RGS
2666 GROWING(0, cat, start, cur, len+1);
2667 end = start+SvLEN(cat)-1;
2668 }
fe2774ed 2669 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2670 }
2671 }
2672 break;
fc241834
RGS
2673 }
2674 case 'U': {
2675 char *end;
2676
2677 if (len == 0) {
2678 if (!(symptr->flags & FLAG_DO_UTF8)) {
2679 marked_upgrade(aTHX_ cat, symptr);
2680 lookahead.flags |= FLAG_DO_UTF8;
2681 lookahead.strbeg = symptr->strbeg;
2682 }
2683 utf8 = 0;
2684 goto no_change;
2685 }
2686
2687 end = start+SvLEN(cat);
2688 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2689 while (len-- > 0) {
fc241834 2690 UV auv;
a6ec74c1 2691 fromstr = NEXTFROM;
fc241834
RGS
2692 auv = SvUV(fromstr);
2693 if (utf8) {
230e1fce 2694 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2695 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2696 warn_utf8 ?
fc241834
RGS
2697 0 : UNICODE_ALLOW_ANY);
2698 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2699 *cur = '\0';
b162af07 2700 SvCUR_set(cat, cur - start);
fc241834
RGS
2701 GROWING(0, cat, start, cur,
2702 len+(endb-buffer)*UTF8_EXPAND);
2703 end = start+SvLEN(cat);
2704 }
64844641 2705 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2706 } else {
2707 if (cur >= end) {
2708 *cur = '\0';
b162af07 2709 SvCUR_set(cat, cur - start);
fc241834
RGS
2710 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2711 end = start+SvLEN(cat)-UTF8_MAXLEN;
2712 }
230e1fce 2713 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2714 warn_utf8 ?
230e1fce 2715 0 : UNICODE_ALLOW_ANY);
fc241834 2716 }
a6ec74c1 2717 }
a6ec74c1 2718 break;
fc241834 2719 }
a6ec74c1
JH
2720 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2721 case 'f':
a6ec74c1 2722 while (len-- > 0) {
f337b084
TH
2723 float afloat;
2724 NV anv;
a6ec74c1 2725 fromstr = NEXTFROM;
f337b084 2726 anv = SvNV(fromstr);
85bba25f 2727# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2728 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2729 * on Alpha; fake it if we don't have them.
2730 */
f337b084 2731 if (anv > FLT_MAX)
fc241834 2732 afloat = FLT_MAX;
f337b084 2733 else if (anv < -FLT_MAX)
fc241834 2734 afloat = -FLT_MAX;
f337b084 2735 else afloat = (float)anv;
baf3cf9c 2736# else
f337b084 2737 afloat = (float)anv;
baf3cf9c 2738# endif
7e38f754 2739 DO_BO_PACK(afloat, float);
f337b084 2740 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2741 }
2742 break;
2743 case 'd':
a6ec74c1 2744 while (len-- > 0) {
f337b084
TH
2745 double adouble;
2746 NV anv;
a6ec74c1 2747 fromstr = NEXTFROM;
f337b084 2748 anv = SvNV(fromstr);
85bba25f 2749# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2750 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2751 * on Alpha; fake it if we don't have them.
2752 */
f337b084 2753 if (anv > DBL_MAX)
fc241834 2754 adouble = DBL_MAX;
f337b084 2755 else if (anv < -DBL_MAX)
fc241834 2756 adouble = -DBL_MAX;
f337b084 2757 else adouble = (double)anv;
baf3cf9c 2758# else
f337b084 2759 adouble = (double)anv;
baf3cf9c 2760# endif
7e38f754 2761 DO_BO_PACK(adouble, double);
f337b084 2762 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2763 }
2764 break;
fc241834 2765 case 'F': {
275663fa 2766 NV_bytes anv;
1109a392 2767 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2768 while (len-- > 0) {
2769 fromstr = NEXTFROM;
cd07c537
DM
2770#ifdef __GNUC__
2771 /* to work round a gcc/x86 bug; don't use SvNV */
2772 anv.nv = sv_2nv(fromstr);
2773#else
275663fa 2774 anv.nv = SvNV(fromstr);
cd07c537 2775#endif
7e38f754 2776 DO_BO_PACK(anv, NV);
275663fa 2777 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2778 }
2779 break;
fc241834 2780 }
92d41999 2781#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2782 case 'D': {
275663fa 2783 ld_bytes aldouble;
1109a392
MHM
2784 /* long doubles can have unused bits, which may be nonzero */
2785 Zero(&aldouble, 1, long double);
92d41999
JH
2786 while (len-- > 0) {
2787 fromstr = NEXTFROM;
cd07c537
DM
2788# ifdef __GNUC__
2789 /* to work round a gcc/x86 bug; don't use SvNV */
2790 aldouble.ld = (long double)sv_2nv(fromstr);
2791# else
275663fa 2792 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2793# endif
7e38f754 2794 DO_BO_PACK(aldouble, long double);
275663fa 2795 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2796 }
2797 break;
fc241834 2798 }
92d41999 2799#endif
068bd2e7 2800 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2801 case 'n':
2802 while (len-- > 0) {
f337b084 2803 I16 ai16;
a6ec74c1 2804 fromstr = NEXTFROM;
ef108786 2805 ai16 = (I16)SvIV(fromstr);
ef108786 2806 ai16 = PerlSock_htons(ai16);
f337b084 2807 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2808 }
2809 break;
068bd2e7 2810 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2811 case 'v':
2812 while (len-- > 0) {
f337b084 2813 I16 ai16;
a6ec74c1 2814 fromstr = NEXTFROM;
ef108786 2815 ai16 = (I16)SvIV(fromstr);
ef108786 2816 ai16 = htovs(ai16);
f337b084 2817 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2818 }
2819 break;
49704364 2820 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2821#if SHORTSIZE != SIZE16
fc241834 2822 while (len-- > 0) {
f337b084 2823 unsigned short aushort;
fc241834
RGS
2824 fromstr = NEXTFROM;
2825 aushort = SvUV(fromstr);
2826 DO_BO_PACK(aushort, s);
f337b084 2827 PUSH_VAR(utf8, cur, aushort);
fc241834 2828 }
49704364
LW
2829 break;
2830#else
2831 /* Fall through! */
a6ec74c1 2832#endif
49704364 2833 case 'S':
fc241834 2834 while (len-- > 0) {
f337b084 2835 U16 au16;
fc241834
RGS
2836 fromstr = NEXTFROM;
2837 au16 = (U16)SvUV(fromstr);
2838 DO_BO_PACK(au16, 16);
f337b084 2839 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2840 }
2841 break;
49704364 2842 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2843#if SHORTSIZE != SIZE16
fc241834 2844 while (len-- > 0) {
f337b084 2845 short ashort;
fc241834
RGS
2846 fromstr = NEXTFROM;
2847 ashort = SvIV(fromstr);
2848 DO_BO_PACK(ashort, s);
f337b084 2849 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2850 }
49704364
LW
2851 break;
2852#else
2853 /* Fall through! */
a6ec74c1 2854#endif
49704364
LW
2855 case 's':
2856 while (len-- > 0) {
f337b084 2857 I16 ai16;
49704364 2858 fromstr = NEXTFROM;
ef108786
MHM
2859 ai16 = (I16)SvIV(fromstr);
2860 DO_BO_PACK(ai16, 16);
f337b084 2861 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2862 }
2863 break;
2864 case 'I':
49704364 2865 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2866 while (len-- > 0) {
f337b084 2867 unsigned int auint;
a6ec74c1
JH
2868 fromstr = NEXTFROM;
2869 auint = SvUV(fromstr);
1109a392 2870 DO_BO_PACK(auint, i);
f337b084 2871 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2872 }
2873 break;
92d41999
JH
2874 case 'j':
2875 while (len-- > 0) {
f337b084 2876 IV aiv;
92d41999
JH
2877 fromstr = NEXTFROM;
2878 aiv = SvIV(fromstr);
1109a392
MHM
2879#if IVSIZE == INTSIZE
2880 DO_BO_PACK(aiv, i);
2881#elif IVSIZE == LONGSIZE
2882 DO_BO_PACK(aiv, l);
2883#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2884 DO_BO_PACK(aiv, 64);
f337b084
TH
2885#else
2886 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 2887#endif
f337b084 2888 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
2889 }
2890 break;
2891 case 'J':
2892 while (len-- > 0) {
f337b084 2893 UV auv;
92d41999
JH
2894 fromstr = NEXTFROM;
2895 auv = SvUV(fromstr);
1109a392
MHM
2896#if UVSIZE == INTSIZE
2897 DO_BO_PACK(auv, i);
2898#elif UVSIZE == LONGSIZE
2899 DO_BO_PACK(auv, l);
2900#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2901 DO_BO_PACK(auv, 64);
f337b084
TH
2902#else
2903 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 2904#endif
f337b084 2905 PUSH_VAR(utf8, cur, auv);
92d41999
JH
2906 }
2907 break;
a6ec74c1
JH
2908 case 'w':
2909 while (len-- > 0) {
f337b084 2910 NV anv;
a6ec74c1 2911 fromstr = NEXTFROM;
15e9f109 2912 anv = SvNV(fromstr);
a6ec74c1 2913
f337b084
TH
2914 if (anv < 0) {
2915 *cur = '\0';
b162af07 2916 SvCUR_set(cat, cur - start);
49704364 2917 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2918 }
a6ec74c1 2919
196b62db
NC
2920 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2921 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2922 any negative IVs will have already been got by the croak()
2923 above. IOK is untrue for fractions, so we test them
2924 against UV_MAX_P1. */
f337b084
TH
2925 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2926 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2927 char *in = buf + sizeof(buf);
196b62db 2928 UV auv = SvUV(fromstr);
a6ec74c1
JH
2929
2930 do {
eb160463 2931 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2932 auv >>= 7;
2933 } while (auv);
2934 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2935 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2936 in, (buf + sizeof(buf)) - in);
2937 } else if (SvPOKp(fromstr))
2938 goto w_string;
a6ec74c1 2939 else if (SvNOKp(fromstr)) {
0258719b 2940 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2941 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2942 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2943 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2944 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2945 Some C compilers are strict about integral constant
2946 expressions so we conservatively divide by a slightly
2947 smaller integer instead of multiplying by the exact
2948 floating-point value.
0258719b
NC
2949 */
2950#ifdef NV_MAX_10_EXP
f337b084 2951 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2952 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2953#else
f337b084 2954 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2955 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2956#endif
a6ec74c1
JH
2957 char *in = buf + sizeof(buf);
2958
8b6e33c7 2959 anv = Perl_floor(anv);
a6ec74c1 2960 do {
8b6e33c7 2961 const NV next = Perl_floor(anv / 128);
a6ec74c1 2962 if (in <= buf) /* this cannot happen ;-) */
49704364 2963 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2964 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2965 anv = next;
2966 } while (anv > 0);
a6ec74c1 2967 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2968 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2969 in, (buf + sizeof(buf)) - in);
2970 } else {
8b6e33c7
AL
2971 const char *from;
2972 char *result, *in;
735b914b
JH
2973 SV *norm;
2974 STRLEN len;
2975 bool done;
2976
f337b084 2977 w_string:
735b914b 2978 /* Copy string and check for compliance */
349d4f2f 2979 from = SvPV_const(fromstr, len);
735b914b 2980 if ((norm = is_an_int(from, len)) == NULL)
49704364 2981 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2982
a02a5408 2983 Newx(result, len, char);
735b914b
JH
2984 in = result + len;
2985 done = FALSE;
f337b084 2986 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2987 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2988 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2989 in, (result + len) - in);
735b914b
JH
2990 Safefree(result);
2991 SvREFCNT_dec(norm); /* free norm */
fc241834 2992 }
a6ec74c1
JH
2993 }
2994 break;
2995 case 'i':
49704364 2996 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2997 while (len-- > 0) {
f337b084 2998 int aint;
a6ec74c1
JH
2999 fromstr = NEXTFROM;
3000 aint = SvIV(fromstr);
1109a392 3001 DO_BO_PACK(aint, i);
f337b084 3002 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3003 }
3004 break;
068bd2e7 3005 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
3006 case 'N':
3007 while (len-- > 0) {
f337b084 3008 U32 au32;
a6ec74c1 3009 fromstr = NEXTFROM;
ef108786 3010 au32 = SvUV(fromstr);
ef108786 3011 au32 = PerlSock_htonl(au32);
f337b084 3012 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3013 }
3014 break;
068bd2e7 3015 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
3016 case 'V':
3017 while (len-- > 0) {
f337b084 3018 U32 au32;
a6ec74c1 3019 fromstr = NEXTFROM;
ef108786 3020 au32 = SvUV(fromstr);
ef108786 3021 au32 = htovl(au32);
f337b084 3022 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3023 }
3024 break;
49704364 3025 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3026#if LONGSIZE != SIZE32
fc241834 3027 while (len-- > 0) {
f337b084 3028 unsigned long aulong;
fc241834
RGS
3029 fromstr = NEXTFROM;
3030 aulong = SvUV(fromstr);
3031 DO_BO_PACK(aulong, l);
f337b084 3032 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3033 }
49704364
LW
3034 break;
3035#else
3036 /* Fall though! */
a6ec74c1 3037#endif
49704364 3038 case 'L':
fc241834 3039 while (len-- > 0) {
f337b084 3040 U32 au32;
fc241834
RGS
3041 fromstr = NEXTFROM;
3042 au32 = SvUV(fromstr);
3043 DO_BO_PACK(au32, 32);
f337b084 3044 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3045 }
3046 break;
49704364 3047 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3048#if LONGSIZE != SIZE32
fc241834 3049 while (len-- > 0) {
f337b084 3050 long along;
fc241834
RGS
3051 fromstr = NEXTFROM;
3052 along = SvIV(fromstr);
3053 DO_BO_PACK(along, l);
f337b084 3054 PUSH_VAR(utf8, cur, along);
a6ec74c1 3055 }
49704364
LW
3056 break;
3057#else
3058 /* Fall though! */
a6ec74c1 3059#endif
49704364
LW
3060 case 'l':
3061 while (len-- > 0) {
f337b084 3062 I32 ai32;
49704364 3063 fromstr = NEXTFROM;
ef108786
MHM
3064 ai32 = SvIV(fromstr);
3065 DO_BO_PACK(ai32, 32);
f337b084 3066 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3067 }
3068 break;
3069#ifdef HAS_QUAD
3070 case 'Q':
3071 while (len-- > 0) {
f337b084 3072 Uquad_t auquad;
a6ec74c1 3073 fromstr = NEXTFROM;
f337b084 3074 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3075 DO_BO_PACK(auquad, 64);
f337b084 3076 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3077 }
3078 break;
3079 case 'q':
3080 while (len-- > 0) {
f337b084 3081 Quad_t aquad;
a6ec74c1
JH
3082 fromstr = NEXTFROM;
3083 aquad = (Quad_t)SvIV(fromstr);
1109a392 3084 DO_BO_PACK(aquad, 64);
f337b084 3085 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3086 }
3087 break;
f337b084 3088#endif /* HAS_QUAD */
a6ec74c1
JH
3089 case 'P':
3090 len = 1; /* assume SV is correct length */
f337b084 3091 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3092 /* Fall through! */
a6ec74c1
JH
3093 case 'p':
3094 while (len-- > 0) {
83003860 3095 const char *aptr;
f337b084 3096
a6ec74c1 3097 fromstr = NEXTFROM;
28a4f200 3098 SvGETMAGIC(fromstr);
3099 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3100 else {
a6ec74c1
JH
3101 /* XXX better yet, could spirit away the string to
3102 * a safe spot and hang on to it until the result
3103 * of pack() (and all copies of the result) are
3104 * gone.
3105 */
041457d9 3106 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3107 !SvREADONLY(fromstr)))) {
3108 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3109 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3110 }
3111 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3112 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3113 else
2596d9fe 3114 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3115 }
7e38f754 3116 DO_BO_PACK(aptr, pointer);
f337b084 3117 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3118 }
3119 break;
fc241834 3120 case 'u': {
f7fe979e 3121 const char *aptr, *aend;
fc241834 3122 bool from_utf8;
f337b084 3123
a6ec74c1 3124 fromstr = NEXTFROM;
fc241834
RGS
3125 if (len <= 2) len = 45;
3126 else len = len / 3 * 3;
3127 if (len >= 64) {
a2a5de95
NC
3128 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3129 "Field too wide in 'u' format in pack");
fc241834
RGS
3130 len = 63;
3131 }
83003860 3132 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3133 from_utf8 = DO_UTF8(fromstr);
3134 if (from_utf8) {
3135 aend = aptr + fromlen;
3f63b0e5 3136 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3137 } else aend = NULL; /* Unused, but keep compilers happy */
3138 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3139 while (fromlen > 0) {
fc241834 3140 U8 *end;
a6ec74c1 3141 I32 todo;
fc241834 3142 U8 hunk[1+63/3*4+1];
a6ec74c1 3143
eb160463 3144 if ((I32)fromlen > len)
a6ec74c1
JH
3145 todo = len;
3146 else
3147 todo = fromlen;
fc241834
RGS
3148 if (from_utf8) {
3149 char buffer[64];
3150 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3151 'u' | TYPE_IS_PACK)) {
3152 *cur = '\0';
b162af07 3153 SvCUR_set(cat, cur - start);
5637ef5b
NC
3154 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3155 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3156 aptr, aend, buffer, (long) todo);
fc241834
RGS
3157 }
3158 end = doencodes(hunk, buffer, todo);
3159 } else {
3160 end = doencodes(hunk, aptr, todo);
3161 aptr += todo;
3162 }
3163 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3164 fromlen -= todo;
3165 }
a6ec74c1
JH
3166 break;
3167 }
f337b084
TH
3168 }
3169 *cur = '\0';
b162af07 3170 SvCUR_set(cat, cur - start);
f337b084 3171 no_change:
49704364 3172 *symptr = lookahead;
a6ec74c1 3173 }
49704364 3174 return beglist;
18529408
IZ
3175}
3176#undef NEXTFROM
3177
3178
3179PP(pp_pack)
3180{
97aff369 3181 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3182 SV *cat = TARG;
18529408 3183 STRLEN fromlen;
349d4f2f 3184 SV *pat_sv = *++MARK;
eb578fdb
KW
3185 const char *pat = SvPV_const(pat_sv, fromlen);
3186 const char *patend = pat + fromlen;
18529408
IZ
3187
3188 MARK++;
76f68e9b 3189 sv_setpvs(cat, "");
f337b084 3190 SvUTF8_off(cat);
18529408 3191
7accc089 3192 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3193
a6ec74c1
JH
3194 SvSETMAGIC(cat);
3195 SP = ORIGMARK;
3196 PUSHs(cat);
3197 RETURN;
3198}
a6ec74c1 3199
73cb7263
NC
3200/*
3201 * Local variables:
3202 * c-indentation-style: bsd
3203 * c-basic-offset: 4
14d04a33 3204 * indent-tabs-mode: nil
73cb7263
NC
3205 * End:
3206 *
14d04a33 3207 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3208 */