This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
In pp_pack.c, refactor DO_BO_(UN)?PACK_PTR to use my_letohn etc
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
f337b084
TH
132/* Only to be used inside a loop (see the break) */
133#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
134 if (utf8) { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
136 } else { \
137 Copy(s, OFF16(p), SIZE16, char); \
138 (s) += SIZE16; \
139 } \
140} STMT_END
141
142/* Only to be used inside a loop (see the break) */
143#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
144 if (utf8) { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
146 } else { \
147 Copy(s, OFF32(p), SIZE32, char); \
148 (s) += SIZE32; \
149 } \
150} STMT_END
151
152#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
154
155/* Only to be used inside a loop (see the break) */
275663fa 156#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
157STMT_START { \
158 if (utf8) { \
f337b084 159 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 160 (char *) (buf), len, datumtype)) break; \
08ca2aa3 161 } else { \
275663fa
TC
162 Copy(s, (char *) (buf), len, char); \
163 s += len; \
08ca2aa3 164 } \
08ca2aa3
TH
165} STMT_END
166
275663fa
TC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
169
f337b084 170#define PUSH_VAR(utf8, aptr, var) \
230e1fce 171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 172
49704364
LW
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
a6ec74c1 194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
215#if 'I' == 73 && 'J' == 74
216/* On an ASCII/ISO kind of system */
217#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
218#else
219/*
220 Some other sort of character set - use memchr() so we don't match
221 the null byte.
222 */
223#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
224#endif
225
66c611c5 226/* type modifiers */
62f95557 227#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
228#define TYPE_IS_BIG_ENDIAN 0x200
229#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 230#define TYPE_IS_PACK 0x800
1109a392 231#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 232#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
233#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
234
7212898e
NC
235# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
237
238# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
239
3cc90e2e
NC
240#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
241
7212898e 242# define DO_BO_UNPACK(var, type) \
1109a392 243 STMT_START { \
3cc90e2e
NC
244 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
245 var = my_letoh ## type (var); \
1109a392
MHM
246 } \
247 } STMT_END
248
7212898e 249# define DO_BO_PACK(var, type) \
1109a392 250 STMT_START { \
3cc90e2e
NC
251 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
252 var = my_htole ## type (var); \
1109a392
MHM
253 } \
254 } STMT_END
255
07409e01 256# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 257 STMT_START { \
3cc90e2e 258 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
86c6fce0 259 my_letohn(&var, sizeof(var)); \
1109a392
MHM
260 } \
261 } STMT_END
262
07409e01 263# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 264 STMT_START { \
3cc90e2e 265 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
86c6fce0 266 my_htolen(&var, sizeof(var)); \
1109a392
MHM
267 } \
268 } STMT_END
269
3cc90e2e
NC
270# define DO_BO_UNPACK_N(var, type) \
271 STMT_START { \
272 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
df609c5f 273 my_letohn(&var, sizeof(var)); \
3cc90e2e
NC
274 } \
275 } STMT_END
276
277# define DO_BO_PACK_N(var, type) \
278 STMT_START { \
279 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
df609c5f 280 my_htolen(&var, sizeof(var)); \
3cc90e2e
NC
281 } \
282 } STMT_END
283
284# elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
285
286# define DO_BO_UNPACK(var, type) \
287 STMT_START { \
288 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
289 var = my_betoh ## type (var); \
290 } \
291 } STMT_END
292
293# define DO_BO_PACK(var, type) \
294 STMT_START { \
295 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
296 var = my_htobe ## type (var); \
297 } \
298 } STMT_END
299
300# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
301 STMT_START { \
302 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
86c6fce0 303 my_betohn(&var, sizeof(var)); \
3cc90e2e
NC
304 } \
305 } STMT_END
306
307# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
308 STMT_START { \
309 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
86c6fce0 310 my_htoben(&var, sizeof(var)); \
3cc90e2e
NC
311 } \
312 } STMT_END
313
314# define DO_BO_UNPACK_N(var, type) \
315 STMT_START { \
316 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
df609c5f 317 my_betohn(&var, sizeof(var)); \
3cc90e2e
NC
318 } \
319 } STMT_END
320
321# define DO_BO_PACK_N(var, type) \
322 STMT_START { \
323 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
df609c5f 324 my_htoben(&var, sizeof(var)); \
3cc90e2e
NC
325 } \
326 } STMT_END
327
328#else
329# define DO_BO_UNPACK(var, type) BO_CANT_DOIT(unpack, type)
330# define DO_BO_PACK(var, type) BO_CANT_DOIT(pack, type)
331# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
332 BO_CANT_DOIT(unpack, type)
333# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
334 BO_CANT_DOIT(pack, type)
335# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
336# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
337#endif
338
7212898e 339# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
340 STMT_START { \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: \
343 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
344 "platform", #action, #type); \
345 break; \
346 case TYPE_IS_LITTLE_ENDIAN: \
347 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
348 "platform", #action, #type); \
349 break; \
350 default: \
351 break; \
352 } \
353 } STMT_END
354
7212898e 355# if PTRSIZE == INTSIZE
07409e01
NC
356# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
357# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 358# elif PTRSIZE == LONGSIZE
fa58a56f 359# if LONGSIZE < IVSIZE && IVSIZE == 8
fa58a56f
S
360# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
361# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
362# else
fa58a56f
S
363# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
364# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
365# endif
2b00a750 366# elif PTRSIZE == IVSIZE
2b00a750
JD
367# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
368# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e 369# else
45f723e1
NC
370# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
371# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 372# endif
1109a392 373
78d46eaa 374#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 375#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
376#define PACK_SIZE_MASK 0x3F
377
298bc19c 378#include "packsizetables.c"
78d46eaa 379
08ca2aa3 380STATIC U8
f7fe979e 381uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 382{
08ca2aa3 383 STRLEN retlen;
0bcc34c2 384 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 385 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 386 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
387 warnings), but these two mean we make no progress in the string and
388 might enter an infinite loop */
389 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
390 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
391 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 392 if (val >= 0x100) {
a2a5de95
NC
393 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
394 "Character in '%c' format wrapped in unpack",
395 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
396 val &= 0xff;
397 }
398 *s += retlen;
fe2774ed 399 return (U8)val;
08ca2aa3
TH
400}
401
f337b084
TH
402#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
403 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
404 *(U8 *)(s)++)
405
406STATIC bool
f7fe979e 407uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
408{
409 UV val;
410 STRLEN retlen;
f7fe979e 411 const char *from = *s;
08ca2aa3 412 int bad = 0;
f7fe979e 413 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
414 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
415 for (;buf_len > 0; buf_len--) {
416 if (from >= end) return FALSE;
f337b084 417 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
418 if (retlen == (STRLEN) -1 || retlen == 0) {
419 from += UTF8SKIP(from);
420 bad |= 1;
421 } else from += retlen;
422 if (val >= 0x100) {
423 bad |= 2;
424 val &= 0xff;
425 }
fe2774ed 426 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
427 }
428 /* We have enough characters for the buffer. Did we have problems ? */
429 if (bad) {
430 if (bad & 1) {
431 /* Rewalk the string fragment while warning */
f7fe979e 432 const char *ptr;
9e27e96a 433 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
434 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
435 if (ptr >= end) break;
f337b084 436 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
437 }
438 if (from > end) from = end;
439 }
a2a5de95
NC
440 if ((bad & 2))
441 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 442 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
443 "Character(s) in '%c' format wrapped in %s",
444 (int) TYPE_NO_MODIFIERS(datumtype),
445 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
446 }
447 *s = from;
448 return TRUE;
449}
450
451STATIC bool
f7fe979e 452next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 453{
97aff369 454 dVAR;
08ca2aa3 455 STRLEN retlen;
0bcc34c2 456 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
457 if (val >= 0x100 || !ISUUCHAR(val) ||
458 retlen == (STRLEN) -1 || retlen == 0) {
459 *out = 0;
460 return FALSE;
461 }
462 *out = PL_uudmap[val] & 077;
f337b084 463 *s += retlen;
08ca2aa3
TH
464 return TRUE;
465}
78d46eaa 466
64844641 467STATIC char *
14333449 468S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 469 const U8 * const end = start + len;
64844641 470
7918f24d
NC
471 PERL_ARGS_ASSERT_BYTES_TO_UNI;
472
f337b084 473 while (start < end) {
48fa4626
ST
474 const UV uv = NATIVE_TO_ASCII(*start);
475 if (UNI_IS_INVARIANT(uv))
476 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
477 else {
478 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
479 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
480 }
481 start++;
482 }
64844641 483 return dest;
f337b084
TH
484}
485
230e1fce
NC
486#define PUSH_BYTES(utf8, cur, buf, len) \
487STMT_START { \
64844641
AL
488 if (utf8) \
489 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
490 else { \
491 Copy(buf, cur, len, char); \
492 (cur) += (len); \
493 } \
f337b084
TH
494} STMT_END
495
496#define GROWING(utf8, cat, start, cur, in_len) \
497STMT_START { \
498 STRLEN glen = (in_len); \
3473cf63 499 if (utf8) glen *= UTF8_EXPAND; \
f337b084 500 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 501 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
502 (cur) = (start) + SvCUR(cat); \
503 } \
504} STMT_END
505
506#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
507STMT_START { \
f7fe979e 508 const STRLEN glen = (in_len); \
f337b084 509 STRLEN gl = glen; \
3473cf63 510 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
511 if ((cur) + gl >= (start) + SvLEN(cat)) { \
512 *cur = '\0'; \
b162af07 513 SvCUR_set((cat), (cur) - (start)); \
0bd48802 514 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
515 (cur) = (start) + SvCUR(cat); \
516 } \
517 PUSH_BYTES(utf8, cur, buf, glen); \
518} STMT_END
519
520#define PUSH_BYTE(utf8, s, byte) \
521STMT_START { \
522 if (utf8) { \
f7fe979e 523 const U8 au8 = (byte); \
64844641 524 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
525 } else *(U8 *)(s)++ = (byte); \
526} STMT_END
527
528/* Only to be used inside a loop (see the break) */
529#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
530STMT_START { \
531 STRLEN retlen; \
532 if (str >= end) break; \
533 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
534 if (retlen == (STRLEN) -1 || retlen == 0) { \
535 *cur = '\0'; \
536 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
537 } \
538 str += retlen; \
539} STMT_END
540
f7fe979e
AL
541static const char *_action( const tempsym_t* symptr )
542{
10edeb5d 543 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
544}
545
206947d2 546/* Returns the sizeof() struct described by pat */
028d1f6d 547STATIC I32
f337b084 548S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 549{
f337b084 550 I32 total = 0;
206947d2 551
7918f24d
NC
552 PERL_ARGS_ASSERT_MEASURE_STRUCT;
553
49704364 554 while (next_symbol(symptr)) {
f337b084 555 I32 len;
f7fe979e 556 int size;
f337b084
TH
557
558 switch (symptr->howlen) {
fc241834 559 case e_star:
49704364 560 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 561 _action( symptr ) );
49704364 562 break;
f337b084
TH
563 default:
564 /* e_no_len and e_number */
565 len = symptr->length;
566 break;
49704364
LW
567 }
568
a7a3cfaa 569 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 570 if (!size) {
f7fe979e 571 int star;
80a13697
NC
572 /* endianness doesn't influence the size of a type */
573 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
574 default:
575 Perl_croak(aTHX_ "Invalid type '%c' in %s",
576 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 577 _action( symptr ) );
28be1210
TH
578 case '.' | TYPE_IS_SHRIEKING:
579 case '@' | TYPE_IS_SHRIEKING:
80a13697 580 case '@':
28be1210 581 case '.':
80a13697
NC
582 case '/':
583 case 'U': /* XXXX Is it correct? */
584 case 'w':
585 case 'u':
586 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 587 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 588 _action( symptr ) );
80a13697
NC
589 case '%':
590 size = 0;
591 break;
592 case '(':
fc241834
RGS
593 {
594 tempsym_t savsym = *symptr;
595 symptr->patptr = savsym.grpbeg;
596 symptr->patend = savsym.grpend;
597 /* XXXX Theoretically, we need to measure many times at
598 different positions, since the subexpression may contain
599 alignment commands, but be not of aligned length.
600 Need to detect this and croak(). */
601 size = measure_struct(symptr);
602 *symptr = savsym;
603 break;
604 }
80a13697
NC
605 case 'X' | TYPE_IS_SHRIEKING:
606 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
607 */
608 if (!len) /* Avoid division by 0 */
609 len = 1;
610 len = total % len; /* Assumed: the start is aligned. */
611 /* FALL THROUGH */
612 case 'X':
613 size = -1;
614 if (total < len)
f7fe979e 615 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
616 break;
617 case 'x' | TYPE_IS_SHRIEKING:
618 if (!len) /* Avoid division by 0 */
619 len = 1;
620 star = total % len; /* Assumed: the start is aligned. */
621 if (star) /* Other portable ways? */
622 len = len - star;
623 else
624 len = 0;
625 /* FALL THROUGH */
626 case 'x':
627 case 'A':
628 case 'Z':
629 case 'a':
80a13697
NC
630 size = 1;
631 break;
632 case 'B':
633 case 'b':
634 len = (len + 7)/8;
635 size = 1;
636 break;
637 case 'H':
638 case 'h':
639 len = (len + 1)/2;
640 size = 1;
641 break;
78d46eaa 642
80a13697
NC
643 case 'P':
644 len = 1;
645 size = sizeof(char*);
78d46eaa
NC
646 break;
647 }
206947d2
IZ
648 }
649 total += len * size;
650 }
651 return total;
652}
653
49704364
LW
654
655/* locate matching closing parenthesis or bracket
656 * returns char pointer to char after match, or NULL
657 */
f7fe979e 658STATIC const char *
5aaab254 659S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 660{
7918f24d
NC
661 PERL_ARGS_ASSERT_GROUP_END;
662
49704364 663 while (patptr < patend) {
f7fe979e 664 const char c = *patptr++;
49704364
LW
665
666 if (isSPACE(c))
667 continue;
668 else if (c == ender)
669 return patptr-1;
670 else if (c == '#') {
671 while (patptr < patend && *patptr != '\n')
672 patptr++;
673 continue;
674 } else if (c == '(')
675 patptr = group_end(patptr, patend, ')') + 1;
676 else if (c == '[')
677 patptr = group_end(patptr, patend, ']') + 1;
18529408 678 }
49704364
LW
679 Perl_croak(aTHX_ "No group ending character '%c' found in template",
680 ender);
681 return 0;
18529408
IZ
682}
683
49704364
LW
684
685/* Convert unsigned decimal number to binary.
686 * Expects a pointer to the first digit and address of length variable
687 * Advances char pointer to 1st non-digit char and returns number
fc241834 688 */
f7fe979e 689STATIC const char *
5aaab254 690S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
691{
692 I32 len = *patptr++ - '0';
7918f24d
NC
693
694 PERL_ARGS_ASSERT_GET_NUM;
695
49704364
LW
696 while (isDIGIT(*patptr)) {
697 if (len >= 0x7FFFFFFF/10)
698 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
699 len = (len * 10) + (*patptr++ - '0');
700 }
701 *lenptr = len;
702 return patptr;
703}
704
705/* The marvellous template parsing routine: Using state stored in *symptr,
706 * locates next template code and count
707 */
708STATIC bool
f337b084 709S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 710{
f7fe979e 711 const char* patptr = symptr->patptr;
0bcc34c2 712 const char* const patend = symptr->patend;
49704364 713
7918f24d
NC
714 PERL_ARGS_ASSERT_NEXT_SYMBOL;
715
49704364
LW
716 symptr->flags &= ~FLAG_SLASH;
717
718 while (patptr < patend) {
719 if (isSPACE(*patptr))
720 patptr++;
721 else if (*patptr == '#') {
722 patptr++;
723 while (patptr < patend && *patptr != '\n')
724 patptr++;
725 if (patptr < patend)
726 patptr++;
727 } else {
fc241834 728 /* We should have found a template code */
49704364 729 I32 code = *patptr++ & 0xFF;
66c611c5 730 U32 inherited_modifiers = 0;
49704364
LW
731
732 if (code == ','){ /* grandfather in commas but with a warning */
733 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
734 symptr->flags |= FLAG_COMMA;
735 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 736 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
737 }
738 continue;
739 }
fc241834 740
49704364 741 /* for '(', skip to ')' */
fc241834 742 if (code == '(') {
49704364
LW
743 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
744 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 745 _action( symptr ) );
49704364
LW
746 symptr->grpbeg = patptr;
747 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
748 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
749 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 750 _action( symptr ) );
49704364
LW
751 }
752
66c611c5
MHM
753 /* look for group modifiers to inherit */
754 if (TYPE_ENDIANNESS(symptr->flags)) {
755 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
756 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
757 }
758
1109a392
MHM
759 /* look for modifiers */
760 while (patptr < patend) {
b7787f18
AL
761 const char *allowed;
762 I32 modifier;
1109a392
MHM
763 switch (*patptr) {
764 case '!':
765 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 766 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
767 break;
768 case '>':
769 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 770 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
771 break;
772 case '<':
773 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 774 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
775 break;
776 default:
b7787f18
AL
777 allowed = "";
778 modifier = 0;
1109a392
MHM
779 break;
780 }
66c611c5 781
1109a392
MHM
782 if (modifier == 0)
783 break;
66c611c5 784
1109a392
MHM
785 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
786 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 787 allowed, _action( symptr ) );
66c611c5
MHM
788
789 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 790 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 791 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
792 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
793 TYPE_ENDIANNESS_MASK)
794 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 795 *patptr, _action( symptr ) );
66c611c5 796
a2a5de95
NC
797 if ((code & modifier)) {
798 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
799 "Duplicate modifier '%c' after '%c' in %s",
800 *patptr, (int) TYPE_NO_MODIFIERS(code),
801 _action( symptr ) );
1109a392 802 }
66c611c5 803
1109a392
MHM
804 code |= modifier;
805 patptr++;
49704364
LW
806 }
807
66c611c5
MHM
808 /* inherit modifiers */
809 code |= inherited_modifiers;
810
fc241834 811 /* look for count and/or / */
49704364
LW
812 if (patptr < patend) {
813 if (isDIGIT(*patptr)) {
814 patptr = get_num( patptr, &symptr->length );
815 symptr->howlen = e_number;
816
817 } else if (*patptr == '*') {
818 patptr++;
819 symptr->howlen = e_star;
820
821 } else if (*patptr == '[') {
f7fe979e 822 const char* lenptr = ++patptr;
49704364
LW
823 symptr->howlen = e_number;
824 patptr = group_end( patptr, patend, ']' ) + 1;
825 /* what kind of [] is it? */
826 if (isDIGIT(*lenptr)) {
827 lenptr = get_num( lenptr, &symptr->length );
828 if( *lenptr != ']' )
829 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 830 _action( symptr ) );
49704364
LW
831 } else {
832 tempsym_t savsym = *symptr;
833 symptr->patend = patptr-1;
834 symptr->patptr = lenptr;
835 savsym.length = measure_struct(symptr);
836 *symptr = savsym;
837 }
838 } else {
839 symptr->howlen = e_no_len;
840 symptr->length = 1;
841 }
842
843 /* try to find / */
844 while (patptr < patend) {
845 if (isSPACE(*patptr))
846 patptr++;
847 else if (*patptr == '#') {
848 patptr++;
849 while (patptr < patend && *patptr != '\n')
850 patptr++;
851 if (patptr < patend)
852 patptr++;
853 } else {
66c611c5 854 if (*patptr == '/') {
49704364
LW
855 symptr->flags |= FLAG_SLASH;
856 patptr++;
66c611c5
MHM
857 if (patptr < patend &&
858 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 859 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 860 _action( symptr ) );
49704364
LW
861 }
862 break;
863 }
18529408 864 }
49704364
LW
865 } else {
866 /* at end - no count, no / */
867 symptr->howlen = e_no_len;
868 symptr->length = 1;
869 }
870
871 symptr->code = code;
fc241834 872 symptr->patptr = patptr;
49704364 873 return TRUE;
18529408 874 }
49704364 875 }
fc241834 876 symptr->patptr = patptr;
49704364 877 return FALSE;
18529408
IZ
878}
879
18529408 880/*
fc241834 881 There is no way to cleanly handle the case where we should process the
08ca2aa3 882 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
883 (e.g. estimates like strend-s as an upper bound for the number of
884 characters left wouldn't work). So if we foresee the need of this
885 (pattern starts with U or contains U0), we want to work on the encoded
886 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
887 themselves if they need to do a lot of unpacks like this on it
888*/
fc241834 889STATIC bool
08ca2aa3
TH
890need_utf8(const char *pat, const char *patend)
891{
892 bool first = TRUE;
7918f24d
NC
893
894 PERL_ARGS_ASSERT_NEED_UTF8;
895
08ca2aa3
TH
896 while (pat < patend) {
897 if (pat[0] == '#') {
898 pat++;
f7fe979e 899 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
900 if (!pat) return FALSE;
901 } else if (pat[0] == 'U') {
902 if (first || pat[1] == '0') return TRUE;
903 } else first = FALSE;
904 pat++;
905 }
906 return FALSE;
907}
908
909STATIC char
910first_symbol(const char *pat, const char *patend) {
7918f24d
NC
911 PERL_ARGS_ASSERT_FIRST_SYMBOL;
912
08ca2aa3
TH
913 while (pat < patend) {
914 if (pat[0] != '#') return pat[0];
915 pat++;
f7fe979e 916 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
917 if (!pat) return 0;
918 pat++;
919 }
920 return 0;
921}
922
923/*
7accc089
JH
924=for apidoc unpackstring
925
21ebfc7a
DM
926The engine implementing the unpack() Perl function.
927
928Using the template pat..patend, this function unpacks the string
929s..strend into a number of mortal SVs, which it pushes onto the perl
930argument (@_) stack (so you will need to issue a C<PUTBACK> before and
931C<SPAGAIN> after the call to this function). It returns the number of
932pushed elements.
933
934The strend and patend pointers should point to the byte following the last
935character of each string.
936
937Although this function returns its values on the perl argument stack, it
938doesn't take any parameters from that stack (and thus in particular
939there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
940example).
7accc089
JH
941
942=cut */
943
944I32
f7fe979e 945Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 946{
f7fe979e 947 tempsym_t sym;
08ca2aa3 948
7918f24d
NC
949 PERL_ARGS_ASSERT_UNPACKSTRING;
950
f337b084 951 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
952 else if (need_utf8(pat, patend)) {
953 /* We probably should try to avoid this in case a scalar context call
954 wouldn't get to the "U0" */
955 STRLEN len = strend - s;
230e1fce 956 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
957 SAVEFREEPV(s);
958 strend = s + len;
f337b084 959 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
960 }
961
f337b084
TH
962 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
963 flags |= FLAG_PARSE_UTF8;
08ca2aa3 964
f7fe979e 965 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
966
967 return unpack_rec(&sym, s, s, strend, NULL );
968}
969
4136a0f7 970STATIC I32
f7fe979e 971S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 972{
27da23d5 973 dVAR; dSP;
3297d27d 974 SV *sv = NULL;
f7fe979e 975 const I32 start_sp_offset = SP - PL_stack_base;
49704364 976 howlen_t howlen;
a6ec74c1 977 I32 checksum = 0;
92d41999 978 UV cuv = 0;
a6ec74c1 979 NV cdouble = 0.0;
f337b084 980 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 981 bool beyond = FALSE;
21c16052 982 bool explicit_length;
9e27e96a 983 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 984 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
985
986 PERL_ARGS_ASSERT_UNPACK_REC;
987
28be1210 988 symptr->strbeg = s - strbeg;
49704364 989
49704364 990 while (next_symbol(symptr)) {
a7a3cfaa 991 packprops_t props;
9e27e96a 992 I32 len;
f337b084 993 I32 datumtype = symptr->code;
206947d2 994 /* do first one only unless in list context
08ca2aa3 995 / is implemented by unpacking the count, then popping it from the
206947d2 996 stack, so must check that we're not in the middle of a / */
49704364 997 if ( unpack_only_one
206947d2 998 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 999 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1000 break;
49704364 1001
f337b084 1002 switch (howlen = symptr->howlen) {
fc241834
RGS
1003 case e_star:
1004 len = strend - strbeg; /* long enough */
49704364 1005 break;
f337b084
TH
1006 default:
1007 /* e_no_len and e_number */
1008 len = symptr->length;
1009 break;
49704364 1010 }
18529408 1011
21c16052 1012 explicit_length = TRUE;
a6ec74c1 1013 redo_switch:
49704364 1014 beyond = s >= strend;
a7a3cfaa
TH
1015
1016 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1017 if (props) {
1018 /* props nonzero means we can process this letter. */
9e27e96a
AL
1019 const long size = props & PACK_SIZE_MASK;
1020 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1021 if (len > howmany)
1022 len = howmany;
1023
1024 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1025 if (len && unpack_only_one) len = 1;
1026 EXTEND(SP, len);
1027 EXTEND_MORTAL(len);
78d46eaa
NC
1028 }
1029 }
a7a3cfaa 1030
1109a392 1031 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1032 default:
1109a392 1033 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1034
a6ec74c1 1035 case '%':
49704364 1036 if (howlen == e_no_len)
18529408 1037 len = 16; /* len is not specified */
a6ec74c1 1038 checksum = len;
92d41999 1039 cuv = 0;
a6ec74c1 1040 cdouble = 0;
18529408 1041 continue;
a6ec74c1 1042 break;
18529408
IZ
1043 case '(':
1044 {
49704364 1045 tempsym_t savsym = *symptr;
9e27e96a 1046 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1047 symptr->flags |= group_modifiers;
49704364 1048 symptr->patend = savsym.grpend;
28be1210 1049 symptr->previous = &savsym;
49704364 1050 symptr->level++;
18529408 1051 PUTBACK;
c6f750d1 1052 if (len && unpack_only_one) len = 1;
18529408 1053 while (len--) {
49704364 1054 symptr->patptr = savsym.grpbeg;
f337b084
TH
1055 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1056 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1057 unpack_rec(symptr, s, strbeg, strend, &s);
1058 if (s == strend && savsym.howlen == e_star)
49704364 1059 break; /* No way to continue */
18529408
IZ
1060 }
1061 SPAGAIN;
28be1210 1062 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1063 *symptr = savsym;
18529408
IZ
1064 break;
1065 }
28be1210 1066 case '.' | TYPE_IS_SHRIEKING:
28be1210 1067 case '.': {
9e27e96a 1068 const char *from;
28be1210 1069 SV *sv;
9e27e96a 1070 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
1071 if (howlen == e_star) from = strbeg;
1072 else if (len <= 0) from = s;
1073 else {
1074 tempsym_t *group = symptr;
1075
1076 while (--len && group) group = group->previous;
1077 from = group ? strbeg + group->strbeg : strbeg;
1078 }
1079 sv = from <= s ?
00646304
CB
1080 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1081 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1082 mXPUSHs(sv);
28be1210
TH
1083 break;
1084 }
28be1210 1085 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 1086 case '@':
28be1210 1087 s = strbeg + symptr->strbeg;
28be1210 1088 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 1089 {
08ca2aa3
TH
1090 while (len > 0) {
1091 if (s >= strend)
1092 Perl_croak(aTHX_ "'@' outside of string in unpack");
1093 s += UTF8SKIP(s);
1094 len--;
1095 }
1096 if (s > strend)
1097 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1098 } else {
28be1210 1099 if (strend-s < len)
fc241834 1100 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1101 s += len;
08ca2aa3 1102 }
a6ec74c1 1103 break;
62f95557
IZ
1104 case 'X' | TYPE_IS_SHRIEKING:
1105 if (!len) /* Avoid division by 0 */
1106 len = 1;
08ca2aa3 1107 if (utf8) {
f7fe979e 1108 const char *hop, *last;
f337b084
TH
1109 I32 l = len;
1110 hop = last = strbeg;
1111 while (hop < s) {
1112 hop += UTF8SKIP(hop);
1113 if (--l == 0) {
08ca2aa3 1114 last = hop;
f337b084
TH
1115 l = len;
1116 }
fc241834 1117 }
f337b084
TH
1118 if (last > s)
1119 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1120 s = last;
1121 break;
f337b084
TH
1122 }
1123 len = (s - strbeg) % len;
62f95557 1124 /* FALL THROUGH */
a6ec74c1 1125 case 'X':
08ca2aa3
TH
1126 if (utf8) {
1127 while (len > 0) {
1128 if (s <= strbeg)
1129 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1130 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1131 if (s <= strbeg)
1132 Perl_croak(aTHX_ "'X' outside of string in unpack");
1133 }
1134 len--;
1135 }
1136 } else {
fc241834
RGS
1137 if (len > s - strbeg)
1138 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1139 s -= len;
08ca2aa3 1140 }
a6ec74c1 1141 break;
9e27e96a
AL
1142 case 'x' | TYPE_IS_SHRIEKING: {
1143 I32 ai32;
62f95557
IZ
1144 if (!len) /* Avoid division by 0 */
1145 len = 1;
230e1fce
NC
1146 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1147 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1148 if (ai32 == 0) break;
1149 len -= ai32;
9e27e96a 1150 }
62f95557 1151 /* FALL THROUGH */
a6ec74c1 1152 case 'x':
08ca2aa3
TH
1153 if (utf8) {
1154 while (len>0) {
1155 if (s >= strend)
1156 Perl_croak(aTHX_ "'x' outside of string in unpack");
1157 s += UTF8SKIP(s);
1158 len--;
1159 }
1160 } else {
fc241834
RGS
1161 if (len > strend - s)
1162 Perl_croak(aTHX_ "'x' outside of string in unpack");
1163 s += len;
f337b084 1164 }
a6ec74c1
JH
1165 break;
1166 case '/':
49704364
LW
1167 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1168 break;
a6ec74c1
JH
1169 case 'A':
1170 case 'Z':
1171 case 'a':
08ca2aa3
TH
1172 if (checksum) {
1173 /* Preliminary length estimate is assumed done in 'W' */
1174 if (len > strend - s) len = strend - s;
1175 goto W_checksum;
1176 }
1177 if (utf8) {
1178 I32 l;
f7fe979e 1179 const char *hop;
08ca2aa3
TH
1180 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1181 if (hop >= strend) {
1182 if (hop > strend)
1183 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1184 break;
fc241834 1185 }
a6ec74c1 1186 }
08ca2aa3
TH
1187 if (hop > strend)
1188 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1189 len = hop - s;
1190 } else if (len > strend - s)
1191 len = strend - s;
1192
1193 if (datumtype == 'Z') {
1194 /* 'Z' strips stuff after first null */
f7fe979e 1195 const char *ptr, *end;
f337b084
TH
1196 end = s + len;
1197 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1198 sv = newSVpvn(s, ptr-s);
1199 if (howlen == e_star) /* exact for 'Z*' */
1200 len = ptr-s + (ptr != strend ? 1 : 0);
1201 } else if (datumtype == 'A') {
1202 /* 'A' strips both nulls and spaces */
f7fe979e 1203 const char *ptr;
18bdf90a
TH
1204 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1205 for (ptr = s+len-1; ptr >= s; ptr--)
1206 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1207 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1208 if (ptr >= s) ptr += UTF8SKIP(ptr);
1209 else ptr++;
28be1210 1210 if (ptr > s+len)
18bdf90a
TH
1211 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1212 } else {
1213 for (ptr = s+len-1; ptr >= s; ptr--)
1214 if (*ptr != 0 && !isSPACE(*ptr)) break;
1215 ptr++;
1216 }
08ca2aa3
TH
1217 sv = newSVpvn(s, ptr-s);
1218 } else sv = newSVpvn(s, len);
1219
1220 if (utf8) {
1221 SvUTF8_on(sv);
1222 /* Undo any upgrade done due to need_utf8() */
f337b084 1223 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1224 sv_utf8_downgrade(sv, 0);
a6ec74c1 1225 }
6e449a3a 1226 mXPUSHs(sv);
08ca2aa3 1227 s += len;
a6ec74c1
JH
1228 break;
1229 case 'B':
08ca2aa3
TH
1230 case 'b': {
1231 char *str;
49704364 1232 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1233 len = (strend - s) * 8;
1234 if (checksum) {
f337b084 1235 if (utf8)
08ca2aa3 1236 while (len >= 8 && s < strend) {
f337b084 1237 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1238 len -= 8;
1239 }
f337b084 1240 else
fc241834 1241 while (len >= 8) {
08ca2aa3 1242 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1243 len -= 8;
1244 }
08ca2aa3
TH
1245 if (len && s < strend) {
1246 U8 bits;
f337b084
TH
1247 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1248 if (datumtype == 'b')
a6ec74c1 1249 while (len-- > 0) {
92d41999 1250 if (bits & 1) cuv++;
a6ec74c1
JH
1251 bits >>= 1;
1252 }
f337b084 1253 else
a6ec74c1 1254 while (len-- > 0) {
08ca2aa3 1255 if (bits & 0x80) cuv++;
a6ec74c1
JH
1256 bits <<= 1;
1257 }
fc241834 1258 }
a6ec74c1
JH
1259 break;
1260 }
08ca2aa3 1261
561b68a9 1262 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1263 SvPOK_on(sv);
1264 str = SvPVX(sv);
1265 if (datumtype == 'b') {
f337b084 1266 U8 bits = 0;
f7fe979e 1267 const I32 ai32 = len;
08ca2aa3
TH
1268 for (len = 0; len < ai32; len++) {
1269 if (len & 7) bits >>= 1;
1270 else if (utf8) {
1271 if (s >= strend) break;
f337b084 1272 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1273 } else bits = *(U8 *) s++;
1274 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1275 }
08ca2aa3 1276 } else {
f337b084 1277 U8 bits = 0;
f7fe979e 1278 const I32 ai32 = len;
08ca2aa3
TH
1279 for (len = 0; len < ai32; len++) {
1280 if (len & 7) bits <<= 1;
1281 else if (utf8) {
1282 if (s >= strend) break;
f337b084 1283 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1284 } else bits = *(U8 *) s++;
1285 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1286 }
1287 }
1288 *str = '\0';
aa07b2f6 1289 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1290 XPUSHs(sv);
a6ec74c1 1291 break;
08ca2aa3 1292 }
a6ec74c1 1293 case 'H':
08ca2aa3 1294 case 'h': {
3297d27d 1295 char *str = NULL;
fc241834 1296 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1297 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1298 len = (strend - s) * 2;
858fe5e1
TC
1299 if (!checksum) {
1300 sv = sv_2mortal(newSV(len ? len : 1));
1301 SvPOK_on(sv);
1302 str = SvPVX(sv);
1303 }
a6ec74c1 1304 if (datumtype == 'h') {
f337b084 1305 U8 bits = 0;
9e27e96a 1306 I32 ai32 = len;
fc241834
RGS
1307 for (len = 0; len < ai32; len++) {
1308 if (len & 1) bits >>= 4;
1309 else if (utf8) {
1310 if (s >= strend) break;
f337b084 1311 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1312 } else bits = * (U8 *) s++;
858fe5e1
TC
1313 if (!checksum)
1314 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1315 }
08ca2aa3 1316 } else {
f337b084 1317 U8 bits = 0;
f7fe979e 1318 const I32 ai32 = len;
08ca2aa3
TH
1319 for (len = 0; len < ai32; len++) {
1320 if (len & 1) bits <<= 4;
1321 else if (utf8) {
1322 if (s >= strend) break;
f337b084 1323 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1324 } else bits = *(U8 *) s++;
858fe5e1
TC
1325 if (!checksum)
1326 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1327 }
1328 }
858fe5e1
TC
1329 if (!checksum) {
1330 *str = '\0';
1331 SvCUR_set(sv, str - SvPVX_const(sv));
1332 XPUSHs(sv);
1333 }
a6ec74c1 1334 break;
08ca2aa3 1335 }
1651fc44
ML
1336 case 'C':
1337 if (len == 0) {
1338 if (explicit_length)
1339 /* Switch to "character" mode */
1340 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1341 break;
1342 }
1343 /* FALL THROUGH */
a6ec74c1 1344 case 'c':
1651fc44
ML
1345 while (len-- > 0 && s < strend) {
1346 int aint;
1347 if (utf8)
1348 {
1349 STRLEN retlen;
1350 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1351 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1352 if (retlen == (STRLEN) -1 || retlen == 0)
1353 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1354 s += retlen;
1355 }
1356 else
1357 aint = *(U8 *)(s)++;
1358 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1359 aint -= 256;
08ca2aa3 1360 if (!checksum)
6e449a3a 1361 mPUSHi(aint);
73cb7263
NC
1362 else if (checksum > bits_in_uv)
1363 cdouble += (NV)aint;
1364 else
1365 cuv += aint;
a6ec74c1
JH
1366 }
1367 break;
08ca2aa3
TH
1368 case 'W':
1369 W_checksum:
1651fc44 1370 if (utf8) {
08ca2aa3 1371 while (len-- > 0 && s < strend) {
08ca2aa3 1372 STRLEN retlen;
f7fe979e 1373 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1374 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1375 if (retlen == (STRLEN) -1 || retlen == 0)
1376 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1377 s += retlen;
1378 if (!checksum)
6e449a3a 1379 mPUSHu(val);
08ca2aa3
TH
1380 else if (checksum > bits_in_uv)
1381 cdouble += (NV) val;
d6d3e8bd 1382 else
08ca2aa3 1383 cuv += val;
fc241834 1384 }
08ca2aa3 1385 } else if (!checksum)
a6ec74c1 1386 while (len-- > 0) {
f7fe979e 1387 const U8 ch = *(U8 *) s++;
6e449a3a 1388 mPUSHu(ch);
a6ec74c1 1389 }
08ca2aa3
TH
1390 else if (checksum > bits_in_uv)
1391 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1392 else
1393 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1394 break;
1395 case 'U':
35bcd338 1396 if (len == 0) {
c5333953 1397 if (explicit_length && howlen != e_star) {
08ca2aa3 1398 /* Switch to "bytes in UTF-8" mode */
f337b084 1399 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1400 else
1401 /* Should be impossible due to the need_utf8() test */
1402 Perl_croak(aTHX_ "U0 mode on a byte string");
1403 }
35bcd338
JH
1404 break;
1405 }
08ca2aa3 1406 if (len > strend - s) len = strend - s;
fc241834 1407 if (!checksum) {
08ca2aa3
TH
1408 if (len && unpack_only_one) len = 1;
1409 EXTEND(SP, len);
1410 EXTEND_MORTAL(len);
fc241834 1411 }
08ca2aa3
TH
1412 while (len-- > 0 && s < strend) {
1413 STRLEN retlen;
1414 UV auv;
1415 if (utf8) {
1416 U8 result[UTF8_MAXLEN];
f7fe979e 1417 const char *ptr = s;
08ca2aa3 1418 STRLEN len;
08ca2aa3
TH
1419 /* Bug: warns about bad utf8 even if we are short on bytes
1420 and will break out of the loop */
230e1fce
NC
1421 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1422 'U'))
08ca2aa3
TH
1423 break;
1424 len = UTF8SKIP(result);
fc241834 1425 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1426 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1427 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1428 s = ptr;
1429 } else {
cde84f2a 1430 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1431 if (retlen == (STRLEN) -1 || retlen == 0)
1432 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1433 s += retlen;
1434 }
1435 if (!checksum)
6e449a3a 1436 mPUSHu(auv);
73cb7263 1437 else if (checksum > bits_in_uv)
08ca2aa3 1438 cdouble += (NV) auv;
73cb7263 1439 else
08ca2aa3 1440 cuv += auv;
a6ec74c1
JH
1441 }
1442 break;
49704364
LW
1443 case 's' | TYPE_IS_SHRIEKING:
1444#if SHORTSIZE != SIZE16
73cb7263 1445 while (len-- > 0) {
08ca2aa3 1446 short ashort;
f337b084
TH
1447 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1448 DO_BO_UNPACK(ashort, s);
08ca2aa3 1449 if (!checksum)
6e449a3a 1450 mPUSHi(ashort);
73cb7263
NC
1451 else if (checksum > bits_in_uv)
1452 cdouble += (NV)ashort;
1453 else
1454 cuv += ashort;
49704364
LW
1455 }
1456 break;
1457#else
1458 /* Fallthrough! */
a6ec74c1 1459#endif
49704364 1460 case 's':
73cb7263 1461 while (len-- > 0) {
08ca2aa3
TH
1462 I16 ai16;
1463
1464#if U16SIZE > SIZE16
1465 ai16 = 0;
1466#endif
f337b084 1467 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1468 DO_BO_UNPACK(ai16, 16);
1109a392 1469#if U16SIZE > SIZE16
73cb7263
NC
1470 if (ai16 > 32767)
1471 ai16 -= 65536;
a6ec74c1 1472#endif
08ca2aa3 1473 if (!checksum)
6e449a3a 1474 mPUSHi(ai16);
73cb7263
NC
1475 else if (checksum > bits_in_uv)
1476 cdouble += (NV)ai16;
1477 else
1478 cuv += ai16;
a6ec74c1
JH
1479 }
1480 break;
49704364
LW
1481 case 'S' | TYPE_IS_SHRIEKING:
1482#if SHORTSIZE != SIZE16
73cb7263 1483 while (len-- > 0) {
08ca2aa3 1484 unsigned short aushort;
f337b084
TH
1485 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1486 DO_BO_UNPACK(aushort, s);
08ca2aa3 1487 if (!checksum)
6e449a3a 1488 mPUSHu(aushort);
73cb7263
NC
1489 else if (checksum > bits_in_uv)
1490 cdouble += (NV)aushort;
1491 else
1492 cuv += aushort;
49704364
LW
1493 }
1494 break;
1495#else
486ec47a 1496 /* Fallthrough! */
49704364 1497#endif
a6ec74c1
JH
1498 case 'v':
1499 case 'n':
1500 case 'S':
73cb7263 1501 while (len-- > 0) {
08ca2aa3
TH
1502 U16 au16;
1503#if U16SIZE > SIZE16
1504 au16 = 0;
1505#endif
f337b084 1506 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1507 DO_BO_UNPACK(au16, 16);
73cb7263
NC
1508 if (datumtype == 'n')
1509 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1510 if (datumtype == 'v')
1511 au16 = vtohs(au16);
08ca2aa3 1512 if (!checksum)
6e449a3a 1513 mPUSHu(au16);
73cb7263 1514 else if (checksum > bits_in_uv)
f337b084 1515 cdouble += (NV) au16;
73cb7263
NC
1516 else
1517 cuv += au16;
a6ec74c1
JH
1518 }
1519 break;
068bd2e7
MHM
1520 case 'v' | TYPE_IS_SHRIEKING:
1521 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1522 while (len-- > 0) {
08ca2aa3
TH
1523 I16 ai16;
1524# if U16SIZE > SIZE16
1525 ai16 = 0;
1526# endif
f337b084 1527 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1528 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1529 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1530 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1531 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1532 if (!checksum)
6e449a3a 1533 mPUSHi(ai16);
73cb7263 1534 else if (checksum > bits_in_uv)
08ca2aa3 1535 cdouble += (NV) ai16;
73cb7263
NC
1536 else
1537 cuv += ai16;
068bd2e7
MHM
1538 }
1539 break;
a6ec74c1 1540 case 'i':
49704364 1541 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1542 while (len-- > 0) {
08ca2aa3 1543 int aint;
f337b084
TH
1544 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1545 DO_BO_UNPACK(aint, i);
08ca2aa3 1546 if (!checksum)
6e449a3a 1547 mPUSHi(aint);
73cb7263
NC
1548 else if (checksum > bits_in_uv)
1549 cdouble += (NV)aint;
1550 else
1551 cuv += aint;
a6ec74c1
JH
1552 }
1553 break;
1554 case 'I':
49704364 1555 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1556 while (len-- > 0) {
08ca2aa3 1557 unsigned int auint;
f337b084
TH
1558 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1559 DO_BO_UNPACK(auint, i);
08ca2aa3 1560 if (!checksum)
6e449a3a 1561 mPUSHu(auint);
73cb7263
NC
1562 else if (checksum > bits_in_uv)
1563 cdouble += (NV)auint;
1564 else
1565 cuv += auint;
a6ec74c1
JH
1566 }
1567 break;
92d41999 1568 case 'j':
73cb7263 1569 while (len-- > 0) {
08ca2aa3 1570 IV aiv;
f337b084 1571 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1572#if IVSIZE == INTSIZE
f337b084 1573 DO_BO_UNPACK(aiv, i);
1109a392 1574#elif IVSIZE == LONGSIZE
f337b084 1575 DO_BO_UNPACK(aiv, l);
1109a392 1576#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1577 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1578#else
1579 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1580#endif
08ca2aa3 1581 if (!checksum)
6e449a3a 1582 mPUSHi(aiv);
73cb7263
NC
1583 else if (checksum > bits_in_uv)
1584 cdouble += (NV)aiv;
1585 else
1586 cuv += aiv;
92d41999
JH
1587 }
1588 break;
1589 case 'J':
73cb7263 1590 while (len-- > 0) {
08ca2aa3 1591 UV auv;
f337b084 1592 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1593#if IVSIZE == INTSIZE
f337b084 1594 DO_BO_UNPACK(auv, i);
08ca2aa3 1595#elif IVSIZE == LONGSIZE
f337b084 1596 DO_BO_UNPACK(auv, l);
08ca2aa3 1597#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1598 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1599#else
1600 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1601#endif
08ca2aa3 1602 if (!checksum)
6e449a3a 1603 mPUSHu(auv);
73cb7263
NC
1604 else if (checksum > bits_in_uv)
1605 cdouble += (NV)auv;
1606 else
1607 cuv += auv;
92d41999
JH
1608 }
1609 break;
49704364
LW
1610 case 'l' | TYPE_IS_SHRIEKING:
1611#if LONGSIZE != SIZE32
73cb7263 1612 while (len-- > 0) {
08ca2aa3 1613 long along;
f337b084
TH
1614 SHIFT_VAR(utf8, s, strend, along, datumtype);
1615 DO_BO_UNPACK(along, l);
08ca2aa3 1616 if (!checksum)
6e449a3a 1617 mPUSHi(along);
73cb7263
NC
1618 else if (checksum > bits_in_uv)
1619 cdouble += (NV)along;
1620 else
1621 cuv += along;
49704364
LW
1622 }
1623 break;
1624#else
1625 /* Fallthrough! */
a6ec74c1 1626#endif
49704364 1627 case 'l':
73cb7263 1628 while (len-- > 0) {
08ca2aa3
TH
1629 I32 ai32;
1630#if U32SIZE > SIZE32
1631 ai32 = 0;
1632#endif
f337b084 1633 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1634 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1635#if U32SIZE > SIZE32
08ca2aa3 1636 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1637#endif
08ca2aa3 1638 if (!checksum)
6e449a3a 1639 mPUSHi(ai32);
73cb7263
NC
1640 else if (checksum > bits_in_uv)
1641 cdouble += (NV)ai32;
1642 else
1643 cuv += ai32;
a6ec74c1
JH
1644 }
1645 break;
49704364
LW
1646 case 'L' | TYPE_IS_SHRIEKING:
1647#if LONGSIZE != SIZE32
73cb7263 1648 while (len-- > 0) {
08ca2aa3 1649 unsigned long aulong;
f337b084
TH
1650 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1651 DO_BO_UNPACK(aulong, l);
08ca2aa3 1652 if (!checksum)
6e449a3a 1653 mPUSHu(aulong);
73cb7263
NC
1654 else if (checksum > bits_in_uv)
1655 cdouble += (NV)aulong;
1656 else
1657 cuv += aulong;
49704364
LW
1658 }
1659 break;
1660#else
1661 /* Fall through! */
1662#endif
a6ec74c1
JH
1663 case 'V':
1664 case 'N':
1665 case 'L':
73cb7263 1666 while (len-- > 0) {
08ca2aa3
TH
1667 U32 au32;
1668#if U32SIZE > SIZE32
1669 au32 = 0;
1670#endif
f337b084 1671 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1672 DO_BO_UNPACK(au32, 32);
73cb7263
NC
1673 if (datumtype == 'N')
1674 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1675 if (datumtype == 'V')
1676 au32 = vtohl(au32);
08ca2aa3 1677 if (!checksum)
6e449a3a 1678 mPUSHu(au32);
fc241834
RGS
1679 else if (checksum > bits_in_uv)
1680 cdouble += (NV)au32;
1681 else
1682 cuv += au32;
a6ec74c1
JH
1683 }
1684 break;
068bd2e7
MHM
1685 case 'V' | TYPE_IS_SHRIEKING:
1686 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1687 while (len-- > 0) {
08ca2aa3 1688 I32 ai32;
f8e5a5db 1689#if U32SIZE > SIZE32
08ca2aa3 1690 ai32 = 0;
f8e5a5db 1691#endif
f337b084 1692 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263
NC
1693 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1694 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1695 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1696 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1697 if (!checksum)
6e449a3a 1698 mPUSHi(ai32);
73cb7263
NC
1699 else if (checksum > bits_in_uv)
1700 cdouble += (NV)ai32;
1701 else
1702 cuv += ai32;
068bd2e7
MHM
1703 }
1704 break;
a6ec74c1 1705 case 'p':
a6ec74c1 1706 while (len-- > 0) {
f7fe979e 1707 const char *aptr;
f337b084 1708 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1709 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1710 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1711 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1712 }
1713 break;
1714 case 'w':
a6ec74c1
JH
1715 {
1716 UV auv = 0;
1717 U32 bytes = 0;
fc241834 1718
08ca2aa3
TH
1719 while (len > 0 && s < strend) {
1720 U8 ch;
f337b084 1721 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1722 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1723 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1724 if (ch < 0x80) {
a6ec74c1 1725 bytes = 0;
6e449a3a 1726 mPUSHu(auv);
a6ec74c1
JH
1727 len--;
1728 auv = 0;
08ca2aa3 1729 continue;
a6ec74c1 1730 }
08ca2aa3 1731 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1732 const char *t;
a6ec74c1 1733
f5992bc4 1734 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1735 while (s < strend) {
f337b084 1736 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1737 sv = mul128(sv, (U8)(ch & 0x7f));
1738 if (!(ch & 0x80)) {
a6ec74c1
JH
1739 bytes = 0;
1740 break;
1741 }
1742 }
10516c54 1743 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1744 while (*t == '0')
1745 t++;
1746 sv_chop(sv, t);
6e449a3a 1747 mPUSHs(sv);
a6ec74c1
JH
1748 len--;
1749 auv = 0;
1750 }
1751 }
1752 if ((s >= strend) && bytes)
49704364 1753 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1754 }
1755 break;
1756 case 'P':
49704364
LW
1757 if (symptr->howlen == e_star)
1758 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1759 EXTEND(SP, 1);
2d3e0934 1760 if (s + sizeof(char*) <= strend) {
08ca2aa3 1761 char *aptr;
f337b084 1762 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1763 DO_BO_UNPACK_PC(aptr);
fc241834 1764 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1765 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1766 }
a6ec74c1
JH
1767 break;
1768#ifdef HAS_QUAD
1769 case 'q':
73cb7263 1770 while (len-- > 0) {
08ca2aa3 1771 Quad_t aquad;
f337b084
TH
1772 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1773 DO_BO_UNPACK(aquad, 64);
08ca2aa3 1774 if (!checksum)
6e449a3a
MHM
1775 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1776 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1777 else if (checksum > bits_in_uv)
1778 cdouble += (NV)aquad;
1779 else
1780 cuv += aquad;
1781 }
a6ec74c1
JH
1782 break;
1783 case 'Q':
73cb7263 1784 while (len-- > 0) {
08ca2aa3 1785 Uquad_t auquad;
f337b084
TH
1786 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1787 DO_BO_UNPACK(auquad, 64);
08ca2aa3 1788 if (!checksum)
6e449a3a
MHM
1789 mPUSHs(auquad <= UV_MAX ?
1790 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1791 else if (checksum > bits_in_uv)
1792 cdouble += (NV)auquad;
1793 else
1794 cuv += auquad;
a6ec74c1
JH
1795 }
1796 break;
08ca2aa3 1797#endif /* HAS_QUAD */
a6ec74c1
JH
1798 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1799 case 'f':
73cb7263 1800 while (len-- > 0) {
08ca2aa3 1801 float afloat;
f337b084 1802 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1803 DO_BO_UNPACK_N(afloat, float);
1804 if (!checksum)
6e449a3a 1805 mPUSHn(afloat);
08ca2aa3 1806 else
73cb7263 1807 cdouble += afloat;
fc241834 1808 }
a6ec74c1
JH
1809 break;
1810 case 'd':
73cb7263 1811 while (len-- > 0) {
08ca2aa3 1812 double adouble;
f337b084 1813 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
1814 DO_BO_UNPACK_N(adouble, double);
1815 if (!checksum)
6e449a3a 1816 mPUSHn(adouble);
08ca2aa3 1817 else
73cb7263 1818 cdouble += adouble;
fc241834 1819 }
a6ec74c1 1820 break;
92d41999 1821 case 'F':
73cb7263 1822 while (len-- > 0) {
275663fa
TC
1823 NV_bytes anv;
1824 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1825 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 1826 if (!checksum)
275663fa 1827 mPUSHn(anv.nv);
08ca2aa3 1828 else
275663fa 1829 cdouble += anv.nv;
fc241834 1830 }
92d41999
JH
1831 break;
1832#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1833 case 'D':
73cb7263 1834 while (len-- > 0) {
275663fa
TC
1835 ld_bytes aldouble;
1836 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1837 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 1838 if (!checksum)
275663fa 1839 mPUSHn(aldouble.ld);
08ca2aa3 1840 else
275663fa 1841 cdouble += aldouble.ld;
92d41999
JH
1842 }
1843 break;
1844#endif
a6ec74c1 1845 case 'u':
858fe5e1 1846 if (!checksum) {
f7fe979e 1847 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1848 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1849 if (l) SvPOK_on(sv);
1850 }
1851 if (utf8) {
1852 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1853 I32 a, b, c, d;
db187877 1854 char hunk[3];
08ca2aa3 1855
08ca2aa3
TH
1856 while (len > 0) {
1857 next_uni_uu(aTHX_ &s, strend, &a);
1858 next_uni_uu(aTHX_ &s, strend, &b);
1859 next_uni_uu(aTHX_ &s, strend, &c);
1860 next_uni_uu(aTHX_ &s, strend, &d);
1861 hunk[0] = (char)((a << 2) | (b >> 4));
1862 hunk[1] = (char)((b << 4) | (c >> 2));
1863 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1864 if (!checksum)
1865 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1866 len -= 3;
1867 }
1868 if (s < strend) {
f7fe979e
AL
1869 if (*s == '\n') {
1870 s++;
1871 }
08ca2aa3
TH
1872 else {
1873 /* possible checksum byte */
f7fe979e
AL
1874 const char *skip = s+UTF8SKIP(s);
1875 if (skip < strend && *skip == '\n')
1876 s = skip+1;
08ca2aa3
TH
1877 }
1878 }
1879 }
1880 } else {
fc241834
RGS
1881 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1882 I32 a, b, c, d;
db187877 1883 char hunk[3];
a6ec74c1 1884
fc241834
RGS
1885 len = PL_uudmap[*(U8*)s++] & 077;
1886 while (len > 0) {
1887 if (s < strend && ISUUCHAR(*s))
1888 a = PL_uudmap[*(U8*)s++] & 077;
1889 else
1890 a = 0;
1891 if (s < strend && ISUUCHAR(*s))
1892 b = PL_uudmap[*(U8*)s++] & 077;
1893 else
1894 b = 0;
1895 if (s < strend && ISUUCHAR(*s))
1896 c = PL_uudmap[*(U8*)s++] & 077;
1897 else
1898 c = 0;
1899 if (s < strend && ISUUCHAR(*s))
1900 d = PL_uudmap[*(U8*)s++] & 077;
1901 else
1902 d = 0;
1903 hunk[0] = (char)((a << 2) | (b >> 4));
1904 hunk[1] = (char)((b << 4) | (c >> 2));
1905 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1906 if (!checksum)
1907 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1908 len -= 3;
1909 }
1910 if (*s == '\n')
1911 s++;
1912 else /* possible checksum byte */
1913 if (s + 1 < strend && s[1] == '\n')
1914 s += 2;
a6ec74c1 1915 }
08ca2aa3 1916 }
858fe5e1
TC
1917 if (!checksum)
1918 XPUSHs(sv);
a6ec74c1
JH
1919 break;
1920 }
49704364 1921
a6ec74c1 1922 if (checksum) {
1109a392 1923 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1924 (checksum > bits_in_uv &&
08ca2aa3
TH
1925 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1926 NV trouble, anv;
a6ec74c1 1927
08ca2aa3 1928 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1929 while (checksum >= 16) {
1930 checksum -= 16;
08ca2aa3 1931 anv *= 65536.0;
a6ec74c1 1932 }
a6ec74c1 1933 while (cdouble < 0.0)
08ca2aa3
TH
1934 cdouble += anv;
1935 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1936 sv = newSVnv(cdouble);
a6ec74c1
JH
1937 }
1938 else {
fa8ec7c1
NC
1939 if (checksum < bits_in_uv) {
1940 UV mask = ((UV)1 << checksum) - 1;
92d41999 1941 cuv &= mask;
a6ec74c1 1942 }
c4c5f44a 1943 sv = newSVuv(cuv);
a6ec74c1 1944 }
6e449a3a 1945 mXPUSHs(sv);
a6ec74c1
JH
1946 checksum = 0;
1947 }
fc241834 1948
49704364
LW
1949 if (symptr->flags & FLAG_SLASH){
1950 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1951 break;
49704364
LW
1952 if( next_symbol(symptr) ){
1953 if( symptr->howlen == e_number )
1954 Perl_croak(aTHX_ "Count after length/code in unpack" );
1955 if( beyond ){
1956 /* ...end of char buffer then no decent length available */
1957 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1958 } else {
1959 /* take top of stack (hope it's numeric) */
1960 len = POPi;
1961 if( len < 0 )
1962 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1963 }
1964 } else {
1965 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1966 }
1967 datumtype = symptr->code;
21c16052 1968 explicit_length = FALSE;
49704364
LW
1969 goto redo_switch;
1970 }
a6ec74c1 1971 }
49704364 1972
18529408
IZ
1973 if (new_s)
1974 *new_s = s;
1975 PUTBACK;
1976 return SP - PL_stack_base - start_sp_offset;
1977}
1978
1979PP(pp_unpack)
1980{
97aff369 1981 dVAR;
18529408 1982 dSP;
bab9c0ac 1983 dPOPPOPssrl;
18529408
IZ
1984 I32 gimme = GIMME_V;
1985 STRLEN llen;
1986 STRLEN rlen;
5c144d81
NC
1987 const char *pat = SvPV_const(left, llen);
1988 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1989 const char *strend = s + rlen;
1990 const char *patend = pat + llen;
08ca2aa3 1991 I32 cnt;
18529408
IZ
1992
1993 PUTBACK;
7accc089 1994 cnt = unpackstring(pat, patend, s, strend,
49704364 1995 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1996 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1997
18529408
IZ
1998 SPAGAIN;
1999 if ( !cnt && gimme == G_SCALAR )
2000 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2001 RETURN;
2002}
2003
f337b084 2004STATIC U8 *
f7fe979e 2005doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2006{
f337b084 2007 *h++ = PL_uuemap[len];
a6ec74c1 2008 while (len > 2) {
f337b084
TH
2009 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2010 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2011 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2012 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2013 s += 3;
2014 len -= 3;
2015 }
2016 if (len > 0) {
f7fe979e 2017 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2018 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2019 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2020 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2021 *h++ = PL_uuemap[0];
a6ec74c1 2022 }
f337b084
TH
2023 *h++ = '\n';
2024 return h;
a6ec74c1
JH
2025}
2026
2027STATIC SV *
f7fe979e 2028S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2029{
8b6e33c7
AL
2030 SV *result = newSVpvn(s, l);
2031 char *const result_c = SvPV_nolen(result); /* convenience */
2032 char *out = result_c;
2033 bool skip = 1;
2034 bool ignore = 0;
a6ec74c1 2035
7918f24d
NC
2036 PERL_ARGS_ASSERT_IS_AN_INT;
2037
a6ec74c1
JH
2038 while (*s) {
2039 switch (*s) {
2040 case ' ':
2041 break;
2042 case '+':
2043 if (!skip) {
2044 SvREFCNT_dec(result);
2045 return (NULL);
2046 }
2047 break;
2048 case '0':
2049 case '1':
2050 case '2':
2051 case '3':
2052 case '4':
2053 case '5':
2054 case '6':
2055 case '7':
2056 case '8':
2057 case '9':
2058 skip = 0;
2059 if (!ignore) {
2060 *(out++) = *s;
2061 }
2062 break;
2063 case '.':
2064 ignore = 1;
2065 break;
2066 default:
2067 SvREFCNT_dec(result);
2068 return (NULL);
2069 }
2070 s++;
2071 }
2072 *(out++) = '\0';
2073 SvCUR_set(result, out - result_c);
2074 return (result);
2075}
2076
2077/* pnum must be '\0' terminated */
2078STATIC int
2079S_div128(pTHX_ SV *pnum, bool *done)
2080{
8b6e33c7
AL
2081 STRLEN len;
2082 char * const s = SvPV(pnum, len);
2083 char *t = s;
2084 int m = 0;
2085
7918f24d
NC
2086 PERL_ARGS_ASSERT_DIV128;
2087
8b6e33c7
AL
2088 *done = 1;
2089 while (*t) {
2090 const int i = m * 10 + (*t - '0');
2091 const int r = (i >> 7); /* r < 10 */
2092 m = i & 0x7F;
2093 if (r) {
2094 *done = 0;
2095 }
2096 *(t++) = '0' + r;
a6ec74c1 2097 }
8b6e33c7
AL
2098 *(t++) = '\0';
2099 SvCUR_set(pnum, (STRLEN) (t - s));
2100 return (m);
a6ec74c1
JH
2101}
2102
18529408 2103/*
7accc089
JH
2104=for apidoc packlist
2105
2106The engine implementing pack() Perl function.
2107
bfce84ec
AL
2108=cut
2109*/
7accc089
JH
2110
2111void
5aaab254 2112Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2113{
97aff369 2114 dVAR;
aadb217d
JH
2115 tempsym_t sym;
2116
7918f24d
NC
2117 PERL_ARGS_ASSERT_PACKLIST;
2118
f7fe979e 2119 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2120
f337b084
TH
2121 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2122 Also make sure any UTF8 flag is loaded */
56eb0262 2123 SvPV_force_nolen(cat);
bfce84ec
AL
2124 if (DO_UTF8(cat))
2125 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2126
49704364
LW
2127 (void)pack_rec( cat, &sym, beglist, endlist );
2128}
2129
f337b084
TH
2130/* like sv_utf8_upgrade, but also repoint the group start markers */
2131STATIC void
2132marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2133 STRLEN len;
2134 tempsym_t *group;
f7fe979e
AL
2135 const char *from_ptr, *from_start, *from_end, **marks, **m;
2136 char *to_start, *to_ptr;
f337b084
TH
2137
2138 if (SvUTF8(sv)) return;
2139
aa07b2f6 2140 from_start = SvPVX_const(sv);
f337b084
TH
2141 from_end = from_start + SvCUR(sv);
2142 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2143 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2144 if (from_ptr == from_end) {
2145 /* Simple case: no character needs to be changed */
2146 SvUTF8_on(sv);
2147 return;
2148 }
2149
3473cf63 2150 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2151 Newx(to_start, len, char);
f337b084
TH
2152 Copy(from_start, to_start, from_ptr-from_start, char);
2153 to_ptr = to_start + (from_ptr-from_start);
2154
a02a5408 2155 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2156 for (group=sym_ptr; group; group = group->previous)
2157 marks[group->level] = from_start + group->strbeg;
2158 marks[sym_ptr->level+1] = from_end+1;
2159 for (m = marks; *m < from_ptr; m++)
2160 *m = to_start + (*m-from_start);
2161
2162 for (;from_ptr < from_end; from_ptr++) {
2163 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2164 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2165 }
2166 *to_ptr = 0;
2167
2168 while (*m == from_ptr) *m++ = to_ptr;
2169 if (m != marks + sym_ptr->level+1) {
2170 Safefree(marks);
2171 Safefree(to_start);
5637ef5b
NC
2172 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2173 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2174 }
2175 for (group=sym_ptr; group; group = group->previous)
2176 group->strbeg = marks[group->level] - to_start;
2177 Safefree(marks);
2178
2179 if (SvOOK(sv)) {
2180 if (SvIVX(sv)) {
b162af07 2181 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2182 from_start -= SvIVX(sv);
2183 SvIV_set(sv, 0);
2184 }
2185 SvFLAGS(sv) &= ~SVf_OOK;
2186 }
2187 if (SvLEN(sv) != 0)
2188 Safefree(from_start);
f880fe2f 2189 SvPV_set(sv, to_start);
b162af07
SP
2190 SvCUR_set(sv, to_ptr - to_start);
2191 SvLEN_set(sv, len);
f337b084
TH
2192 SvUTF8_on(sv);
2193}
2194
2195/* Exponential string grower. Makes string extension effectively O(n)
2196 needed says how many extra bytes we need (not counting the final '\0')
2197 Only grows the string if there is an actual lack of space
2198*/
2199STATIC char *
0bd48802 2200S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2201 const STRLEN cur = SvCUR(sv);
2202 const STRLEN len = SvLEN(sv);
f337b084 2203 STRLEN extend;
7918f24d
NC
2204
2205 PERL_ARGS_ASSERT_SV_EXP_GROW;
2206
f337b084
TH
2207 if (len - cur > needed) return SvPVX(sv);
2208 extend = needed > len ? needed : len;
2209 return SvGROW(sv, len+extend+1);
2210}
49704364
LW
2211
2212STATIC
2213SV **
f337b084 2214S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2215{
97aff369 2216 dVAR;
49704364 2217 tempsym_t lookahead;
f337b084
TH
2218 I32 items = endlist - beglist;
2219 bool found = next_symbol(symptr);
2220 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2221 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2222
7918f24d
NC
2223 PERL_ARGS_ASSERT_PACK_REC;
2224
f337b084
TH
2225 if (symptr->level == 0 && found && symptr->code == 'U') {
2226 marked_upgrade(aTHX_ cat, symptr);
2227 symptr->flags |= FLAG_DO_UTF8;
2228 utf8 = 0;
49704364 2229 }
f337b084 2230 symptr->strbeg = SvCUR(cat);
49704364
LW
2231
2232 while (found) {
f337b084
TH
2233 SV *fromstr;
2234 STRLEN fromlen;
2235 I32 len;
a0714e2c 2236 SV *lengthcode = NULL;
49704364 2237 I32 datumtype = symptr->code;
f337b084
TH
2238 howlen_t howlen = symptr->howlen;
2239 char *start = SvPVX(cat);
2240 char *cur = start + SvCUR(cat);
49704364 2241
f337b084
TH
2242#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2243
2244 switch (howlen) {
fc241834 2245 case e_star:
f337b084
TH
2246 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2247 0 : items;
2248 break;
2249 default:
2250 /* e_no_len and e_number */
2251 len = symptr->length;
49704364
LW
2252 break;
2253 }
2254
f337b084 2255 if (len) {
a7a3cfaa 2256 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2257
a7a3cfaa
TH
2258 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2259 /* We can process this letter. */
2260 STRLEN size = props & PACK_SIZE_MASK;
2261 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2262 }
f337b084
TH
2263 }
2264
49704364
LW
2265 /* Look ahead for next symbol. Do we have code/code? */
2266 lookahead = *symptr;
2267 found = next_symbol(&lookahead);
246f24af
TH
2268 if (symptr->flags & FLAG_SLASH) {
2269 IV count;
f337b084 2270 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2271 if (strchr("aAZ", lookahead.code)) {
2272 if (lookahead.howlen == e_number) count = lookahead.length;
2273 else {
ce399ba6 2274 if (items > 0) {
48a5da33 2275 count = sv_len_utf8(*beglist);
ce399ba6 2276 }
246f24af
TH
2277 else count = 0;
2278 if (lookahead.code == 'Z') count++;
2279 }
2280 } else {
2281 if (lookahead.howlen == e_number && lookahead.length < items)
2282 count = lookahead.length;
2283 else count = items;
2284 }
2285 lookahead.howlen = e_number;
2286 lookahead.length = count;
2287 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2288 }
49704364 2289
fc241834
RGS
2290 /* Code inside the switch must take care to properly update
2291 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2292 doesn't simply leave using break */
1109a392 2293 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2294 default:
f337b084
TH
2295 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2296 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2297 case '%':
49704364 2298 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2299 {
2300 char *from;
28be1210 2301 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2302 case '.':
2303 if (howlen == e_star) from = start;
2304 else if (len == 0) from = cur;
2305 else {
2306 tempsym_t *group = symptr;
2307
2308 while (--len && group) group = group->previous;
2309 from = group ? start + group->strbeg : start;
2310 }
2311 fromstr = NEXTFROM;
2312 len = SvIV(fromstr);
2313 goto resize;
28be1210 2314 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2315 case '@':
28be1210
TH
2316 from = start + symptr->strbeg;
2317 resize:
28be1210 2318 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2319 if (len >= 0) {
2320 while (len && from < cur) {
2321 from += UTF8SKIP(from);
2322 len--;
2323 }
2324 if (from > cur)
2325 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2326 if (len) {
2327 /* Here we know from == cur */
2328 grow:
2329 GROWING(0, cat, start, cur, len);
2330 Zero(cur, len, char);
2331 cur += len;
2332 } else if (from < cur) {
2333 len = cur - from;
2334 goto shrink;
2335 } else goto no_change;
2336 } else {
2337 cur = from;
2338 len = -len;
2339 goto utf8_shrink;
f337b084 2340 }
28be1210
TH
2341 else {
2342 len -= cur - from;
f337b084 2343 if (len > 0) goto grow;
28be1210 2344 if (len == 0) goto no_change;
fc241834 2345 len = -len;
28be1210 2346 goto shrink;
f337b084 2347 }
a6ec74c1 2348 break;
28be1210 2349 }
fc241834 2350 case '(': {
49704364 2351 tempsym_t savsym = *symptr;
66c611c5
MHM
2352 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2353 symptr->flags |= group_modifiers;
49704364
LW
2354 symptr->patend = savsym.grpend;
2355 symptr->level++;
f337b084 2356 symptr->previous = &lookahead;
18529408 2357 while (len--) {
f337b084
TH
2358 U32 was_utf8;
2359 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2360 else symptr->flags &= ~FLAG_PARSE_UTF8;
2361 was_utf8 = SvUTF8(cat);
49704364 2362 symptr->patptr = savsym.grpbeg;
f337b084
TH
2363 beglist = pack_rec(cat, symptr, beglist, endlist);
2364 if (SvUTF8(cat) != was_utf8)
2365 /* This had better be an upgrade while in utf8==0 mode */
2366 utf8 = 1;
2367
49704364 2368 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2369 break; /* No way to continue */
2370 }
ee790063 2371 items = endlist - beglist;
f337b084
TH
2372 lookahead.flags = symptr->flags & ~group_modifiers;
2373 goto no_change;
18529408 2374 }
62f95557
IZ
2375 case 'X' | TYPE_IS_SHRIEKING:
2376 if (!len) /* Avoid division by 0 */
2377 len = 1;
f337b084
TH
2378 if (utf8) {
2379 char *hop, *last;
2380 I32 l = len;
2381 hop = last = start;
2382 while (hop < cur) {
2383 hop += UTF8SKIP(hop);
2384 if (--l == 0) {
2385 last = hop;
2386 l = len;
2387 }
2388 }
2389 if (last > cur)
2390 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2391 cur = last;
2392 break;
2393 }
2394 len = (cur-start) % len;
62f95557 2395 /* FALL THROUGH */
a6ec74c1 2396 case 'X':
f337b084
TH
2397 if (utf8) {
2398 if (len < 1) goto no_change;
28be1210 2399 utf8_shrink:
f337b084
TH
2400 while (len > 0) {
2401 if (cur <= start)
28be1210
TH
2402 Perl_croak(aTHX_ "'%c' outside of string in pack",
2403 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2404 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2405 if (cur <= start)
28be1210
TH
2406 Perl_croak(aTHX_ "'%c' outside of string in pack",
2407 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2408 }
2409 len--;
2410 }
2411 } else {
fc241834 2412 shrink:
f337b084 2413 if (cur - start < len)
28be1210
TH
2414 Perl_croak(aTHX_ "'%c' outside of string in pack",
2415 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2416 cur -= len;
2417 }
2418 if (cur < start+symptr->strbeg) {
2419 /* Make sure group starts don't point into the void */
2420 tempsym_t *group;
9e27e96a 2421 const STRLEN length = cur-start;
f337b084
TH
2422 for (group = symptr;
2423 group && length < group->strbeg;
2424 group = group->previous) group->strbeg = length;
2425 lookahead.strbeg = length;
2426 }
a6ec74c1 2427 break;
fc241834
RGS
2428 case 'x' | TYPE_IS_SHRIEKING: {
2429 I32 ai32;
62f95557
IZ
2430 if (!len) /* Avoid division by 0 */
2431 len = 1;
230e1fce 2432 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2433 else ai32 = (cur - start) % len;
2434 if (ai32 == 0) goto no_change;
2435 len -= ai32;
2436 }
2437 /* FALL THROUGH */
a6ec74c1 2438 case 'x':
f337b084 2439 goto grow;
a6ec74c1
JH
2440 case 'A':
2441 case 'Z':
f337b084 2442 case 'a': {
f7fe979e 2443 const char *aptr;
f337b084 2444
a6ec74c1 2445 fromstr = NEXTFROM;
e62f0680 2446 aptr = SvPV_const(fromstr, fromlen);
f337b084 2447 if (DO_UTF8(fromstr)) {
f7fe979e 2448 const char *end, *s;
f337b084
TH
2449
2450 if (!utf8 && !SvUTF8(cat)) {
2451 marked_upgrade(aTHX_ cat, symptr);
2452 lookahead.flags |= FLAG_DO_UTF8;
2453 lookahead.strbeg = symptr->strbeg;
2454 utf8 = 1;
2455 start = SvPVX(cat);
2456 cur = start + SvCUR(cat);
2457 }
fc241834 2458 if (howlen == e_star) {
f337b084
TH
2459 if (utf8) goto string_copy;
2460 len = fromlen+1;
2461 }
2462 s = aptr;
2463 end = aptr + fromlen;
2464 fromlen = datumtype == 'Z' ? len-1 : len;
2465 while ((I32) fromlen > 0 && s < end) {
2466 s += UTF8SKIP(s);
2467 fromlen--;
2468 }
2469 if (s > end)
2470 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2471 if (utf8) {
fc241834 2472 len = fromlen;
f337b084
TH
2473 if (datumtype == 'Z') len++;
2474 fromlen = s-aptr;
2475 len += fromlen;
fc241834 2476
f337b084 2477 goto string_copy;
fc241834 2478 }
f337b084
TH
2479 fromlen = len - fromlen;
2480 if (datumtype == 'Z') fromlen--;
2481 if (howlen == e_star) {
2482 len = fromlen;
2483 if (datumtype == 'Z') len++;
fc241834 2484 }
f337b084 2485 GROWING(0, cat, start, cur, len);
fc241834 2486 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2487 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2488 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2489 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2490 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2491 cur += fromlen;
a6ec74c1 2492 len -= fromlen;
f337b084
TH
2493 } else if (utf8) {
2494 if (howlen == e_star) {
2495 len = fromlen;
2496 if (datumtype == 'Z') len++;
a6ec74c1 2497 }
f337b084
TH
2498 if (len <= (I32) fromlen) {
2499 fromlen = len;
2500 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2501 }
fc241834 2502 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2503 upgrade, so:
2504 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2505 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2506 len -= fromlen;
2507 while (fromlen > 0) {
230e1fce 2508 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2509 aptr++;
2510 fromlen--;
fc241834 2511 }
f337b084
TH
2512 } else {
2513 string_copy:
2514 if (howlen == e_star) {
2515 len = fromlen;
2516 if (datumtype == 'Z') len++;
2517 }
2518 if (len <= (I32) fromlen) {
2519 fromlen = len;
2520 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2521 }
f337b084
TH
2522 GROWING(0, cat, start, cur, len);
2523 Copy(aptr, cur, fromlen, char);
2524 cur += fromlen;
2525 len -= fromlen;
a6ec74c1 2526 }
f337b084
TH
2527 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2528 cur += len;
3c4fb04a 2529 SvTAINT(cat);
a6ec74c1 2530 break;
f337b084 2531 }
a6ec74c1 2532 case 'B':
f337b084 2533 case 'b': {
b83604b4 2534 const char *str, *end;
f337b084
TH
2535 I32 l, field_len;
2536 U8 bits;
2537 bool utf8_source;
2538 U32 utf8_flags;
a6ec74c1 2539
fc241834 2540 fromstr = NEXTFROM;
b83604b4 2541 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2542 end = str + fromlen;
2543 if (DO_UTF8(fromstr)) {
2544 utf8_source = TRUE;
041457d9 2545 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2546 } else {
2547 utf8_source = FALSE;
2548 utf8_flags = 0; /* Unused, but keep compilers happy */
2549 }
2550 if (howlen == e_star) len = fromlen;
2551 field_len = (len+7)/8;
2552 GROWING(utf8, cat, start, cur, field_len);
2553 if (len > (I32)fromlen) len = fromlen;
2554 bits = 0;
2555 l = 0;
2556 if (datumtype == 'B')
2557 while (l++ < len) {
2558 if (utf8_source) {
95b63a38 2559 UV val = 0;
f337b084
TH
2560 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2561 bits |= val & 1;
2562 } else bits |= *str++ & 1;
2563 if (l & 7) bits <<= 1;
fc241834 2564 else {
f337b084
TH
2565 PUSH_BYTE(utf8, cur, bits);
2566 bits = 0;
a6ec74c1
JH
2567 }
2568 }
f337b084
TH
2569 else
2570 /* datumtype == 'b' */
2571 while (l++ < len) {
2572 if (utf8_source) {
95b63a38 2573 UV val = 0;
f337b084
TH
2574 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2575 if (val & 1) bits |= 0x80;
2576 } else if (*str++ & 1)
2577 bits |= 0x80;
2578 if (l & 7) bits >>= 1;
fc241834 2579 else {
f337b084
TH
2580 PUSH_BYTE(utf8, cur, bits);
2581 bits = 0;
a6ec74c1
JH
2582 }
2583 }
f337b084
TH
2584 l--;
2585 if (l & 7) {
fc241834 2586 if (datumtype == 'B')
f337b084 2587 bits <<= 7 - (l & 7);
fc241834 2588 else
f337b084
TH
2589 bits >>= 7 - (l & 7);
2590 PUSH_BYTE(utf8, cur, bits);
2591 l += 7;
a6ec74c1 2592 }
f337b084
TH
2593 /* Determine how many chars are left in the requested field */
2594 l /= 8;
2595 if (howlen == e_star) field_len = 0;
2596 else field_len -= l;
2597 Zero(cur, field_len, char);
2598 cur += field_len;
a6ec74c1 2599 break;
f337b084 2600 }
a6ec74c1 2601 case 'H':
f337b084 2602 case 'h': {
10516c54 2603 const char *str, *end;
f337b084
TH
2604 I32 l, field_len;
2605 U8 bits;
2606 bool utf8_source;
2607 U32 utf8_flags;
a6ec74c1 2608
fc241834 2609 fromstr = NEXTFROM;
10516c54 2610 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2611 end = str + fromlen;
2612 if (DO_UTF8(fromstr)) {
2613 utf8_source = TRUE;
041457d9 2614 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2615 } else {
2616 utf8_source = FALSE;
2617 utf8_flags = 0; /* Unused, but keep compilers happy */
2618 }
2619 if (howlen == e_star) len = fromlen;
2620 field_len = (len+1)/2;
2621 GROWING(utf8, cat, start, cur, field_len);
2622 if (!utf8 && len > (I32)fromlen) len = fromlen;
2623 bits = 0;
2624 l = 0;
2625 if (datumtype == 'H')
2626 while (l++ < len) {
2627 if (utf8_source) {
95b63a38 2628 UV val = 0;
f337b084
TH
2629 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2630 if (val < 256 && isALPHA(val))
2631 bits |= (val + 9) & 0xf;
a6ec74c1 2632 else
f337b084
TH
2633 bits |= val & 0xf;
2634 } else if (isALPHA(*str))
2635 bits |= (*str++ + 9) & 0xf;
2636 else
2637 bits |= *str++ & 0xf;
2638 if (l & 1) bits <<= 4;
fc241834 2639 else {
f337b084
TH
2640 PUSH_BYTE(utf8, cur, bits);
2641 bits = 0;
a6ec74c1
JH
2642 }
2643 }
f337b084
TH
2644 else
2645 while (l++ < len) {
2646 if (utf8_source) {
95b63a38 2647 UV val = 0;
f337b084
TH
2648 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2649 if (val < 256 && isALPHA(val))
2650 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2651 else
f337b084
TH
2652 bits |= (val & 0xf) << 4;
2653 } else if (isALPHA(*str))
2654 bits |= ((*str++ + 9) & 0xf) << 4;
2655 else
2656 bits |= (*str++ & 0xf) << 4;
2657 if (l & 1) bits >>= 4;
fc241834 2658 else {
f337b084
TH
2659 PUSH_BYTE(utf8, cur, bits);
2660 bits = 0;
a6ec74c1 2661 }
fc241834 2662 }
f337b084
TH
2663 l--;
2664 if (l & 1) {
2665 PUSH_BYTE(utf8, cur, bits);
2666 l++;
2667 }
2668 /* Determine how many chars are left in the requested field */
2669 l /= 2;
2670 if (howlen == e_star) field_len = 0;
2671 else field_len -= l;
2672 Zero(cur, field_len, char);
2673 cur += field_len;
2674 break;
fc241834
RGS
2675 }
2676 case 'c':
f337b084
TH
2677 while (len-- > 0) {
2678 IV aiv;
2679 fromstr = NEXTFROM;
2680 aiv = SvIV(fromstr);
a2a5de95
NC
2681 if ((-128 > aiv || aiv > 127))
2682 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2683 "Character in 'c' format wrapped in pack");
585ec06d 2684 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2685 }
2686 break;
2687 case 'C':
f337b084
TH
2688 if (len == 0) {
2689 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2690 break;
2691 }
a6ec74c1 2692 while (len-- > 0) {
f337b084 2693 IV aiv;
a6ec74c1 2694 fromstr = NEXTFROM;
f337b084 2695 aiv = SvIV(fromstr);
a2a5de95
NC
2696 if ((0 > aiv || aiv > 0xff))
2697 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2698 "Character in 'C' format wrapped in pack");
1651fc44 2699 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2700 }
fc241834
RGS
2701 break;
2702 case 'W': {
2703 char *end;
670f1322 2704 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2705
2706 end = start+SvLEN(cat)-1;
2707 if (utf8) end -= UTF8_MAXLEN-1;
2708 while (len-- > 0) {
2709 UV auv;
2710 fromstr = NEXTFROM;
2711 auv = SvUV(fromstr);
2712 if (in_bytes) auv = auv % 0x100;
2713 if (utf8) {
2714 W_utf8:
2715 if (cur > end) {
2716 *cur = '\0';
b162af07 2717 SvCUR_set(cat, cur - start);
fc241834
RGS
2718
2719 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2720 end = start+SvLEN(cat)-UTF8_MAXLEN;
2721 }
230e1fce
NC
2722 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2723 NATIVE_TO_UNI(auv),
041457d9 2724 warn_utf8 ?
230e1fce 2725 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2726 } else {
2727 if (auv >= 0x100) {
2728 if (!SvUTF8(cat)) {
2729 *cur = '\0';
b162af07 2730 SvCUR_set(cat, cur - start);
fc241834
RGS
2731 marked_upgrade(aTHX_ cat, symptr);
2732 lookahead.flags |= FLAG_DO_UTF8;
2733 lookahead.strbeg = symptr->strbeg;
2734 utf8 = 1;
2735 start = SvPVX(cat);
2736 cur = start + SvCUR(cat);
2737 end = start+SvLEN(cat)-UTF8_MAXLEN;
2738 goto W_utf8;
2739 }
a2a5de95
NC
2740 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2741 "Character in 'W' format wrapped in pack");
fc241834
RGS
2742 auv &= 0xff;
2743 }
2744 if (cur >= end) {
2745 *cur = '\0';
b162af07 2746 SvCUR_set(cat, cur - start);
fc241834
RGS
2747 GROWING(0, cat, start, cur, len+1);
2748 end = start+SvLEN(cat)-1;
2749 }
fe2774ed 2750 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2751 }
2752 }
2753 break;
fc241834
RGS
2754 }
2755 case 'U': {
2756 char *end;
2757
2758 if (len == 0) {
2759 if (!(symptr->flags & FLAG_DO_UTF8)) {
2760 marked_upgrade(aTHX_ cat, symptr);
2761 lookahead.flags |= FLAG_DO_UTF8;
2762 lookahead.strbeg = symptr->strbeg;
2763 }
2764 utf8 = 0;
2765 goto no_change;
2766 }
2767
2768 end = start+SvLEN(cat);
2769 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2770 while (len-- > 0) {
fc241834 2771 UV auv;
a6ec74c1 2772 fromstr = NEXTFROM;
fc241834
RGS
2773 auv = SvUV(fromstr);
2774 if (utf8) {
230e1fce 2775 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2776 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2777 warn_utf8 ?
fc241834
RGS
2778 0 : UNICODE_ALLOW_ANY);
2779 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2780 *cur = '\0';
b162af07 2781 SvCUR_set(cat, cur - start);
fc241834
RGS
2782 GROWING(0, cat, start, cur,
2783 len+(endb-buffer)*UTF8_EXPAND);
2784 end = start+SvLEN(cat);
2785 }
64844641 2786 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2787 } else {
2788 if (cur >= end) {
2789 *cur = '\0';
b162af07 2790 SvCUR_set(cat, cur - start);
fc241834
RGS
2791 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2792 end = start+SvLEN(cat)-UTF8_MAXLEN;
2793 }
230e1fce 2794 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2795 warn_utf8 ?
230e1fce 2796 0 : UNICODE_ALLOW_ANY);
fc241834 2797 }
a6ec74c1 2798 }
a6ec74c1 2799 break;
fc241834 2800 }
a6ec74c1
JH
2801 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2802 case 'f':
a6ec74c1 2803 while (len-- > 0) {
f337b084
TH
2804 float afloat;
2805 NV anv;
a6ec74c1 2806 fromstr = NEXTFROM;
f337b084 2807 anv = SvNV(fromstr);
85bba25f 2808# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2809 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2810 * on Alpha; fake it if we don't have them.
2811 */
f337b084 2812 if (anv > FLT_MAX)
fc241834 2813 afloat = FLT_MAX;
f337b084 2814 else if (anv < -FLT_MAX)
fc241834 2815 afloat = -FLT_MAX;
f337b084 2816 else afloat = (float)anv;
baf3cf9c 2817# else
f337b084 2818 afloat = (float)anv;
baf3cf9c 2819# endif
1109a392 2820 DO_BO_PACK_N(afloat, float);
f337b084 2821 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2822 }
2823 break;
2824 case 'd':
a6ec74c1 2825 while (len-- > 0) {
f337b084
TH
2826 double adouble;
2827 NV anv;
a6ec74c1 2828 fromstr = NEXTFROM;
f337b084 2829 anv = SvNV(fromstr);
85bba25f 2830# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2831 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2832 * on Alpha; fake it if we don't have them.
2833 */
f337b084 2834 if (anv > DBL_MAX)
fc241834 2835 adouble = DBL_MAX;
f337b084 2836 else if (anv < -DBL_MAX)
fc241834 2837 adouble = -DBL_MAX;
f337b084 2838 else adouble = (double)anv;
baf3cf9c 2839# else
f337b084 2840 adouble = (double)anv;
baf3cf9c 2841# endif
1109a392 2842 DO_BO_PACK_N(adouble, double);
f337b084 2843 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2844 }
2845 break;
fc241834 2846 case 'F': {
275663fa 2847 NV_bytes anv;
1109a392 2848 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2849 while (len-- > 0) {
2850 fromstr = NEXTFROM;
cd07c537
DM
2851#ifdef __GNUC__
2852 /* to work round a gcc/x86 bug; don't use SvNV */
2853 anv.nv = sv_2nv(fromstr);
2854#else
275663fa 2855 anv.nv = SvNV(fromstr);
cd07c537 2856#endif
1109a392 2857 DO_BO_PACK_N(anv, NV);
275663fa 2858 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2859 }
2860 break;
fc241834 2861 }
92d41999 2862#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2863 case 'D': {
275663fa 2864 ld_bytes aldouble;
1109a392
MHM
2865 /* long doubles can have unused bits, which may be nonzero */
2866 Zero(&aldouble, 1, long double);
92d41999
JH
2867 while (len-- > 0) {
2868 fromstr = NEXTFROM;
cd07c537
DM
2869# ifdef __GNUC__
2870 /* to work round a gcc/x86 bug; don't use SvNV */
2871 aldouble.ld = (long double)sv_2nv(fromstr);
2872# else
275663fa 2873 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2874# endif
1109a392 2875 DO_BO_PACK_N(aldouble, long double);
275663fa 2876 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2877 }
2878 break;
fc241834 2879 }
92d41999 2880#endif
068bd2e7 2881 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2882 case 'n':
2883 while (len-- > 0) {
f337b084 2884 I16 ai16;
a6ec74c1 2885 fromstr = NEXTFROM;
ef108786 2886 ai16 = (I16)SvIV(fromstr);
ef108786 2887 ai16 = PerlSock_htons(ai16);
f337b084 2888 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2889 }
2890 break;
068bd2e7 2891 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2892 case 'v':
2893 while (len-- > 0) {
f337b084 2894 I16 ai16;
a6ec74c1 2895 fromstr = NEXTFROM;
ef108786 2896 ai16 = (I16)SvIV(fromstr);
ef108786 2897 ai16 = htovs(ai16);
f337b084 2898 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2899 }
2900 break;
49704364 2901 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2902#if SHORTSIZE != SIZE16
fc241834 2903 while (len-- > 0) {
f337b084 2904 unsigned short aushort;
fc241834
RGS
2905 fromstr = NEXTFROM;
2906 aushort = SvUV(fromstr);
2907 DO_BO_PACK(aushort, s);
f337b084 2908 PUSH_VAR(utf8, cur, aushort);
fc241834 2909 }
49704364
LW
2910 break;
2911#else
2912 /* Fall through! */
a6ec74c1 2913#endif
49704364 2914 case 'S':
fc241834 2915 while (len-- > 0) {
f337b084 2916 U16 au16;
fc241834
RGS
2917 fromstr = NEXTFROM;
2918 au16 = (U16)SvUV(fromstr);
2919 DO_BO_PACK(au16, 16);
f337b084 2920 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2921 }
2922 break;
49704364 2923 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2924#if SHORTSIZE != SIZE16
fc241834 2925 while (len-- > 0) {
f337b084 2926 short ashort;
fc241834
RGS
2927 fromstr = NEXTFROM;
2928 ashort = SvIV(fromstr);
2929 DO_BO_PACK(ashort, s);
f337b084 2930 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2931 }
49704364
LW
2932 break;
2933#else
2934 /* Fall through! */
a6ec74c1 2935#endif
49704364
LW
2936 case 's':
2937 while (len-- > 0) {
f337b084 2938 I16 ai16;
49704364 2939 fromstr = NEXTFROM;
ef108786
MHM
2940 ai16 = (I16)SvIV(fromstr);
2941 DO_BO_PACK(ai16, 16);
f337b084 2942 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2943 }
2944 break;
2945 case 'I':
49704364 2946 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2947 while (len-- > 0) {
f337b084 2948 unsigned int auint;
a6ec74c1
JH
2949 fromstr = NEXTFROM;
2950 auint = SvUV(fromstr);
1109a392 2951 DO_BO_PACK(auint, i);
f337b084 2952 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2953 }
2954 break;
92d41999
JH
2955 case 'j':
2956 while (len-- > 0) {
f337b084 2957 IV aiv;
92d41999
JH
2958 fromstr = NEXTFROM;
2959 aiv = SvIV(fromstr);
1109a392
MHM
2960#if IVSIZE == INTSIZE
2961 DO_BO_PACK(aiv, i);
2962#elif IVSIZE == LONGSIZE
2963 DO_BO_PACK(aiv, l);
2964#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2965 DO_BO_PACK(aiv, 64);
f337b084
TH
2966#else
2967 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 2968#endif
f337b084 2969 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
2970 }
2971 break;
2972 case 'J':
2973 while (len-- > 0) {
f337b084 2974 UV auv;
92d41999
JH
2975 fromstr = NEXTFROM;
2976 auv = SvUV(fromstr);
1109a392
MHM
2977#if UVSIZE == INTSIZE
2978 DO_BO_PACK(auv, i);
2979#elif UVSIZE == LONGSIZE
2980 DO_BO_PACK(auv, l);
2981#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2982 DO_BO_PACK(auv, 64);
f337b084
TH
2983#else
2984 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 2985#endif
f337b084 2986 PUSH_VAR(utf8, cur, auv);
92d41999
JH
2987 }
2988 break;
a6ec74c1
JH
2989 case 'w':
2990 while (len-- > 0) {
f337b084 2991 NV anv;
a6ec74c1 2992 fromstr = NEXTFROM;
15e9f109 2993 anv = SvNV(fromstr);
a6ec74c1 2994
f337b084
TH
2995 if (anv < 0) {
2996 *cur = '\0';
b162af07 2997 SvCUR_set(cat, cur - start);
49704364 2998 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2999 }
a6ec74c1 3000
196b62db
NC
3001 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3002 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3003 any negative IVs will have already been got by the croak()
3004 above. IOK is untrue for fractions, so we test them
3005 against UV_MAX_P1. */
f337b084
TH
3006 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3007 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3008 char *in = buf + sizeof(buf);
196b62db 3009 UV auv = SvUV(fromstr);
a6ec74c1
JH
3010
3011 do {
eb160463 3012 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3013 auv >>= 7;
3014 } while (auv);
3015 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3016 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3017 in, (buf + sizeof(buf)) - in);
3018 } else if (SvPOKp(fromstr))
3019 goto w_string;
a6ec74c1 3020 else if (SvNOKp(fromstr)) {
0258719b 3021 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 3022 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
3023 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3024 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3025 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3026 Some C compilers are strict about integral constant
3027 expressions so we conservatively divide by a slightly
3028 smaller integer instead of multiplying by the exact
3029 floating-point value.
0258719b
NC
3030 */
3031#ifdef NV_MAX_10_EXP
f337b084 3032 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3033 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3034#else
f337b084 3035 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3036 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3037#endif
a6ec74c1
JH
3038 char *in = buf + sizeof(buf);
3039
8b6e33c7 3040 anv = Perl_floor(anv);
a6ec74c1 3041 do {
8b6e33c7 3042 const NV next = Perl_floor(anv / 128);
a6ec74c1 3043 if (in <= buf) /* this cannot happen ;-) */
49704364 3044 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3045 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3046 anv = next;
3047 } while (anv > 0);
a6ec74c1 3048 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3049 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3050 in, (buf + sizeof(buf)) - in);
3051 } else {
8b6e33c7
AL
3052 const char *from;
3053 char *result, *in;
735b914b
JH
3054 SV *norm;
3055 STRLEN len;
3056 bool done;
3057
f337b084 3058 w_string:
735b914b 3059 /* Copy string and check for compliance */
349d4f2f 3060 from = SvPV_const(fromstr, len);
735b914b 3061 if ((norm = is_an_int(from, len)) == NULL)
49704364 3062 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3063
a02a5408 3064 Newx(result, len, char);
735b914b
JH
3065 in = result + len;
3066 done = FALSE;
f337b084 3067 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3068 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3069 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3070 in, (result + len) - in);
735b914b
JH
3071 Safefree(result);
3072 SvREFCNT_dec(norm); /* free norm */
fc241834 3073 }
a6ec74c1
JH
3074 }
3075 break;
3076 case 'i':
49704364 3077 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3078 while (len-- > 0) {
f337b084 3079 int aint;
a6ec74c1
JH
3080 fromstr = NEXTFROM;
3081 aint = SvIV(fromstr);
1109a392 3082 DO_BO_PACK(aint, i);
f337b084 3083 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3084 }
3085 break;
068bd2e7 3086 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
3087 case 'N':
3088 while (len-- > 0) {
f337b084 3089 U32 au32;
a6ec74c1 3090 fromstr = NEXTFROM;
ef108786 3091 au32 = SvUV(fromstr);
ef108786 3092 au32 = PerlSock_htonl(au32);
f337b084 3093 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3094 }
3095 break;
068bd2e7 3096 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
3097 case 'V':
3098 while (len-- > 0) {
f337b084 3099 U32 au32;
a6ec74c1 3100 fromstr = NEXTFROM;
ef108786 3101 au32 = SvUV(fromstr);
ef108786 3102 au32 = htovl(au32);
f337b084 3103 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3104 }
3105 break;
49704364 3106 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3107#if LONGSIZE != SIZE32
fc241834 3108 while (len-- > 0) {
f337b084 3109 unsigned long aulong;
fc241834
RGS
3110 fromstr = NEXTFROM;
3111 aulong = SvUV(fromstr);
3112 DO_BO_PACK(aulong, l);
f337b084 3113 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3114 }
49704364
LW
3115 break;
3116#else
3117 /* Fall though! */
a6ec74c1 3118#endif
49704364 3119 case 'L':
fc241834 3120 while (len-- > 0) {
f337b084 3121 U32 au32;
fc241834
RGS
3122 fromstr = NEXTFROM;
3123 au32 = SvUV(fromstr);
3124 DO_BO_PACK(au32, 32);
f337b084 3125 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3126 }
3127 break;
49704364 3128 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3129#if LONGSIZE != SIZE32
fc241834 3130 while (len-- > 0) {
f337b084 3131 long along;
fc241834
RGS
3132 fromstr = NEXTFROM;
3133 along = SvIV(fromstr);
3134 DO_BO_PACK(along, l);
f337b084 3135 PUSH_VAR(utf8, cur, along);
a6ec74c1 3136 }
49704364
LW
3137 break;
3138#else
3139 /* Fall though! */
a6ec74c1 3140#endif
49704364
LW
3141 case 'l':
3142 while (len-- > 0) {
f337b084 3143 I32 ai32;
49704364 3144 fromstr = NEXTFROM;
ef108786
MHM
3145 ai32 = SvIV(fromstr);
3146 DO_BO_PACK(ai32, 32);
f337b084 3147 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3148 }
3149 break;
3150#ifdef HAS_QUAD
3151 case 'Q':
3152 while (len-- > 0) {
f337b084 3153 Uquad_t auquad;
a6ec74c1 3154 fromstr = NEXTFROM;
f337b084 3155 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3156 DO_BO_PACK(auquad, 64);
f337b084 3157 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3158 }
3159 break;
3160 case 'q':
3161 while (len-- > 0) {
f337b084 3162 Quad_t aquad;
a6ec74c1
JH
3163 fromstr = NEXTFROM;
3164 aquad = (Quad_t)SvIV(fromstr);
1109a392 3165 DO_BO_PACK(aquad, 64);
f337b084 3166 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3167 }
3168 break;
f337b084 3169#endif /* HAS_QUAD */
a6ec74c1
JH
3170 case 'P':
3171 len = 1; /* assume SV is correct length */
f337b084 3172 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3173 /* Fall through! */
a6ec74c1
JH
3174 case 'p':
3175 while (len-- > 0) {
83003860 3176 const char *aptr;
f337b084 3177
a6ec74c1 3178 fromstr = NEXTFROM;
28a4f200 3179 SvGETMAGIC(fromstr);
3180 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3181 else {
a6ec74c1
JH
3182 /* XXX better yet, could spirit away the string to
3183 * a safe spot and hang on to it until the result
3184 * of pack() (and all copies of the result) are
3185 * gone.
3186 */
041457d9 3187 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3188 !SvREADONLY(fromstr)))) {
3189 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3190 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3191 }
3192 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3193 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3194 else
2596d9fe 3195 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3196 }
07409e01 3197 DO_BO_PACK_PC(aptr);
f337b084 3198 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3199 }
3200 break;
fc241834 3201 case 'u': {
f7fe979e 3202 const char *aptr, *aend;
fc241834 3203 bool from_utf8;
f337b084 3204
a6ec74c1 3205 fromstr = NEXTFROM;
fc241834
RGS
3206 if (len <= 2) len = 45;
3207 else len = len / 3 * 3;
3208 if (len >= 64) {
a2a5de95
NC
3209 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3210 "Field too wide in 'u' format in pack");
fc241834
RGS
3211 len = 63;
3212 }
83003860 3213 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3214 from_utf8 = DO_UTF8(fromstr);
3215 if (from_utf8) {
3216 aend = aptr + fromlen;
3f63b0e5 3217 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3218 } else aend = NULL; /* Unused, but keep compilers happy */
3219 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3220 while (fromlen > 0) {
fc241834 3221 U8 *end;
a6ec74c1 3222 I32 todo;
fc241834 3223 U8 hunk[1+63/3*4+1];
a6ec74c1 3224
eb160463 3225 if ((I32)fromlen > len)
a6ec74c1
JH
3226 todo = len;
3227 else
3228 todo = fromlen;
fc241834
RGS
3229 if (from_utf8) {
3230 char buffer[64];
3231 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3232 'u' | TYPE_IS_PACK)) {
3233 *cur = '\0';
b162af07 3234 SvCUR_set(cat, cur - start);
5637ef5b
NC
3235 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3236 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3237 aptr, aend, buffer, (long) todo);
fc241834
RGS
3238 }
3239 end = doencodes(hunk, buffer, todo);
3240 } else {
3241 end = doencodes(hunk, aptr, todo);
3242 aptr += todo;
3243 }
3244 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3245 fromlen -= todo;
3246 }
a6ec74c1
JH
3247 break;
3248 }
f337b084
TH
3249 }
3250 *cur = '\0';
b162af07 3251 SvCUR_set(cat, cur - start);
f337b084 3252 no_change:
49704364 3253 *symptr = lookahead;
a6ec74c1 3254 }
49704364 3255 return beglist;
18529408
IZ
3256}
3257#undef NEXTFROM
3258
3259
3260PP(pp_pack)
3261{
97aff369 3262 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3263 SV *cat = TARG;
18529408 3264 STRLEN fromlen;
349d4f2f 3265 SV *pat_sv = *++MARK;
eb578fdb
KW
3266 const char *pat = SvPV_const(pat_sv, fromlen);
3267 const char *patend = pat + fromlen;
18529408
IZ
3268
3269 MARK++;
76f68e9b 3270 sv_setpvs(cat, "");
f337b084 3271 SvUTF8_off(cat);
18529408 3272
7accc089 3273 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3274
a6ec74c1
JH
3275 SvSETMAGIC(cat);
3276 SP = ORIGMARK;
3277 PUSHs(cat);
3278 RETURN;
3279}
a6ec74c1 3280
73cb7263
NC
3281/*
3282 * Local variables:
3283 * c-indentation-style: bsd
3284 * c-basic-offset: 4
14d04a33 3285 * indent-tabs-mode: nil
73cb7263
NC
3286 * End:
3287 *
14d04a33 3288 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3289 */