This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Provide ntohl, ntohs, htonl and htons no-op macros on big endian systems.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
f337b084
TH
132/* Only to be used inside a loop (see the break) */
133#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
134 if (utf8) { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
136 } else { \
137 Copy(s, OFF16(p), SIZE16, char); \
138 (s) += SIZE16; \
139 } \
140} STMT_END
141
142/* Only to be used inside a loop (see the break) */
143#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
144 if (utf8) { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
146 } else { \
147 Copy(s, OFF32(p), SIZE32, char); \
148 (s) += SIZE32; \
149 } \
150} STMT_END
151
152#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
154
155/* Only to be used inside a loop (see the break) */
275663fa 156#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
157STMT_START { \
158 if (utf8) { \
f337b084 159 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 160 (char *) (buf), len, datumtype)) break; \
08ca2aa3 161 } else { \
275663fa
TC
162 Copy(s, (char *) (buf), len, char); \
163 s += len; \
08ca2aa3 164 } \
08ca2aa3
TH
165} STMT_END
166
275663fa
TC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
169
f337b084 170#define PUSH_VAR(utf8, aptr, var) \
230e1fce 171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 172
49704364
WL
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
a6ec74c1 194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
215#if 'I' == 73 && 'J' == 74
216/* On an ASCII/ISO kind of system */
217#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
218#else
219/*
220 Some other sort of character set - use memchr() so we don't match
221 the null byte.
222 */
223#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
224#endif
225
66c611c5 226/* type modifiers */
62f95557 227#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
228#define TYPE_IS_BIG_ENDIAN 0x200
229#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 230#define TYPE_IS_PACK 0x800
1109a392 231#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 232#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
233#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
234
7212898e
NC
235# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
237
238# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
239
240# define DO_BO_UNPACK(var, type) \
1109a392 241 STMT_START { \
66c611c5 242 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
243 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
244 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
245 default: break; \
246 } \
247 } STMT_END
248
7212898e 249# define DO_BO_PACK(var, type) \
1109a392 250 STMT_START { \
66c611c5 251 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
252 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
253 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
254 default: break; \
255 } \
256 } STMT_END
257
07409e01 258# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 259 STMT_START { \
66c611c5 260 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 261 case TYPE_IS_BIG_ENDIAN: \
07409e01 262 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
263 break; \
264 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 265 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
266 break; \
267 default: \
268 break; \
269 } \
270 } STMT_END
271
07409e01 272# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 273 STMT_START { \
66c611c5 274 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 275 case TYPE_IS_BIG_ENDIAN: \
07409e01 276 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
277 break; \
278 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 279 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
280 break; \
281 default: \
282 break; \
283 } \
284 } STMT_END
285
7212898e 286# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
287 STMT_START { \
288 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
289 case TYPE_IS_BIG_ENDIAN: \
290 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
291 "platform", #action, #type); \
292 break; \
293 case TYPE_IS_LITTLE_ENDIAN: \
294 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
295 "platform", #action, #type); \
296 break; \
297 default: \
298 break; \
299 } \
300 } STMT_END
301
7212898e 302# if PTRSIZE == INTSIZE
07409e01
NC
303# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
304# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 305# elif PTRSIZE == LONGSIZE
fa58a56f 306# if LONGSIZE < IVSIZE && IVSIZE == 8
fa58a56f
S
307# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
308# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
309# else
fa58a56f
S
310# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
311# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
312# endif
2b00a750 313# elif PTRSIZE == IVSIZE
2b00a750
JD
314# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
315# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e 316# else
45f723e1
NC
317# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
318# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 319# endif
1109a392 320
7212898e 321# if defined(my_htolen) && defined(my_letohn) && \
1109a392 322 defined(my_htoben) && defined(my_betohn)
7212898e 323# define DO_BO_UNPACK_N(var, type) \
1109a392 324 STMT_START { \
66c611c5 325 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
326 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
327 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
328 default: break; \
329 } \
330 } STMT_END
331
7212898e 332# define DO_BO_PACK_N(var, type) \
1109a392 333 STMT_START { \
66c611c5 334 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
335 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
336 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
337 default: break; \
338 } \
339 } STMT_END
7212898e
NC
340# else
341# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
342# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
343# endif
344
78d46eaa 345#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 346#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
347#define PACK_SIZE_MASK 0x3F
348
298bc19c 349#include "packsizetables.c"
78d46eaa 350
08ca2aa3 351STATIC U8
f7fe979e 352uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 353{
08ca2aa3 354 STRLEN retlen;
0bcc34c2 355 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 356 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 357 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
358 warnings), but these two mean we make no progress in the string and
359 might enter an infinite loop */
360 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
361 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
362 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 363 if (val >= 0x100) {
a2a5de95
NC
364 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
365 "Character in '%c' format wrapped in unpack",
366 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
367 val &= 0xff;
368 }
369 *s += retlen;
fe2774ed 370 return (U8)val;
08ca2aa3
TH
371}
372
f337b084
TH
373#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
374 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
375 *(U8 *)(s)++)
376
377STATIC bool
f7fe979e 378uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
379{
380 UV val;
381 STRLEN retlen;
f7fe979e 382 const char *from = *s;
08ca2aa3 383 int bad = 0;
f7fe979e 384 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
385 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
386 for (;buf_len > 0; buf_len--) {
387 if (from >= end) return FALSE;
f337b084 388 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
389 if (retlen == (STRLEN) -1 || retlen == 0) {
390 from += UTF8SKIP(from);
391 bad |= 1;
392 } else from += retlen;
393 if (val >= 0x100) {
394 bad |= 2;
395 val &= 0xff;
396 }
fe2774ed 397 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
398 }
399 /* We have enough characters for the buffer. Did we have problems ? */
400 if (bad) {
401 if (bad & 1) {
402 /* Rewalk the string fragment while warning */
f7fe979e 403 const char *ptr;
9e27e96a 404 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
405 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
406 if (ptr >= end) break;
f337b084 407 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
408 }
409 if (from > end) from = end;
410 }
a2a5de95
NC
411 if ((bad & 2))
412 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 413 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
414 "Character(s) in '%c' format wrapped in %s",
415 (int) TYPE_NO_MODIFIERS(datumtype),
416 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
417 }
418 *s = from;
419 return TRUE;
420}
421
422STATIC bool
f7fe979e 423next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 424{
97aff369 425 dVAR;
08ca2aa3 426 STRLEN retlen;
0bcc34c2 427 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
428 if (val >= 0x100 || !ISUUCHAR(val) ||
429 retlen == (STRLEN) -1 || retlen == 0) {
430 *out = 0;
431 return FALSE;
432 }
433 *out = PL_uudmap[val] & 077;
f337b084 434 *s += retlen;
08ca2aa3
TH
435 return TRUE;
436}
78d46eaa 437
64844641 438STATIC char *
14333449 439S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 440 const U8 * const end = start + len;
64844641 441
7918f24d
NC
442 PERL_ARGS_ASSERT_BYTES_TO_UNI;
443
f337b084 444 while (start < end) {
48fa4626
TS
445 const UV uv = NATIVE_TO_ASCII(*start);
446 if (UNI_IS_INVARIANT(uv))
447 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
448 else {
449 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
450 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
451 }
452 start++;
453 }
64844641 454 return dest;
f337b084
TH
455}
456
230e1fce
NC
457#define PUSH_BYTES(utf8, cur, buf, len) \
458STMT_START { \
64844641
AL
459 if (utf8) \
460 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
461 else { \
462 Copy(buf, cur, len, char); \
463 (cur) += (len); \
464 } \
f337b084
TH
465} STMT_END
466
467#define GROWING(utf8, cat, start, cur, in_len) \
468STMT_START { \
469 STRLEN glen = (in_len); \
3473cf63 470 if (utf8) glen *= UTF8_EXPAND; \
f337b084 471 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 472 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
473 (cur) = (start) + SvCUR(cat); \
474 } \
475} STMT_END
476
477#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
478STMT_START { \
f7fe979e 479 const STRLEN glen = (in_len); \
f337b084 480 STRLEN gl = glen; \
3473cf63 481 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
482 if ((cur) + gl >= (start) + SvLEN(cat)) { \
483 *cur = '\0'; \
b162af07 484 SvCUR_set((cat), (cur) - (start)); \
0bd48802 485 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
486 (cur) = (start) + SvCUR(cat); \
487 } \
488 PUSH_BYTES(utf8, cur, buf, glen); \
489} STMT_END
490
491#define PUSH_BYTE(utf8, s, byte) \
492STMT_START { \
493 if (utf8) { \
f7fe979e 494 const U8 au8 = (byte); \
64844641 495 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
496 } else *(U8 *)(s)++ = (byte); \
497} STMT_END
498
499/* Only to be used inside a loop (see the break) */
500#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
501STMT_START { \
502 STRLEN retlen; \
503 if (str >= end) break; \
504 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
505 if (retlen == (STRLEN) -1 || retlen == 0) { \
506 *cur = '\0'; \
507 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
508 } \
509 str += retlen; \
510} STMT_END
511
f7fe979e
AL
512static const char *_action( const tempsym_t* symptr )
513{
10edeb5d 514 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
515}
516
206947d2 517/* Returns the sizeof() struct described by pat */
028d1f6d 518STATIC I32
f337b084 519S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 520{
f337b084 521 I32 total = 0;
206947d2 522
7918f24d
NC
523 PERL_ARGS_ASSERT_MEASURE_STRUCT;
524
49704364 525 while (next_symbol(symptr)) {
f337b084 526 I32 len;
f7fe979e 527 int size;
f337b084
TH
528
529 switch (symptr->howlen) {
fc241834 530 case e_star:
49704364 531 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 532 _action( symptr ) );
49704364 533 break;
f337b084
TH
534 default:
535 /* e_no_len and e_number */
536 len = symptr->length;
537 break;
49704364
WL
538 }
539
a7a3cfaa 540 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 541 if (!size) {
f7fe979e 542 int star;
80a13697
NC
543 /* endianness doesn't influence the size of a type */
544 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
545 default:
546 Perl_croak(aTHX_ "Invalid type '%c' in %s",
547 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 548 _action( symptr ) );
28be1210
TH
549 case '.' | TYPE_IS_SHRIEKING:
550 case '@' | TYPE_IS_SHRIEKING:
80a13697 551 case '@':
28be1210 552 case '.':
80a13697
NC
553 case '/':
554 case 'U': /* XXXX Is it correct? */
555 case 'w':
556 case 'u':
557 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 558 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 559 _action( symptr ) );
80a13697
NC
560 case '%':
561 size = 0;
562 break;
563 case '(':
fc241834
RGS
564 {
565 tempsym_t savsym = *symptr;
566 symptr->patptr = savsym.grpbeg;
567 symptr->patend = savsym.grpend;
568 /* XXXX Theoretically, we need to measure many times at
569 different positions, since the subexpression may contain
570 alignment commands, but be not of aligned length.
571 Need to detect this and croak(). */
572 size = measure_struct(symptr);
573 *symptr = savsym;
574 break;
575 }
80a13697
NC
576 case 'X' | TYPE_IS_SHRIEKING:
577 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
578 */
579 if (!len) /* Avoid division by 0 */
580 len = 1;
581 len = total % len; /* Assumed: the start is aligned. */
582 /* FALL THROUGH */
583 case 'X':
584 size = -1;
585 if (total < len)
f7fe979e 586 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
587 break;
588 case 'x' | TYPE_IS_SHRIEKING:
589 if (!len) /* Avoid division by 0 */
590 len = 1;
591 star = total % len; /* Assumed: the start is aligned. */
592 if (star) /* Other portable ways? */
593 len = len - star;
594 else
595 len = 0;
596 /* FALL THROUGH */
597 case 'x':
598 case 'A':
599 case 'Z':
600 case 'a':
80a13697
NC
601 size = 1;
602 break;
603 case 'B':
604 case 'b':
605 len = (len + 7)/8;
606 size = 1;
607 break;
608 case 'H':
609 case 'h':
610 len = (len + 1)/2;
611 size = 1;
612 break;
78d46eaa 613
80a13697
NC
614 case 'P':
615 len = 1;
616 size = sizeof(char*);
78d46eaa
NC
617 break;
618 }
206947d2
IZ
619 }
620 total += len * size;
621 }
622 return total;
623}
624
49704364
WL
625
626/* locate matching closing parenthesis or bracket
627 * returns char pointer to char after match, or NULL
628 */
f7fe979e 629STATIC const char *
5aaab254 630S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 631{
7918f24d
NC
632 PERL_ARGS_ASSERT_GROUP_END;
633
49704364 634 while (patptr < patend) {
f7fe979e 635 const char c = *patptr++;
49704364
WL
636
637 if (isSPACE(c))
638 continue;
639 else if (c == ender)
640 return patptr-1;
641 else if (c == '#') {
642 while (patptr < patend && *patptr != '\n')
643 patptr++;
644 continue;
645 } else if (c == '(')
646 patptr = group_end(patptr, patend, ')') + 1;
647 else if (c == '[')
648 patptr = group_end(patptr, patend, ']') + 1;
18529408 649 }
49704364
WL
650 Perl_croak(aTHX_ "No group ending character '%c' found in template",
651 ender);
652 return 0;
18529408
IZ
653}
654
49704364
WL
655
656/* Convert unsigned decimal number to binary.
657 * Expects a pointer to the first digit and address of length variable
658 * Advances char pointer to 1st non-digit char and returns number
fc241834 659 */
f7fe979e 660STATIC const char *
5aaab254 661S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
662{
663 I32 len = *patptr++ - '0';
7918f24d
NC
664
665 PERL_ARGS_ASSERT_GET_NUM;
666
49704364
WL
667 while (isDIGIT(*patptr)) {
668 if (len >= 0x7FFFFFFF/10)
669 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
670 len = (len * 10) + (*patptr++ - '0');
671 }
672 *lenptr = len;
673 return patptr;
674}
675
676/* The marvellous template parsing routine: Using state stored in *symptr,
677 * locates next template code and count
678 */
679STATIC bool
f337b084 680S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 681{
f7fe979e 682 const char* patptr = symptr->patptr;
0bcc34c2 683 const char* const patend = symptr->patend;
49704364 684
7918f24d
NC
685 PERL_ARGS_ASSERT_NEXT_SYMBOL;
686
49704364
WL
687 symptr->flags &= ~FLAG_SLASH;
688
689 while (patptr < patend) {
690 if (isSPACE(*patptr))
691 patptr++;
692 else if (*patptr == '#') {
693 patptr++;
694 while (patptr < patend && *patptr != '\n')
695 patptr++;
696 if (patptr < patend)
697 patptr++;
698 } else {
fc241834 699 /* We should have found a template code */
49704364 700 I32 code = *patptr++ & 0xFF;
66c611c5 701 U32 inherited_modifiers = 0;
49704364
WL
702
703 if (code == ','){ /* grandfather in commas but with a warning */
704 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
705 symptr->flags |= FLAG_COMMA;
706 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 707 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
708 }
709 continue;
710 }
fc241834 711
49704364 712 /* for '(', skip to ')' */
fc241834 713 if (code == '(') {
49704364
WL
714 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
715 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 716 _action( symptr ) );
49704364
WL
717 symptr->grpbeg = patptr;
718 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
719 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
720 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 721 _action( symptr ) );
49704364
WL
722 }
723
66c611c5
MHM
724 /* look for group modifiers to inherit */
725 if (TYPE_ENDIANNESS(symptr->flags)) {
726 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
727 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
728 }
729
1109a392
MHM
730 /* look for modifiers */
731 while (patptr < patend) {
b7787f18
AL
732 const char *allowed;
733 I32 modifier;
1109a392
MHM
734 switch (*patptr) {
735 case '!':
736 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 737 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
738 break;
739 case '>':
740 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 741 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
742 break;
743 case '<':
744 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 745 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
746 break;
747 default:
b7787f18
AL
748 allowed = "";
749 modifier = 0;
1109a392
MHM
750 break;
751 }
66c611c5 752
1109a392
MHM
753 if (modifier == 0)
754 break;
66c611c5 755
1109a392
MHM
756 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
757 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 758 allowed, _action( symptr ) );
66c611c5
MHM
759
760 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 761 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 762 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
763 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
764 TYPE_ENDIANNESS_MASK)
765 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 766 *patptr, _action( symptr ) );
66c611c5 767
a2a5de95
NC
768 if ((code & modifier)) {
769 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
770 "Duplicate modifier '%c' after '%c' in %s",
771 *patptr, (int) TYPE_NO_MODIFIERS(code),
772 _action( symptr ) );
1109a392 773 }
66c611c5 774
1109a392
MHM
775 code |= modifier;
776 patptr++;
49704364
WL
777 }
778
66c611c5
MHM
779 /* inherit modifiers */
780 code |= inherited_modifiers;
781
fc241834 782 /* look for count and/or / */
49704364
WL
783 if (patptr < patend) {
784 if (isDIGIT(*patptr)) {
785 patptr = get_num( patptr, &symptr->length );
786 symptr->howlen = e_number;
787
788 } else if (*patptr == '*') {
789 patptr++;
790 symptr->howlen = e_star;
791
792 } else if (*patptr == '[') {
f7fe979e 793 const char* lenptr = ++patptr;
49704364
WL
794 symptr->howlen = e_number;
795 patptr = group_end( patptr, patend, ']' ) + 1;
796 /* what kind of [] is it? */
797 if (isDIGIT(*lenptr)) {
798 lenptr = get_num( lenptr, &symptr->length );
799 if( *lenptr != ']' )
800 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 801 _action( symptr ) );
49704364
WL
802 } else {
803 tempsym_t savsym = *symptr;
804 symptr->patend = patptr-1;
805 symptr->patptr = lenptr;
806 savsym.length = measure_struct(symptr);
807 *symptr = savsym;
808 }
809 } else {
810 symptr->howlen = e_no_len;
811 symptr->length = 1;
812 }
813
814 /* try to find / */
815 while (patptr < patend) {
816 if (isSPACE(*patptr))
817 patptr++;
818 else if (*patptr == '#') {
819 patptr++;
820 while (patptr < patend && *patptr != '\n')
821 patptr++;
822 if (patptr < patend)
823 patptr++;
824 } else {
66c611c5 825 if (*patptr == '/') {
49704364
WL
826 symptr->flags |= FLAG_SLASH;
827 patptr++;
66c611c5
MHM
828 if (patptr < patend &&
829 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 830 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 831 _action( symptr ) );
49704364
WL
832 }
833 break;
834 }
18529408 835 }
49704364
WL
836 } else {
837 /* at end - no count, no / */
838 symptr->howlen = e_no_len;
839 symptr->length = 1;
840 }
841
842 symptr->code = code;
fc241834 843 symptr->patptr = patptr;
49704364 844 return TRUE;
18529408 845 }
49704364 846 }
fc241834 847 symptr->patptr = patptr;
49704364 848 return FALSE;
18529408
IZ
849}
850
18529408 851/*
fc241834 852 There is no way to cleanly handle the case where we should process the
08ca2aa3 853 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
854 (e.g. estimates like strend-s as an upper bound for the number of
855 characters left wouldn't work). So if we foresee the need of this
856 (pattern starts with U or contains U0), we want to work on the encoded
857 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
858 themselves if they need to do a lot of unpacks like this on it
859*/
fc241834 860STATIC bool
08ca2aa3
TH
861need_utf8(const char *pat, const char *patend)
862{
863 bool first = TRUE;
7918f24d
NC
864
865 PERL_ARGS_ASSERT_NEED_UTF8;
866
08ca2aa3
TH
867 while (pat < patend) {
868 if (pat[0] == '#') {
869 pat++;
f7fe979e 870 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
871 if (!pat) return FALSE;
872 } else if (pat[0] == 'U') {
873 if (first || pat[1] == '0') return TRUE;
874 } else first = FALSE;
875 pat++;
876 }
877 return FALSE;
878}
879
880STATIC char
881first_symbol(const char *pat, const char *patend) {
7918f24d
NC
882 PERL_ARGS_ASSERT_FIRST_SYMBOL;
883
08ca2aa3
TH
884 while (pat < patend) {
885 if (pat[0] != '#') return pat[0];
886 pat++;
f7fe979e 887 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
888 if (!pat) return 0;
889 pat++;
890 }
891 return 0;
892}
893
894/*
7accc089
JH
895=for apidoc unpackstring
896
21ebfc7a
DM
897The engine implementing the unpack() Perl function.
898
899Using the template pat..patend, this function unpacks the string
900s..strend into a number of mortal SVs, which it pushes onto the perl
901argument (@_) stack (so you will need to issue a C<PUTBACK> before and
902C<SPAGAIN> after the call to this function). It returns the number of
903pushed elements.
904
905The strend and patend pointers should point to the byte following the last
906character of each string.
907
908Although this function returns its values on the perl argument stack, it
909doesn't take any parameters from that stack (and thus in particular
910there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
911example).
7accc089
JH
912
913=cut */
914
915I32
f7fe979e 916Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 917{
f7fe979e 918 tempsym_t sym;
08ca2aa3 919
7918f24d
NC
920 PERL_ARGS_ASSERT_UNPACKSTRING;
921
f337b084 922 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
923 else if (need_utf8(pat, patend)) {
924 /* We probably should try to avoid this in case a scalar context call
925 wouldn't get to the "U0" */
926 STRLEN len = strend - s;
230e1fce 927 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
928 SAVEFREEPV(s);
929 strend = s + len;
f337b084 930 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
931 }
932
f337b084
TH
933 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
934 flags |= FLAG_PARSE_UTF8;
08ca2aa3 935
f7fe979e 936 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
937
938 return unpack_rec(&sym, s, s, strend, NULL );
939}
940
4136a0f7 941STATIC I32
f7fe979e 942S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 943{
27da23d5 944 dVAR; dSP;
3297d27d 945 SV *sv = NULL;
f7fe979e 946 const I32 start_sp_offset = SP - PL_stack_base;
49704364 947 howlen_t howlen;
a6ec74c1 948 I32 checksum = 0;
92d41999 949 UV cuv = 0;
a6ec74c1 950 NV cdouble = 0.0;
f337b084 951 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 952 bool beyond = FALSE;
21c16052 953 bool explicit_length;
9e27e96a 954 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 955 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
956
957 PERL_ARGS_ASSERT_UNPACK_REC;
958
28be1210 959 symptr->strbeg = s - strbeg;
49704364 960
49704364 961 while (next_symbol(symptr)) {
a7a3cfaa 962 packprops_t props;
9e27e96a 963 I32 len;
f337b084 964 I32 datumtype = symptr->code;
206947d2 965 /* do first one only unless in list context
08ca2aa3 966 / is implemented by unpacking the count, then popping it from the
206947d2 967 stack, so must check that we're not in the middle of a / */
49704364 968 if ( unpack_only_one
206947d2 969 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 970 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 971 break;
49704364 972
f337b084 973 switch (howlen = symptr->howlen) {
fc241834
RGS
974 case e_star:
975 len = strend - strbeg; /* long enough */
49704364 976 break;
f337b084
TH
977 default:
978 /* e_no_len and e_number */
979 len = symptr->length;
980 break;
49704364 981 }
18529408 982
21c16052 983 explicit_length = TRUE;
a6ec74c1 984 redo_switch:
49704364 985 beyond = s >= strend;
a7a3cfaa
TH
986
987 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
988 if (props) {
989 /* props nonzero means we can process this letter. */
9e27e96a
AL
990 const long size = props & PACK_SIZE_MASK;
991 const long howmany = (strend - s) / size;
a7a3cfaa
TH
992 if (len > howmany)
993 len = howmany;
994
995 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
996 if (len && unpack_only_one) len = 1;
997 EXTEND(SP, len);
998 EXTEND_MORTAL(len);
78d46eaa
NC
999 }
1000 }
a7a3cfaa 1001
1109a392 1002 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1003 default:
1109a392 1004 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1005
a6ec74c1 1006 case '%':
49704364 1007 if (howlen == e_no_len)
18529408 1008 len = 16; /* len is not specified */
a6ec74c1 1009 checksum = len;
92d41999 1010 cuv = 0;
a6ec74c1 1011 cdouble = 0;
18529408 1012 continue;
a6ec74c1 1013 break;
18529408
IZ
1014 case '(':
1015 {
49704364 1016 tempsym_t savsym = *symptr;
9e27e96a 1017 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1018 symptr->flags |= group_modifiers;
49704364 1019 symptr->patend = savsym.grpend;
28be1210 1020 symptr->previous = &savsym;
49704364 1021 symptr->level++;
18529408 1022 PUTBACK;
c6f750d1 1023 if (len && unpack_only_one) len = 1;
18529408 1024 while (len--) {
49704364 1025 symptr->patptr = savsym.grpbeg;
f337b084
TH
1026 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1027 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1028 unpack_rec(symptr, s, strbeg, strend, &s);
1029 if (s == strend && savsym.howlen == e_star)
49704364 1030 break; /* No way to continue */
18529408
IZ
1031 }
1032 SPAGAIN;
28be1210 1033 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1034 *symptr = savsym;
18529408
IZ
1035 break;
1036 }
28be1210 1037 case '.' | TYPE_IS_SHRIEKING:
28be1210 1038 case '.': {
9e27e96a 1039 const char *from;
28be1210 1040 SV *sv;
9e27e96a 1041 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
1042 if (howlen == e_star) from = strbeg;
1043 else if (len <= 0) from = s;
1044 else {
1045 tempsym_t *group = symptr;
1046
1047 while (--len && group) group = group->previous;
1048 from = group ? strbeg + group->strbeg : strbeg;
1049 }
1050 sv = from <= s ?
00646304
CB
1051 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1052 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1053 mXPUSHs(sv);
28be1210
TH
1054 break;
1055 }
28be1210 1056 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 1057 case '@':
28be1210 1058 s = strbeg + symptr->strbeg;
28be1210 1059 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 1060 {
08ca2aa3
TH
1061 while (len > 0) {
1062 if (s >= strend)
1063 Perl_croak(aTHX_ "'@' outside of string in unpack");
1064 s += UTF8SKIP(s);
1065 len--;
1066 }
1067 if (s > strend)
1068 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1069 } else {
28be1210 1070 if (strend-s < len)
fc241834 1071 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1072 s += len;
08ca2aa3 1073 }
a6ec74c1 1074 break;
62f95557
IZ
1075 case 'X' | TYPE_IS_SHRIEKING:
1076 if (!len) /* Avoid division by 0 */
1077 len = 1;
08ca2aa3 1078 if (utf8) {
f7fe979e 1079 const char *hop, *last;
f337b084
TH
1080 I32 l = len;
1081 hop = last = strbeg;
1082 while (hop < s) {
1083 hop += UTF8SKIP(hop);
1084 if (--l == 0) {
08ca2aa3 1085 last = hop;
f337b084
TH
1086 l = len;
1087 }
fc241834 1088 }
f337b084
TH
1089 if (last > s)
1090 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1091 s = last;
1092 break;
f337b084
TH
1093 }
1094 len = (s - strbeg) % len;
62f95557 1095 /* FALL THROUGH */
a6ec74c1 1096 case 'X':
08ca2aa3
TH
1097 if (utf8) {
1098 while (len > 0) {
1099 if (s <= strbeg)
1100 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1101 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1102 if (s <= strbeg)
1103 Perl_croak(aTHX_ "'X' outside of string in unpack");
1104 }
1105 len--;
1106 }
1107 } else {
fc241834
RGS
1108 if (len > s - strbeg)
1109 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1110 s -= len;
08ca2aa3 1111 }
a6ec74c1 1112 break;
9e27e96a
AL
1113 case 'x' | TYPE_IS_SHRIEKING: {
1114 I32 ai32;
62f95557
IZ
1115 if (!len) /* Avoid division by 0 */
1116 len = 1;
230e1fce
NC
1117 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1118 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1119 if (ai32 == 0) break;
1120 len -= ai32;
9e27e96a 1121 }
62f95557 1122 /* FALL THROUGH */
a6ec74c1 1123 case 'x':
08ca2aa3
TH
1124 if (utf8) {
1125 while (len>0) {
1126 if (s >= strend)
1127 Perl_croak(aTHX_ "'x' outside of string in unpack");
1128 s += UTF8SKIP(s);
1129 len--;
1130 }
1131 } else {
fc241834
RGS
1132 if (len > strend - s)
1133 Perl_croak(aTHX_ "'x' outside of string in unpack");
1134 s += len;
f337b084 1135 }
a6ec74c1
JH
1136 break;
1137 case '/':
49704364
WL
1138 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1139 break;
a6ec74c1
JH
1140 case 'A':
1141 case 'Z':
1142 case 'a':
08ca2aa3
TH
1143 if (checksum) {
1144 /* Preliminary length estimate is assumed done in 'W' */
1145 if (len > strend - s) len = strend - s;
1146 goto W_checksum;
1147 }
1148 if (utf8) {
1149 I32 l;
f7fe979e 1150 const char *hop;
08ca2aa3
TH
1151 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1152 if (hop >= strend) {
1153 if (hop > strend)
1154 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1155 break;
fc241834 1156 }
a6ec74c1 1157 }
08ca2aa3
TH
1158 if (hop > strend)
1159 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1160 len = hop - s;
1161 } else if (len > strend - s)
1162 len = strend - s;
1163
1164 if (datumtype == 'Z') {
1165 /* 'Z' strips stuff after first null */
f7fe979e 1166 const char *ptr, *end;
f337b084
TH
1167 end = s + len;
1168 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1169 sv = newSVpvn(s, ptr-s);
1170 if (howlen == e_star) /* exact for 'Z*' */
1171 len = ptr-s + (ptr != strend ? 1 : 0);
1172 } else if (datumtype == 'A') {
1173 /* 'A' strips both nulls and spaces */
f7fe979e 1174 const char *ptr;
18bdf90a
TH
1175 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1176 for (ptr = s+len-1; ptr >= s; ptr--)
1177 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1178 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1179 if (ptr >= s) ptr += UTF8SKIP(ptr);
1180 else ptr++;
28be1210 1181 if (ptr > s+len)
18bdf90a
TH
1182 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1183 } else {
1184 for (ptr = s+len-1; ptr >= s; ptr--)
1185 if (*ptr != 0 && !isSPACE(*ptr)) break;
1186 ptr++;
1187 }
08ca2aa3
TH
1188 sv = newSVpvn(s, ptr-s);
1189 } else sv = newSVpvn(s, len);
1190
1191 if (utf8) {
1192 SvUTF8_on(sv);
1193 /* Undo any upgrade done due to need_utf8() */
f337b084 1194 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1195 sv_utf8_downgrade(sv, 0);
a6ec74c1 1196 }
6e449a3a 1197 mXPUSHs(sv);
08ca2aa3 1198 s += len;
a6ec74c1
JH
1199 break;
1200 case 'B':
08ca2aa3
TH
1201 case 'b': {
1202 char *str;
49704364 1203 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1204 len = (strend - s) * 8;
1205 if (checksum) {
f337b084 1206 if (utf8)
08ca2aa3 1207 while (len >= 8 && s < strend) {
f337b084 1208 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1209 len -= 8;
1210 }
f337b084 1211 else
fc241834 1212 while (len >= 8) {
08ca2aa3 1213 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1214 len -= 8;
1215 }
08ca2aa3
TH
1216 if (len && s < strend) {
1217 U8 bits;
f337b084
TH
1218 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1219 if (datumtype == 'b')
a6ec74c1 1220 while (len-- > 0) {
92d41999 1221 if (bits & 1) cuv++;
a6ec74c1
JH
1222 bits >>= 1;
1223 }
f337b084 1224 else
a6ec74c1 1225 while (len-- > 0) {
08ca2aa3 1226 if (bits & 0x80) cuv++;
a6ec74c1
JH
1227 bits <<= 1;
1228 }
fc241834 1229 }
a6ec74c1
JH
1230 break;
1231 }
08ca2aa3 1232
561b68a9 1233 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1234 SvPOK_on(sv);
1235 str = SvPVX(sv);
1236 if (datumtype == 'b') {
f337b084 1237 U8 bits = 0;
f7fe979e 1238 const I32 ai32 = len;
08ca2aa3
TH
1239 for (len = 0; len < ai32; len++) {
1240 if (len & 7) bits >>= 1;
1241 else if (utf8) {
1242 if (s >= strend) break;
f337b084 1243 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1244 } else bits = *(U8 *) s++;
1245 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1246 }
08ca2aa3 1247 } else {
f337b084 1248 U8 bits = 0;
f7fe979e 1249 const I32 ai32 = len;
08ca2aa3
TH
1250 for (len = 0; len < ai32; len++) {
1251 if (len & 7) bits <<= 1;
1252 else if (utf8) {
1253 if (s >= strend) break;
f337b084 1254 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1255 } else bits = *(U8 *) s++;
1256 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1257 }
1258 }
1259 *str = '\0';
aa07b2f6 1260 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1261 XPUSHs(sv);
a6ec74c1 1262 break;
08ca2aa3 1263 }
a6ec74c1 1264 case 'H':
08ca2aa3 1265 case 'h': {
3297d27d 1266 char *str = NULL;
fc241834 1267 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1268 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1269 len = (strend - s) * 2;
858fe5e1
TC
1270 if (!checksum) {
1271 sv = sv_2mortal(newSV(len ? len : 1));
1272 SvPOK_on(sv);
1273 str = SvPVX(sv);
1274 }
a6ec74c1 1275 if (datumtype == 'h') {
f337b084 1276 U8 bits = 0;
9e27e96a 1277 I32 ai32 = len;
fc241834
RGS
1278 for (len = 0; len < ai32; len++) {
1279 if (len & 1) bits >>= 4;
1280 else if (utf8) {
1281 if (s >= strend) break;
f337b084 1282 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1283 } else bits = * (U8 *) s++;
858fe5e1
TC
1284 if (!checksum)
1285 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1286 }
08ca2aa3 1287 } else {
f337b084 1288 U8 bits = 0;
f7fe979e 1289 const I32 ai32 = len;
08ca2aa3
TH
1290 for (len = 0; len < ai32; len++) {
1291 if (len & 1) bits <<= 4;
1292 else if (utf8) {
1293 if (s >= strend) break;
f337b084 1294 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1295 } else bits = *(U8 *) s++;
858fe5e1
TC
1296 if (!checksum)
1297 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1298 }
1299 }
858fe5e1
TC
1300 if (!checksum) {
1301 *str = '\0';
1302 SvCUR_set(sv, str - SvPVX_const(sv));
1303 XPUSHs(sv);
1304 }
a6ec74c1 1305 break;
08ca2aa3 1306 }
1651fc44
ML
1307 case 'C':
1308 if (len == 0) {
1309 if (explicit_length)
1310 /* Switch to "character" mode */
1311 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1312 break;
1313 }
1314 /* FALL THROUGH */
a6ec74c1 1315 case 'c':
1651fc44
ML
1316 while (len-- > 0 && s < strend) {
1317 int aint;
1318 if (utf8)
1319 {
1320 STRLEN retlen;
1321 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1322 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1325 s += retlen;
1326 }
1327 else
1328 aint = *(U8 *)(s)++;
1329 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1330 aint -= 256;
08ca2aa3 1331 if (!checksum)
6e449a3a 1332 mPUSHi(aint);
73cb7263
NC
1333 else if (checksum > bits_in_uv)
1334 cdouble += (NV)aint;
1335 else
1336 cuv += aint;
a6ec74c1
JH
1337 }
1338 break;
08ca2aa3
TH
1339 case 'W':
1340 W_checksum:
1651fc44 1341 if (utf8) {
08ca2aa3 1342 while (len-- > 0 && s < strend) {
08ca2aa3 1343 STRLEN retlen;
f7fe979e 1344 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1345 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1346 if (retlen == (STRLEN) -1 || retlen == 0)
1347 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1348 s += retlen;
1349 if (!checksum)
6e449a3a 1350 mPUSHu(val);
08ca2aa3
TH
1351 else if (checksum > bits_in_uv)
1352 cdouble += (NV) val;
d6d3e8bd 1353 else
08ca2aa3 1354 cuv += val;
fc241834 1355 }
08ca2aa3 1356 } else if (!checksum)
a6ec74c1 1357 while (len-- > 0) {
f7fe979e 1358 const U8 ch = *(U8 *) s++;
6e449a3a 1359 mPUSHu(ch);
a6ec74c1 1360 }
08ca2aa3
TH
1361 else if (checksum > bits_in_uv)
1362 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1363 else
1364 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1365 break;
1366 case 'U':
35bcd338 1367 if (len == 0) {
c5333953 1368 if (explicit_length && howlen != e_star) {
08ca2aa3 1369 /* Switch to "bytes in UTF-8" mode */
f337b084 1370 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1371 else
1372 /* Should be impossible due to the need_utf8() test */
1373 Perl_croak(aTHX_ "U0 mode on a byte string");
1374 }
35bcd338
JH
1375 break;
1376 }
08ca2aa3 1377 if (len > strend - s) len = strend - s;
fc241834 1378 if (!checksum) {
08ca2aa3
TH
1379 if (len && unpack_only_one) len = 1;
1380 EXTEND(SP, len);
1381 EXTEND_MORTAL(len);
fc241834 1382 }
08ca2aa3
TH
1383 while (len-- > 0 && s < strend) {
1384 STRLEN retlen;
1385 UV auv;
1386 if (utf8) {
1387 U8 result[UTF8_MAXLEN];
f7fe979e 1388 const char *ptr = s;
08ca2aa3 1389 STRLEN len;
08ca2aa3
TH
1390 /* Bug: warns about bad utf8 even if we are short on bytes
1391 and will break out of the loop */
230e1fce
NC
1392 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1393 'U'))
08ca2aa3
TH
1394 break;
1395 len = UTF8SKIP(result);
fc241834 1396 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1397 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1398 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1399 s = ptr;
1400 } else {
cde84f2a 1401 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1402 if (retlen == (STRLEN) -1 || retlen == 0)
1403 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1404 s += retlen;
1405 }
1406 if (!checksum)
6e449a3a 1407 mPUSHu(auv);
73cb7263 1408 else if (checksum > bits_in_uv)
08ca2aa3 1409 cdouble += (NV) auv;
73cb7263 1410 else
08ca2aa3 1411 cuv += auv;
a6ec74c1
JH
1412 }
1413 break;
49704364
WL
1414 case 's' | TYPE_IS_SHRIEKING:
1415#if SHORTSIZE != SIZE16
73cb7263 1416 while (len-- > 0) {
08ca2aa3 1417 short ashort;
f337b084
TH
1418 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1419 DO_BO_UNPACK(ashort, s);
08ca2aa3 1420 if (!checksum)
6e449a3a 1421 mPUSHi(ashort);
73cb7263
NC
1422 else if (checksum > bits_in_uv)
1423 cdouble += (NV)ashort;
1424 else
1425 cuv += ashort;
49704364
WL
1426 }
1427 break;
1428#else
1429 /* Fallthrough! */
a6ec74c1 1430#endif
49704364 1431 case 's':
73cb7263 1432 while (len-- > 0) {
08ca2aa3
TH
1433 I16 ai16;
1434
1435#if U16SIZE > SIZE16
1436 ai16 = 0;
1437#endif
f337b084 1438 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1439 DO_BO_UNPACK(ai16, 16);
1109a392 1440#if U16SIZE > SIZE16
73cb7263
NC
1441 if (ai16 > 32767)
1442 ai16 -= 65536;
a6ec74c1 1443#endif
08ca2aa3 1444 if (!checksum)
6e449a3a 1445 mPUSHi(ai16);
73cb7263
NC
1446 else if (checksum > bits_in_uv)
1447 cdouble += (NV)ai16;
1448 else
1449 cuv += ai16;
a6ec74c1
JH
1450 }
1451 break;
49704364
WL
1452 case 'S' | TYPE_IS_SHRIEKING:
1453#if SHORTSIZE != SIZE16
73cb7263 1454 while (len-- > 0) {
08ca2aa3 1455 unsigned short aushort;
f337b084
TH
1456 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1457 DO_BO_UNPACK(aushort, s);
08ca2aa3 1458 if (!checksum)
6e449a3a 1459 mPUSHu(aushort);
73cb7263
NC
1460 else if (checksum > bits_in_uv)
1461 cdouble += (NV)aushort;
1462 else
1463 cuv += aushort;
49704364
WL
1464 }
1465 break;
1466#else
486ec47a 1467 /* Fallthrough! */
49704364 1468#endif
a6ec74c1
JH
1469 case 'v':
1470 case 'n':
1471 case 'S':
73cb7263 1472 while (len-- > 0) {
08ca2aa3
TH
1473 U16 au16;
1474#if U16SIZE > SIZE16
1475 au16 = 0;
1476#endif
f337b084 1477 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1478 DO_BO_UNPACK(au16, 16);
73cb7263
NC
1479 if (datumtype == 'n')
1480 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1481 if (datumtype == 'v')
1482 au16 = vtohs(au16);
08ca2aa3 1483 if (!checksum)
6e449a3a 1484 mPUSHu(au16);
73cb7263 1485 else if (checksum > bits_in_uv)
f337b084 1486 cdouble += (NV) au16;
73cb7263
NC
1487 else
1488 cuv += au16;
a6ec74c1
JH
1489 }
1490 break;
068bd2e7
MHM
1491 case 'v' | TYPE_IS_SHRIEKING:
1492 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1493 while (len-- > 0) {
08ca2aa3
TH
1494 I16 ai16;
1495# if U16SIZE > SIZE16
1496 ai16 = 0;
1497# endif
f337b084 1498 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1499 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1500 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1501 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1502 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1503 if (!checksum)
6e449a3a 1504 mPUSHi(ai16);
73cb7263 1505 else if (checksum > bits_in_uv)
08ca2aa3 1506 cdouble += (NV) ai16;
73cb7263
NC
1507 else
1508 cuv += ai16;
068bd2e7
MHM
1509 }
1510 break;
a6ec74c1 1511 case 'i':
49704364 1512 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1513 while (len-- > 0) {
08ca2aa3 1514 int aint;
f337b084
TH
1515 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1516 DO_BO_UNPACK(aint, i);
08ca2aa3 1517 if (!checksum)
6e449a3a 1518 mPUSHi(aint);
73cb7263
NC
1519 else if (checksum > bits_in_uv)
1520 cdouble += (NV)aint;
1521 else
1522 cuv += aint;
a6ec74c1
JH
1523 }
1524 break;
1525 case 'I':
49704364 1526 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1527 while (len-- > 0) {
08ca2aa3 1528 unsigned int auint;
f337b084
TH
1529 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1530 DO_BO_UNPACK(auint, i);
08ca2aa3 1531 if (!checksum)
6e449a3a 1532 mPUSHu(auint);
73cb7263
NC
1533 else if (checksum > bits_in_uv)
1534 cdouble += (NV)auint;
1535 else
1536 cuv += auint;
a6ec74c1
JH
1537 }
1538 break;
92d41999 1539 case 'j':
73cb7263 1540 while (len-- > 0) {
08ca2aa3 1541 IV aiv;
f337b084 1542 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1543#if IVSIZE == INTSIZE
f337b084 1544 DO_BO_UNPACK(aiv, i);
1109a392 1545#elif IVSIZE == LONGSIZE
f337b084 1546 DO_BO_UNPACK(aiv, l);
1109a392 1547#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1548 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1549#else
1550 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1551#endif
08ca2aa3 1552 if (!checksum)
6e449a3a 1553 mPUSHi(aiv);
73cb7263
NC
1554 else if (checksum > bits_in_uv)
1555 cdouble += (NV)aiv;
1556 else
1557 cuv += aiv;
92d41999
JH
1558 }
1559 break;
1560 case 'J':
73cb7263 1561 while (len-- > 0) {
08ca2aa3 1562 UV auv;
f337b084 1563 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1564#if IVSIZE == INTSIZE
f337b084 1565 DO_BO_UNPACK(auv, i);
08ca2aa3 1566#elif IVSIZE == LONGSIZE
f337b084 1567 DO_BO_UNPACK(auv, l);
08ca2aa3 1568#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1569 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1570#else
1571 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1572#endif
08ca2aa3 1573 if (!checksum)
6e449a3a 1574 mPUSHu(auv);
73cb7263
NC
1575 else if (checksum > bits_in_uv)
1576 cdouble += (NV)auv;
1577 else
1578 cuv += auv;
92d41999
JH
1579 }
1580 break;
49704364
WL
1581 case 'l' | TYPE_IS_SHRIEKING:
1582#if LONGSIZE != SIZE32
73cb7263 1583 while (len-- > 0) {
08ca2aa3 1584 long along;
f337b084
TH
1585 SHIFT_VAR(utf8, s, strend, along, datumtype);
1586 DO_BO_UNPACK(along, l);
08ca2aa3 1587 if (!checksum)
6e449a3a 1588 mPUSHi(along);
73cb7263
NC
1589 else if (checksum > bits_in_uv)
1590 cdouble += (NV)along;
1591 else
1592 cuv += along;
49704364
WL
1593 }
1594 break;
1595#else
1596 /* Fallthrough! */
a6ec74c1 1597#endif
49704364 1598 case 'l':
73cb7263 1599 while (len-- > 0) {
08ca2aa3
TH
1600 I32 ai32;
1601#if U32SIZE > SIZE32
1602 ai32 = 0;
1603#endif
f337b084 1604 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1605 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1606#if U32SIZE > SIZE32
08ca2aa3 1607 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1608#endif
08ca2aa3 1609 if (!checksum)
6e449a3a 1610 mPUSHi(ai32);
73cb7263
NC
1611 else if (checksum > bits_in_uv)
1612 cdouble += (NV)ai32;
1613 else
1614 cuv += ai32;
a6ec74c1
JH
1615 }
1616 break;
49704364
WL
1617 case 'L' | TYPE_IS_SHRIEKING:
1618#if LONGSIZE != SIZE32
73cb7263 1619 while (len-- > 0) {
08ca2aa3 1620 unsigned long aulong;
f337b084
TH
1621 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1622 DO_BO_UNPACK(aulong, l);
08ca2aa3 1623 if (!checksum)
6e449a3a 1624 mPUSHu(aulong);
73cb7263
NC
1625 else if (checksum > bits_in_uv)
1626 cdouble += (NV)aulong;
1627 else
1628 cuv += aulong;
49704364
WL
1629 }
1630 break;
1631#else
1632 /* Fall through! */
1633#endif
a6ec74c1
JH
1634 case 'V':
1635 case 'N':
1636 case 'L':
73cb7263 1637 while (len-- > 0) {
08ca2aa3
TH
1638 U32 au32;
1639#if U32SIZE > SIZE32
1640 au32 = 0;
1641#endif
f337b084 1642 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1643 DO_BO_UNPACK(au32, 32);
73cb7263
NC
1644 if (datumtype == 'N')
1645 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1646 if (datumtype == 'V')
1647 au32 = vtohl(au32);
08ca2aa3 1648 if (!checksum)
6e449a3a 1649 mPUSHu(au32);
fc241834
RGS
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)au32;
1652 else
1653 cuv += au32;
a6ec74c1
JH
1654 }
1655 break;
068bd2e7
MHM
1656 case 'V' | TYPE_IS_SHRIEKING:
1657 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1658 while (len-- > 0) {
08ca2aa3 1659 I32 ai32;
f8e5a5db 1660#if U32SIZE > SIZE32
08ca2aa3 1661 ai32 = 0;
f8e5a5db 1662#endif
f337b084 1663 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263
NC
1664 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1665 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1666 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1667 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1668 if (!checksum)
6e449a3a 1669 mPUSHi(ai32);
73cb7263
NC
1670 else if (checksum > bits_in_uv)
1671 cdouble += (NV)ai32;
1672 else
1673 cuv += ai32;
068bd2e7
MHM
1674 }
1675 break;
a6ec74c1 1676 case 'p':
a6ec74c1 1677 while (len-- > 0) {
f7fe979e 1678 const char *aptr;
f337b084 1679 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1680 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1681 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1682 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1683 }
1684 break;
1685 case 'w':
a6ec74c1
JH
1686 {
1687 UV auv = 0;
1688 U32 bytes = 0;
fc241834 1689
08ca2aa3
TH
1690 while (len > 0 && s < strend) {
1691 U8 ch;
f337b084 1692 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1693 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1694 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1695 if (ch < 0x80) {
a6ec74c1 1696 bytes = 0;
6e449a3a 1697 mPUSHu(auv);
a6ec74c1
JH
1698 len--;
1699 auv = 0;
08ca2aa3 1700 continue;
a6ec74c1 1701 }
08ca2aa3 1702 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1703 const char *t;
a6ec74c1 1704
f5992bc4 1705 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1706 while (s < strend) {
f337b084 1707 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1708 sv = mul128(sv, (U8)(ch & 0x7f));
1709 if (!(ch & 0x80)) {
a6ec74c1
JH
1710 bytes = 0;
1711 break;
1712 }
1713 }
10516c54 1714 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1715 while (*t == '0')
1716 t++;
1717 sv_chop(sv, t);
6e449a3a 1718 mPUSHs(sv);
a6ec74c1
JH
1719 len--;
1720 auv = 0;
1721 }
1722 }
1723 if ((s >= strend) && bytes)
49704364 1724 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1725 }
1726 break;
1727 case 'P':
49704364
WL
1728 if (symptr->howlen == e_star)
1729 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1730 EXTEND(SP, 1);
2d3e0934 1731 if (s + sizeof(char*) <= strend) {
08ca2aa3 1732 char *aptr;
f337b084 1733 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1734 DO_BO_UNPACK_PC(aptr);
fc241834 1735 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1736 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1737 }
a6ec74c1
JH
1738 break;
1739#ifdef HAS_QUAD
1740 case 'q':
73cb7263 1741 while (len-- > 0) {
08ca2aa3 1742 Quad_t aquad;
f337b084
TH
1743 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1744 DO_BO_UNPACK(aquad, 64);
08ca2aa3 1745 if (!checksum)
6e449a3a
MHM
1746 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1747 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1748 else if (checksum > bits_in_uv)
1749 cdouble += (NV)aquad;
1750 else
1751 cuv += aquad;
1752 }
a6ec74c1
JH
1753 break;
1754 case 'Q':
73cb7263 1755 while (len-- > 0) {
08ca2aa3 1756 Uquad_t auquad;
f337b084
TH
1757 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1758 DO_BO_UNPACK(auquad, 64);
08ca2aa3 1759 if (!checksum)
6e449a3a
MHM
1760 mPUSHs(auquad <= UV_MAX ?
1761 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1762 else if (checksum > bits_in_uv)
1763 cdouble += (NV)auquad;
1764 else
1765 cuv += auquad;
a6ec74c1
JH
1766 }
1767 break;
08ca2aa3 1768#endif /* HAS_QUAD */
a6ec74c1
JH
1769 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1770 case 'f':
73cb7263 1771 while (len-- > 0) {
08ca2aa3 1772 float afloat;
f337b084 1773 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1774 DO_BO_UNPACK_N(afloat, float);
1775 if (!checksum)
6e449a3a 1776 mPUSHn(afloat);
08ca2aa3 1777 else
73cb7263 1778 cdouble += afloat;
fc241834 1779 }
a6ec74c1
JH
1780 break;
1781 case 'd':
73cb7263 1782 while (len-- > 0) {
08ca2aa3 1783 double adouble;
f337b084 1784 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
1785 DO_BO_UNPACK_N(adouble, double);
1786 if (!checksum)
6e449a3a 1787 mPUSHn(adouble);
08ca2aa3 1788 else
73cb7263 1789 cdouble += adouble;
fc241834 1790 }
a6ec74c1 1791 break;
92d41999 1792 case 'F':
73cb7263 1793 while (len-- > 0) {
275663fa
TC
1794 NV_bytes anv;
1795 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1796 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 1797 if (!checksum)
275663fa 1798 mPUSHn(anv.nv);
08ca2aa3 1799 else
275663fa 1800 cdouble += anv.nv;
fc241834 1801 }
92d41999
JH
1802 break;
1803#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1804 case 'D':
73cb7263 1805 while (len-- > 0) {
275663fa
TC
1806 ld_bytes aldouble;
1807 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1808 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 1809 if (!checksum)
275663fa 1810 mPUSHn(aldouble.ld);
08ca2aa3 1811 else
275663fa 1812 cdouble += aldouble.ld;
92d41999
JH
1813 }
1814 break;
1815#endif
a6ec74c1 1816 case 'u':
858fe5e1 1817 if (!checksum) {
f7fe979e 1818 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1819 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1820 if (l) SvPOK_on(sv);
1821 }
1822 if (utf8) {
1823 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1824 I32 a, b, c, d;
db187877 1825 char hunk[3];
08ca2aa3 1826
08ca2aa3
TH
1827 while (len > 0) {
1828 next_uni_uu(aTHX_ &s, strend, &a);
1829 next_uni_uu(aTHX_ &s, strend, &b);
1830 next_uni_uu(aTHX_ &s, strend, &c);
1831 next_uni_uu(aTHX_ &s, strend, &d);
1832 hunk[0] = (char)((a << 2) | (b >> 4));
1833 hunk[1] = (char)((b << 4) | (c >> 2));
1834 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1835 if (!checksum)
1836 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1837 len -= 3;
1838 }
1839 if (s < strend) {
f7fe979e
AL
1840 if (*s == '\n') {
1841 s++;
1842 }
08ca2aa3
TH
1843 else {
1844 /* possible checksum byte */
f7fe979e
AL
1845 const char *skip = s+UTF8SKIP(s);
1846 if (skip < strend && *skip == '\n')
1847 s = skip+1;
08ca2aa3
TH
1848 }
1849 }
1850 }
1851 } else {
fc241834
RGS
1852 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1853 I32 a, b, c, d;
db187877 1854 char hunk[3];
a6ec74c1 1855
fc241834
RGS
1856 len = PL_uudmap[*(U8*)s++] & 077;
1857 while (len > 0) {
1858 if (s < strend && ISUUCHAR(*s))
1859 a = PL_uudmap[*(U8*)s++] & 077;
1860 else
1861 a = 0;
1862 if (s < strend && ISUUCHAR(*s))
1863 b = PL_uudmap[*(U8*)s++] & 077;
1864 else
1865 b = 0;
1866 if (s < strend && ISUUCHAR(*s))
1867 c = PL_uudmap[*(U8*)s++] & 077;
1868 else
1869 c = 0;
1870 if (s < strend && ISUUCHAR(*s))
1871 d = PL_uudmap[*(U8*)s++] & 077;
1872 else
1873 d = 0;
1874 hunk[0] = (char)((a << 2) | (b >> 4));
1875 hunk[1] = (char)((b << 4) | (c >> 2));
1876 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1877 if (!checksum)
1878 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1879 len -= 3;
1880 }
1881 if (*s == '\n')
1882 s++;
1883 else /* possible checksum byte */
1884 if (s + 1 < strend && s[1] == '\n')
1885 s += 2;
a6ec74c1 1886 }
08ca2aa3 1887 }
858fe5e1
TC
1888 if (!checksum)
1889 XPUSHs(sv);
a6ec74c1
JH
1890 break;
1891 }
49704364 1892
a6ec74c1 1893 if (checksum) {
1109a392 1894 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1895 (checksum > bits_in_uv &&
08ca2aa3
TH
1896 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1897 NV trouble, anv;
a6ec74c1 1898
08ca2aa3 1899 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1900 while (checksum >= 16) {
1901 checksum -= 16;
08ca2aa3 1902 anv *= 65536.0;
a6ec74c1 1903 }
a6ec74c1 1904 while (cdouble < 0.0)
08ca2aa3
TH
1905 cdouble += anv;
1906 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1907 sv = newSVnv(cdouble);
a6ec74c1
JH
1908 }
1909 else {
fa8ec7c1
NC
1910 if (checksum < bits_in_uv) {
1911 UV mask = ((UV)1 << checksum) - 1;
92d41999 1912 cuv &= mask;
a6ec74c1 1913 }
c4c5f44a 1914 sv = newSVuv(cuv);
a6ec74c1 1915 }
6e449a3a 1916 mXPUSHs(sv);
a6ec74c1
JH
1917 checksum = 0;
1918 }
fc241834 1919
49704364
WL
1920 if (symptr->flags & FLAG_SLASH){
1921 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1922 break;
49704364
WL
1923 if( next_symbol(symptr) ){
1924 if( symptr->howlen == e_number )
1925 Perl_croak(aTHX_ "Count after length/code in unpack" );
1926 if( beyond ){
1927 /* ...end of char buffer then no decent length available */
1928 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1929 } else {
1930 /* take top of stack (hope it's numeric) */
1931 len = POPi;
1932 if( len < 0 )
1933 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1934 }
1935 } else {
1936 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1937 }
1938 datumtype = symptr->code;
21c16052 1939 explicit_length = FALSE;
49704364
WL
1940 goto redo_switch;
1941 }
a6ec74c1 1942 }
49704364 1943
18529408
IZ
1944 if (new_s)
1945 *new_s = s;
1946 PUTBACK;
1947 return SP - PL_stack_base - start_sp_offset;
1948}
1949
1950PP(pp_unpack)
1951{
97aff369 1952 dVAR;
18529408 1953 dSP;
bab9c0ac 1954 dPOPPOPssrl;
18529408
IZ
1955 I32 gimme = GIMME_V;
1956 STRLEN llen;
1957 STRLEN rlen;
5c144d81
NC
1958 const char *pat = SvPV_const(left, llen);
1959 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1960 const char *strend = s + rlen;
1961 const char *patend = pat + llen;
08ca2aa3 1962 I32 cnt;
18529408
IZ
1963
1964 PUTBACK;
7accc089 1965 cnt = unpackstring(pat, patend, s, strend,
49704364 1966 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1967 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1968
18529408
IZ
1969 SPAGAIN;
1970 if ( !cnt && gimme == G_SCALAR )
1971 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1972 RETURN;
1973}
1974
f337b084 1975STATIC U8 *
f7fe979e 1976doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1977{
f337b084 1978 *h++ = PL_uuemap[len];
a6ec74c1 1979 while (len > 2) {
f337b084
TH
1980 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1981 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1982 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1983 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1984 s += 3;
1985 len -= 3;
1986 }
1987 if (len > 0) {
f7fe979e 1988 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1989 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1990 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1991 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1992 *h++ = PL_uuemap[0];
a6ec74c1 1993 }
f337b084
TH
1994 *h++ = '\n';
1995 return h;
a6ec74c1
JH
1996}
1997
1998STATIC SV *
f7fe979e 1999S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2000{
8b6e33c7
AL
2001 SV *result = newSVpvn(s, l);
2002 char *const result_c = SvPV_nolen(result); /* convenience */
2003 char *out = result_c;
2004 bool skip = 1;
2005 bool ignore = 0;
a6ec74c1 2006
7918f24d
NC
2007 PERL_ARGS_ASSERT_IS_AN_INT;
2008
a6ec74c1
JH
2009 while (*s) {
2010 switch (*s) {
2011 case ' ':
2012 break;
2013 case '+':
2014 if (!skip) {
2015 SvREFCNT_dec(result);
2016 return (NULL);
2017 }
2018 break;
2019 case '0':
2020 case '1':
2021 case '2':
2022 case '3':
2023 case '4':
2024 case '5':
2025 case '6':
2026 case '7':
2027 case '8':
2028 case '9':
2029 skip = 0;
2030 if (!ignore) {
2031 *(out++) = *s;
2032 }
2033 break;
2034 case '.':
2035 ignore = 1;
2036 break;
2037 default:
2038 SvREFCNT_dec(result);
2039 return (NULL);
2040 }
2041 s++;
2042 }
2043 *(out++) = '\0';
2044 SvCUR_set(result, out - result_c);
2045 return (result);
2046}
2047
2048/* pnum must be '\0' terminated */
2049STATIC int
2050S_div128(pTHX_ SV *pnum, bool *done)
2051{
8b6e33c7
AL
2052 STRLEN len;
2053 char * const s = SvPV(pnum, len);
2054 char *t = s;
2055 int m = 0;
2056
7918f24d
NC
2057 PERL_ARGS_ASSERT_DIV128;
2058
8b6e33c7
AL
2059 *done = 1;
2060 while (*t) {
2061 const int i = m * 10 + (*t - '0');
2062 const int r = (i >> 7); /* r < 10 */
2063 m = i & 0x7F;
2064 if (r) {
2065 *done = 0;
2066 }
2067 *(t++) = '0' + r;
a6ec74c1 2068 }
8b6e33c7
AL
2069 *(t++) = '\0';
2070 SvCUR_set(pnum, (STRLEN) (t - s));
2071 return (m);
a6ec74c1
JH
2072}
2073
18529408 2074/*
7accc089
JH
2075=for apidoc packlist
2076
2077The engine implementing pack() Perl function.
2078
bfce84ec
AL
2079=cut
2080*/
7accc089
JH
2081
2082void
5aaab254 2083Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2084{
97aff369 2085 dVAR;
aadb217d
JH
2086 tempsym_t sym;
2087
7918f24d
NC
2088 PERL_ARGS_ASSERT_PACKLIST;
2089
f7fe979e 2090 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2091
f337b084
TH
2092 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2093 Also make sure any UTF8 flag is loaded */
56eb0262 2094 SvPV_force_nolen(cat);
bfce84ec
AL
2095 if (DO_UTF8(cat))
2096 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2097
49704364
WL
2098 (void)pack_rec( cat, &sym, beglist, endlist );
2099}
2100
f337b084
TH
2101/* like sv_utf8_upgrade, but also repoint the group start markers */
2102STATIC void
2103marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2104 STRLEN len;
2105 tempsym_t *group;
f7fe979e
AL
2106 const char *from_ptr, *from_start, *from_end, **marks, **m;
2107 char *to_start, *to_ptr;
f337b084
TH
2108
2109 if (SvUTF8(sv)) return;
2110
aa07b2f6 2111 from_start = SvPVX_const(sv);
f337b084
TH
2112 from_end = from_start + SvCUR(sv);
2113 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2114 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2115 if (from_ptr == from_end) {
2116 /* Simple case: no character needs to be changed */
2117 SvUTF8_on(sv);
2118 return;
2119 }
2120
3473cf63 2121 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2122 Newx(to_start, len, char);
f337b084
TH
2123 Copy(from_start, to_start, from_ptr-from_start, char);
2124 to_ptr = to_start + (from_ptr-from_start);
2125
a02a5408 2126 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2127 for (group=sym_ptr; group; group = group->previous)
2128 marks[group->level] = from_start + group->strbeg;
2129 marks[sym_ptr->level+1] = from_end+1;
2130 for (m = marks; *m < from_ptr; m++)
2131 *m = to_start + (*m-from_start);
2132
2133 for (;from_ptr < from_end; from_ptr++) {
2134 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2135 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2136 }
2137 *to_ptr = 0;
2138
2139 while (*m == from_ptr) *m++ = to_ptr;
2140 if (m != marks + sym_ptr->level+1) {
2141 Safefree(marks);
2142 Safefree(to_start);
5637ef5b
NC
2143 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2144 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2145 }
2146 for (group=sym_ptr; group; group = group->previous)
2147 group->strbeg = marks[group->level] - to_start;
2148 Safefree(marks);
2149
2150 if (SvOOK(sv)) {
2151 if (SvIVX(sv)) {
b162af07 2152 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2153 from_start -= SvIVX(sv);
2154 SvIV_set(sv, 0);
2155 }
2156 SvFLAGS(sv) &= ~SVf_OOK;
2157 }
2158 if (SvLEN(sv) != 0)
2159 Safefree(from_start);
f880fe2f 2160 SvPV_set(sv, to_start);
b162af07
SP
2161 SvCUR_set(sv, to_ptr - to_start);
2162 SvLEN_set(sv, len);
f337b084
TH
2163 SvUTF8_on(sv);
2164}
2165
2166/* Exponential string grower. Makes string extension effectively O(n)
2167 needed says how many extra bytes we need (not counting the final '\0')
2168 Only grows the string if there is an actual lack of space
2169*/
2170STATIC char *
0bd48802 2171S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2172 const STRLEN cur = SvCUR(sv);
2173 const STRLEN len = SvLEN(sv);
f337b084 2174 STRLEN extend;
7918f24d
NC
2175
2176 PERL_ARGS_ASSERT_SV_EXP_GROW;
2177
f337b084
TH
2178 if (len - cur > needed) return SvPVX(sv);
2179 extend = needed > len ? needed : len;
2180 return SvGROW(sv, len+extend+1);
2181}
49704364
WL
2182
2183STATIC
2184SV **
f337b084 2185S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2186{
97aff369 2187 dVAR;
49704364 2188 tempsym_t lookahead;
f337b084
TH
2189 I32 items = endlist - beglist;
2190 bool found = next_symbol(symptr);
2191 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2192 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2193
7918f24d
NC
2194 PERL_ARGS_ASSERT_PACK_REC;
2195
f337b084
TH
2196 if (symptr->level == 0 && found && symptr->code == 'U') {
2197 marked_upgrade(aTHX_ cat, symptr);
2198 symptr->flags |= FLAG_DO_UTF8;
2199 utf8 = 0;
49704364 2200 }
f337b084 2201 symptr->strbeg = SvCUR(cat);
49704364
WL
2202
2203 while (found) {
f337b084
TH
2204 SV *fromstr;
2205 STRLEN fromlen;
2206 I32 len;
a0714e2c 2207 SV *lengthcode = NULL;
49704364 2208 I32 datumtype = symptr->code;
f337b084
TH
2209 howlen_t howlen = symptr->howlen;
2210 char *start = SvPVX(cat);
2211 char *cur = start + SvCUR(cat);
49704364 2212
f337b084
TH
2213#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2214
2215 switch (howlen) {
fc241834 2216 case e_star:
f337b084
TH
2217 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2218 0 : items;
2219 break;
2220 default:
2221 /* e_no_len and e_number */
2222 len = symptr->length;
49704364
WL
2223 break;
2224 }
2225
f337b084 2226 if (len) {
a7a3cfaa 2227 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2228
a7a3cfaa
TH
2229 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2230 /* We can process this letter. */
2231 STRLEN size = props & PACK_SIZE_MASK;
2232 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2233 }
f337b084
TH
2234 }
2235
49704364
WL
2236 /* Look ahead for next symbol. Do we have code/code? */
2237 lookahead = *symptr;
2238 found = next_symbol(&lookahead);
246f24af
TH
2239 if (symptr->flags & FLAG_SLASH) {
2240 IV count;
f337b084 2241 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2242 if (strchr("aAZ", lookahead.code)) {
2243 if (lookahead.howlen == e_number) count = lookahead.length;
2244 else {
ce399ba6 2245 if (items > 0) {
48a5da33 2246 count = sv_len_utf8(*beglist);
ce399ba6 2247 }
246f24af
TH
2248 else count = 0;
2249 if (lookahead.code == 'Z') count++;
2250 }
2251 } else {
2252 if (lookahead.howlen == e_number && lookahead.length < items)
2253 count = lookahead.length;
2254 else count = items;
2255 }
2256 lookahead.howlen = e_number;
2257 lookahead.length = count;
2258 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2259 }
49704364 2260
fc241834
RGS
2261 /* Code inside the switch must take care to properly update
2262 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2263 doesn't simply leave using break */
1109a392 2264 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2265 default:
f337b084
TH
2266 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2267 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2268 case '%':
49704364 2269 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2270 {
2271 char *from;
28be1210 2272 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2273 case '.':
2274 if (howlen == e_star) from = start;
2275 else if (len == 0) from = cur;
2276 else {
2277 tempsym_t *group = symptr;
2278
2279 while (--len && group) group = group->previous;
2280 from = group ? start + group->strbeg : start;
2281 }
2282 fromstr = NEXTFROM;
2283 len = SvIV(fromstr);
2284 goto resize;
28be1210 2285 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2286 case '@':
28be1210
TH
2287 from = start + symptr->strbeg;
2288 resize:
28be1210 2289 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2290 if (len >= 0) {
2291 while (len && from < cur) {
2292 from += UTF8SKIP(from);
2293 len--;
2294 }
2295 if (from > cur)
2296 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2297 if (len) {
2298 /* Here we know from == cur */
2299 grow:
2300 GROWING(0, cat, start, cur, len);
2301 Zero(cur, len, char);
2302 cur += len;
2303 } else if (from < cur) {
2304 len = cur - from;
2305 goto shrink;
2306 } else goto no_change;
2307 } else {
2308 cur = from;
2309 len = -len;
2310 goto utf8_shrink;
f337b084 2311 }
28be1210
TH
2312 else {
2313 len -= cur - from;
f337b084 2314 if (len > 0) goto grow;
28be1210 2315 if (len == 0) goto no_change;
fc241834 2316 len = -len;
28be1210 2317 goto shrink;
f337b084 2318 }
a6ec74c1 2319 break;
28be1210 2320 }
fc241834 2321 case '(': {
49704364 2322 tempsym_t savsym = *symptr;
66c611c5
MHM
2323 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2324 symptr->flags |= group_modifiers;
49704364
WL
2325 symptr->patend = savsym.grpend;
2326 symptr->level++;
f337b084 2327 symptr->previous = &lookahead;
18529408 2328 while (len--) {
f337b084
TH
2329 U32 was_utf8;
2330 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2331 else symptr->flags &= ~FLAG_PARSE_UTF8;
2332 was_utf8 = SvUTF8(cat);
49704364 2333 symptr->patptr = savsym.grpbeg;
f337b084
TH
2334 beglist = pack_rec(cat, symptr, beglist, endlist);
2335 if (SvUTF8(cat) != was_utf8)
2336 /* This had better be an upgrade while in utf8==0 mode */
2337 utf8 = 1;
2338
49704364 2339 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2340 break; /* No way to continue */
2341 }
ee790063 2342 items = endlist - beglist;
f337b084
TH
2343 lookahead.flags = symptr->flags & ~group_modifiers;
2344 goto no_change;
18529408 2345 }
62f95557
IZ
2346 case 'X' | TYPE_IS_SHRIEKING:
2347 if (!len) /* Avoid division by 0 */
2348 len = 1;
f337b084
TH
2349 if (utf8) {
2350 char *hop, *last;
2351 I32 l = len;
2352 hop = last = start;
2353 while (hop < cur) {
2354 hop += UTF8SKIP(hop);
2355 if (--l == 0) {
2356 last = hop;
2357 l = len;
2358 }
2359 }
2360 if (last > cur)
2361 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2362 cur = last;
2363 break;
2364 }
2365 len = (cur-start) % len;
62f95557 2366 /* FALL THROUGH */
a6ec74c1 2367 case 'X':
f337b084
TH
2368 if (utf8) {
2369 if (len < 1) goto no_change;
28be1210 2370 utf8_shrink:
f337b084
TH
2371 while (len > 0) {
2372 if (cur <= start)
28be1210
TH
2373 Perl_croak(aTHX_ "'%c' outside of string in pack",
2374 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2375 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2376 if (cur <= start)
28be1210
TH
2377 Perl_croak(aTHX_ "'%c' outside of string in pack",
2378 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2379 }
2380 len--;
2381 }
2382 } else {
fc241834 2383 shrink:
f337b084 2384 if (cur - start < len)
28be1210
TH
2385 Perl_croak(aTHX_ "'%c' outside of string in pack",
2386 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2387 cur -= len;
2388 }
2389 if (cur < start+symptr->strbeg) {
2390 /* Make sure group starts don't point into the void */
2391 tempsym_t *group;
9e27e96a 2392 const STRLEN length = cur-start;
f337b084
TH
2393 for (group = symptr;
2394 group && length < group->strbeg;
2395 group = group->previous) group->strbeg = length;
2396 lookahead.strbeg = length;
2397 }
a6ec74c1 2398 break;
fc241834
RGS
2399 case 'x' | TYPE_IS_SHRIEKING: {
2400 I32 ai32;
62f95557
IZ
2401 if (!len) /* Avoid division by 0 */
2402 len = 1;
230e1fce 2403 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2404 else ai32 = (cur - start) % len;
2405 if (ai32 == 0) goto no_change;
2406 len -= ai32;
2407 }
2408 /* FALL THROUGH */
a6ec74c1 2409 case 'x':
f337b084 2410 goto grow;
a6ec74c1
JH
2411 case 'A':
2412 case 'Z':
f337b084 2413 case 'a': {
f7fe979e 2414 const char *aptr;
f337b084 2415
a6ec74c1 2416 fromstr = NEXTFROM;
e62f0680 2417 aptr = SvPV_const(fromstr, fromlen);
f337b084 2418 if (DO_UTF8(fromstr)) {
f7fe979e 2419 const char *end, *s;
f337b084
TH
2420
2421 if (!utf8 && !SvUTF8(cat)) {
2422 marked_upgrade(aTHX_ cat, symptr);
2423 lookahead.flags |= FLAG_DO_UTF8;
2424 lookahead.strbeg = symptr->strbeg;
2425 utf8 = 1;
2426 start = SvPVX(cat);
2427 cur = start + SvCUR(cat);
2428 }
fc241834 2429 if (howlen == e_star) {
f337b084
TH
2430 if (utf8) goto string_copy;
2431 len = fromlen+1;
2432 }
2433 s = aptr;
2434 end = aptr + fromlen;
2435 fromlen = datumtype == 'Z' ? len-1 : len;
2436 while ((I32) fromlen > 0 && s < end) {
2437 s += UTF8SKIP(s);
2438 fromlen--;
2439 }
2440 if (s > end)
2441 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2442 if (utf8) {
fc241834 2443 len = fromlen;
f337b084
TH
2444 if (datumtype == 'Z') len++;
2445 fromlen = s-aptr;
2446 len += fromlen;
fc241834 2447
f337b084 2448 goto string_copy;
fc241834 2449 }
f337b084
TH
2450 fromlen = len - fromlen;
2451 if (datumtype == 'Z') fromlen--;
2452 if (howlen == e_star) {
2453 len = fromlen;
2454 if (datumtype == 'Z') len++;
fc241834 2455 }
f337b084 2456 GROWING(0, cat, start, cur, len);
fc241834 2457 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2458 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2459 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2460 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2461 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2462 cur += fromlen;
a6ec74c1 2463 len -= fromlen;
f337b084
TH
2464 } else if (utf8) {
2465 if (howlen == e_star) {
2466 len = fromlen;
2467 if (datumtype == 'Z') len++;
a6ec74c1 2468 }
f337b084
TH
2469 if (len <= (I32) fromlen) {
2470 fromlen = len;
2471 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2472 }
fc241834 2473 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2474 upgrade, so:
2475 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2476 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2477 len -= fromlen;
2478 while (fromlen > 0) {
230e1fce 2479 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2480 aptr++;
2481 fromlen--;
fc241834 2482 }
f337b084
TH
2483 } else {
2484 string_copy:
2485 if (howlen == e_star) {
2486 len = fromlen;
2487 if (datumtype == 'Z') len++;
2488 }
2489 if (len <= (I32) fromlen) {
2490 fromlen = len;
2491 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2492 }
f337b084
TH
2493 GROWING(0, cat, start, cur, len);
2494 Copy(aptr, cur, fromlen, char);
2495 cur += fromlen;
2496 len -= fromlen;
a6ec74c1 2497 }
f337b084
TH
2498 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2499 cur += len;
3c4fb04a 2500 SvTAINT(cat);
a6ec74c1 2501 break;
f337b084 2502 }
a6ec74c1 2503 case 'B':
f337b084 2504 case 'b': {
b83604b4 2505 const char *str, *end;
f337b084
TH
2506 I32 l, field_len;
2507 U8 bits;
2508 bool utf8_source;
2509 U32 utf8_flags;
a6ec74c1 2510
fc241834 2511 fromstr = NEXTFROM;
b83604b4 2512 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2513 end = str + fromlen;
2514 if (DO_UTF8(fromstr)) {
2515 utf8_source = TRUE;
041457d9 2516 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2517 } else {
2518 utf8_source = FALSE;
2519 utf8_flags = 0; /* Unused, but keep compilers happy */
2520 }
2521 if (howlen == e_star) len = fromlen;
2522 field_len = (len+7)/8;
2523 GROWING(utf8, cat, start, cur, field_len);
2524 if (len > (I32)fromlen) len = fromlen;
2525 bits = 0;
2526 l = 0;
2527 if (datumtype == 'B')
2528 while (l++ < len) {
2529 if (utf8_source) {
95b63a38 2530 UV val = 0;
f337b084
TH
2531 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2532 bits |= val & 1;
2533 } else bits |= *str++ & 1;
2534 if (l & 7) bits <<= 1;
fc241834 2535 else {
f337b084
TH
2536 PUSH_BYTE(utf8, cur, bits);
2537 bits = 0;
a6ec74c1
JH
2538 }
2539 }
f337b084
TH
2540 else
2541 /* datumtype == 'b' */
2542 while (l++ < len) {
2543 if (utf8_source) {
95b63a38 2544 UV val = 0;
f337b084
TH
2545 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2546 if (val & 1) bits |= 0x80;
2547 } else if (*str++ & 1)
2548 bits |= 0x80;
2549 if (l & 7) bits >>= 1;
fc241834 2550 else {
f337b084
TH
2551 PUSH_BYTE(utf8, cur, bits);
2552 bits = 0;
a6ec74c1
JH
2553 }
2554 }
f337b084
TH
2555 l--;
2556 if (l & 7) {
fc241834 2557 if (datumtype == 'B')
f337b084 2558 bits <<= 7 - (l & 7);
fc241834 2559 else
f337b084
TH
2560 bits >>= 7 - (l & 7);
2561 PUSH_BYTE(utf8, cur, bits);
2562 l += 7;
a6ec74c1 2563 }
f337b084
TH
2564 /* Determine how many chars are left in the requested field */
2565 l /= 8;
2566 if (howlen == e_star) field_len = 0;
2567 else field_len -= l;
2568 Zero(cur, field_len, char);
2569 cur += field_len;
a6ec74c1 2570 break;
f337b084 2571 }
a6ec74c1 2572 case 'H':
f337b084 2573 case 'h': {
10516c54 2574 const char *str, *end;
f337b084
TH
2575 I32 l, field_len;
2576 U8 bits;
2577 bool utf8_source;
2578 U32 utf8_flags;
a6ec74c1 2579
fc241834 2580 fromstr = NEXTFROM;
10516c54 2581 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2582 end = str + fromlen;
2583 if (DO_UTF8(fromstr)) {
2584 utf8_source = TRUE;
041457d9 2585 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2586 } else {
2587 utf8_source = FALSE;
2588 utf8_flags = 0; /* Unused, but keep compilers happy */
2589 }
2590 if (howlen == e_star) len = fromlen;
2591 field_len = (len+1)/2;
2592 GROWING(utf8, cat, start, cur, field_len);
2593 if (!utf8 && len > (I32)fromlen) len = fromlen;
2594 bits = 0;
2595 l = 0;
2596 if (datumtype == 'H')
2597 while (l++ < len) {
2598 if (utf8_source) {
95b63a38 2599 UV val = 0;
f337b084
TH
2600 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2601 if (val < 256 && isALPHA(val))
2602 bits |= (val + 9) & 0xf;
a6ec74c1 2603 else
f337b084
TH
2604 bits |= val & 0xf;
2605 } else if (isALPHA(*str))
2606 bits |= (*str++ + 9) & 0xf;
2607 else
2608 bits |= *str++ & 0xf;
2609 if (l & 1) bits <<= 4;
fc241834 2610 else {
f337b084
TH
2611 PUSH_BYTE(utf8, cur, bits);
2612 bits = 0;
a6ec74c1
JH
2613 }
2614 }
f337b084
TH
2615 else
2616 while (l++ < len) {
2617 if (utf8_source) {
95b63a38 2618 UV val = 0;
f337b084
TH
2619 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2620 if (val < 256 && isALPHA(val))
2621 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2622 else
f337b084
TH
2623 bits |= (val & 0xf) << 4;
2624 } else if (isALPHA(*str))
2625 bits |= ((*str++ + 9) & 0xf) << 4;
2626 else
2627 bits |= (*str++ & 0xf) << 4;
2628 if (l & 1) bits >>= 4;
fc241834 2629 else {
f337b084
TH
2630 PUSH_BYTE(utf8, cur, bits);
2631 bits = 0;
a6ec74c1 2632 }
fc241834 2633 }
f337b084
TH
2634 l--;
2635 if (l & 1) {
2636 PUSH_BYTE(utf8, cur, bits);
2637 l++;
2638 }
2639 /* Determine how many chars are left in the requested field */
2640 l /= 2;
2641 if (howlen == e_star) field_len = 0;
2642 else field_len -= l;
2643 Zero(cur, field_len, char);
2644 cur += field_len;
2645 break;
fc241834
RGS
2646 }
2647 case 'c':
f337b084
TH
2648 while (len-- > 0) {
2649 IV aiv;
2650 fromstr = NEXTFROM;
2651 aiv = SvIV(fromstr);
a2a5de95
NC
2652 if ((-128 > aiv || aiv > 127))
2653 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2654 "Character in 'c' format wrapped in pack");
585ec06d 2655 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2656 }
2657 break;
2658 case 'C':
f337b084
TH
2659 if (len == 0) {
2660 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2661 break;
2662 }
a6ec74c1 2663 while (len-- > 0) {
f337b084 2664 IV aiv;
a6ec74c1 2665 fromstr = NEXTFROM;
f337b084 2666 aiv = SvIV(fromstr);
a2a5de95
NC
2667 if ((0 > aiv || aiv > 0xff))
2668 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2669 "Character in 'C' format wrapped in pack");
1651fc44 2670 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2671 }
fc241834
RGS
2672 break;
2673 case 'W': {
2674 char *end;
670f1322 2675 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2676
2677 end = start+SvLEN(cat)-1;
2678 if (utf8) end -= UTF8_MAXLEN-1;
2679 while (len-- > 0) {
2680 UV auv;
2681 fromstr = NEXTFROM;
2682 auv = SvUV(fromstr);
2683 if (in_bytes) auv = auv % 0x100;
2684 if (utf8) {
2685 W_utf8:
2686 if (cur > end) {
2687 *cur = '\0';
b162af07 2688 SvCUR_set(cat, cur - start);
fc241834
RGS
2689
2690 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2691 end = start+SvLEN(cat)-UTF8_MAXLEN;
2692 }
230e1fce
NC
2693 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2694 NATIVE_TO_UNI(auv),
041457d9 2695 warn_utf8 ?
230e1fce 2696 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2697 } else {
2698 if (auv >= 0x100) {
2699 if (!SvUTF8(cat)) {
2700 *cur = '\0';
b162af07 2701 SvCUR_set(cat, cur - start);
fc241834
RGS
2702 marked_upgrade(aTHX_ cat, symptr);
2703 lookahead.flags |= FLAG_DO_UTF8;
2704 lookahead.strbeg = symptr->strbeg;
2705 utf8 = 1;
2706 start = SvPVX(cat);
2707 cur = start + SvCUR(cat);
2708 end = start+SvLEN(cat)-UTF8_MAXLEN;
2709 goto W_utf8;
2710 }
a2a5de95
NC
2711 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2712 "Character in 'W' format wrapped in pack");
fc241834
RGS
2713 auv &= 0xff;
2714 }
2715 if (cur >= end) {
2716 *cur = '\0';
b162af07 2717 SvCUR_set(cat, cur - start);
fc241834
RGS
2718 GROWING(0, cat, start, cur, len+1);
2719 end = start+SvLEN(cat)-1;
2720 }
fe2774ed 2721 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2722 }
2723 }
2724 break;
fc241834
RGS
2725 }
2726 case 'U': {
2727 char *end;
2728
2729 if (len == 0) {
2730 if (!(symptr->flags & FLAG_DO_UTF8)) {
2731 marked_upgrade(aTHX_ cat, symptr);
2732 lookahead.flags |= FLAG_DO_UTF8;
2733 lookahead.strbeg = symptr->strbeg;
2734 }
2735 utf8 = 0;
2736 goto no_change;
2737 }
2738
2739 end = start+SvLEN(cat);
2740 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2741 while (len-- > 0) {
fc241834 2742 UV auv;
a6ec74c1 2743 fromstr = NEXTFROM;
fc241834
RGS
2744 auv = SvUV(fromstr);
2745 if (utf8) {
230e1fce 2746 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2747 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2748 warn_utf8 ?
fc241834
RGS
2749 0 : UNICODE_ALLOW_ANY);
2750 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2751 *cur = '\0';
b162af07 2752 SvCUR_set(cat, cur - start);
fc241834
RGS
2753 GROWING(0, cat, start, cur,
2754 len+(endb-buffer)*UTF8_EXPAND);
2755 end = start+SvLEN(cat);
2756 }
64844641 2757 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2758 } else {
2759 if (cur >= end) {
2760 *cur = '\0';
b162af07 2761 SvCUR_set(cat, cur - start);
fc241834
RGS
2762 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2763 end = start+SvLEN(cat)-UTF8_MAXLEN;
2764 }
230e1fce 2765 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2766 warn_utf8 ?
230e1fce 2767 0 : UNICODE_ALLOW_ANY);
fc241834 2768 }
a6ec74c1 2769 }
a6ec74c1 2770 break;
fc241834 2771 }
a6ec74c1
JH
2772 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2773 case 'f':
a6ec74c1 2774 while (len-- > 0) {
f337b084
TH
2775 float afloat;
2776 NV anv;
a6ec74c1 2777 fromstr = NEXTFROM;
f337b084 2778 anv = SvNV(fromstr);
85bba25f 2779# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2780 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2781 * on Alpha; fake it if we don't have them.
2782 */
f337b084 2783 if (anv > FLT_MAX)
fc241834 2784 afloat = FLT_MAX;
f337b084 2785 else if (anv < -FLT_MAX)
fc241834 2786 afloat = -FLT_MAX;
f337b084 2787 else afloat = (float)anv;
baf3cf9c 2788# else
f337b084 2789 afloat = (float)anv;
baf3cf9c 2790# endif
1109a392 2791 DO_BO_PACK_N(afloat, float);
f337b084 2792 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2793 }
2794 break;
2795 case 'd':
a6ec74c1 2796 while (len-- > 0) {
f337b084
TH
2797 double adouble;
2798 NV anv;
a6ec74c1 2799 fromstr = NEXTFROM;
f337b084 2800 anv = SvNV(fromstr);
85bba25f 2801# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2802 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2803 * on Alpha; fake it if we don't have them.
2804 */
f337b084 2805 if (anv > DBL_MAX)
fc241834 2806 adouble = DBL_MAX;
f337b084 2807 else if (anv < -DBL_MAX)
fc241834 2808 adouble = -DBL_MAX;
f337b084 2809 else adouble = (double)anv;
baf3cf9c 2810# else
f337b084 2811 adouble = (double)anv;
baf3cf9c 2812# endif
1109a392 2813 DO_BO_PACK_N(adouble, double);
f337b084 2814 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2815 }
2816 break;
fc241834 2817 case 'F': {
275663fa 2818 NV_bytes anv;
1109a392 2819 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2820 while (len-- > 0) {
2821 fromstr = NEXTFROM;
cd07c537
DM
2822#ifdef __GNUC__
2823 /* to work round a gcc/x86 bug; don't use SvNV */
2824 anv.nv = sv_2nv(fromstr);
2825#else
275663fa 2826 anv.nv = SvNV(fromstr);
cd07c537 2827#endif
1109a392 2828 DO_BO_PACK_N(anv, NV);
275663fa 2829 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2830 }
2831 break;
fc241834 2832 }
92d41999 2833#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2834 case 'D': {
275663fa 2835 ld_bytes aldouble;
1109a392
MHM
2836 /* long doubles can have unused bits, which may be nonzero */
2837 Zero(&aldouble, 1, long double);
92d41999
JH
2838 while (len-- > 0) {
2839 fromstr = NEXTFROM;
cd07c537
DM
2840# ifdef __GNUC__
2841 /* to work round a gcc/x86 bug; don't use SvNV */
2842 aldouble.ld = (long double)sv_2nv(fromstr);
2843# else
275663fa 2844 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2845# endif
1109a392 2846 DO_BO_PACK_N(aldouble, long double);
275663fa 2847 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2848 }
2849 break;
fc241834 2850 }
92d41999 2851#endif
068bd2e7 2852 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2853 case 'n':
2854 while (len-- > 0) {
f337b084 2855 I16 ai16;
a6ec74c1 2856 fromstr = NEXTFROM;
ef108786 2857 ai16 = (I16)SvIV(fromstr);
ef108786 2858 ai16 = PerlSock_htons(ai16);
f337b084 2859 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2860 }
2861 break;
068bd2e7 2862 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2863 case 'v':
2864 while (len-- > 0) {
f337b084 2865 I16 ai16;
a6ec74c1 2866 fromstr = NEXTFROM;
ef108786 2867 ai16 = (I16)SvIV(fromstr);
ef108786 2868 ai16 = htovs(ai16);
f337b084 2869 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2870 }
2871 break;
49704364 2872 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2873#if SHORTSIZE != SIZE16
fc241834 2874 while (len-- > 0) {
f337b084 2875 unsigned short aushort;
fc241834
RGS
2876 fromstr = NEXTFROM;
2877 aushort = SvUV(fromstr);
2878 DO_BO_PACK(aushort, s);
f337b084 2879 PUSH_VAR(utf8, cur, aushort);
fc241834 2880 }
49704364
WL
2881 break;
2882#else
2883 /* Fall through! */
a6ec74c1 2884#endif
49704364 2885 case 'S':
fc241834 2886 while (len-- > 0) {
f337b084 2887 U16 au16;
fc241834
RGS
2888 fromstr = NEXTFROM;
2889 au16 = (U16)SvUV(fromstr);
2890 DO_BO_PACK(au16, 16);
f337b084 2891 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2892 }
2893 break;
49704364 2894 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2895#if SHORTSIZE != SIZE16
fc241834 2896 while (len-- > 0) {
f337b084 2897 short ashort;
fc241834
RGS
2898 fromstr = NEXTFROM;
2899 ashort = SvIV(fromstr);
2900 DO_BO_PACK(ashort, s);
f337b084 2901 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2902 }
49704364
WL
2903 break;
2904#else
2905 /* Fall through! */
a6ec74c1 2906#endif
49704364
WL
2907 case 's':
2908 while (len-- > 0) {
f337b084 2909 I16 ai16;
49704364 2910 fromstr = NEXTFROM;
ef108786
MHM
2911 ai16 = (I16)SvIV(fromstr);
2912 DO_BO_PACK(ai16, 16);
f337b084 2913 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2914 }
2915 break;
2916 case 'I':
49704364 2917 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2918 while (len-- > 0) {
f337b084 2919 unsigned int auint;
a6ec74c1
JH
2920 fromstr = NEXTFROM;
2921 auint = SvUV(fromstr);
1109a392 2922 DO_BO_PACK(auint, i);
f337b084 2923 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2924 }
2925 break;
92d41999
JH
2926 case 'j':
2927 while (len-- > 0) {
f337b084 2928 IV aiv;
92d41999
JH
2929 fromstr = NEXTFROM;
2930 aiv = SvIV(fromstr);
1109a392
MHM
2931#if IVSIZE == INTSIZE
2932 DO_BO_PACK(aiv, i);
2933#elif IVSIZE == LONGSIZE
2934 DO_BO_PACK(aiv, l);
2935#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2936 DO_BO_PACK(aiv, 64);
f337b084
TH
2937#else
2938 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 2939#endif
f337b084 2940 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
2941 }
2942 break;
2943 case 'J':
2944 while (len-- > 0) {
f337b084 2945 UV auv;
92d41999
JH
2946 fromstr = NEXTFROM;
2947 auv = SvUV(fromstr);
1109a392
MHM
2948#if UVSIZE == INTSIZE
2949 DO_BO_PACK(auv, i);
2950#elif UVSIZE == LONGSIZE
2951 DO_BO_PACK(auv, l);
2952#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2953 DO_BO_PACK(auv, 64);
f337b084
TH
2954#else
2955 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 2956#endif
f337b084 2957 PUSH_VAR(utf8, cur, auv);
92d41999
JH
2958 }
2959 break;
a6ec74c1
JH
2960 case 'w':
2961 while (len-- > 0) {
f337b084 2962 NV anv;
a6ec74c1 2963 fromstr = NEXTFROM;
15e9f109 2964 anv = SvNV(fromstr);
a6ec74c1 2965
f337b084
TH
2966 if (anv < 0) {
2967 *cur = '\0';
b162af07 2968 SvCUR_set(cat, cur - start);
49704364 2969 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2970 }
a6ec74c1 2971
196b62db
NC
2972 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2973 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2974 any negative IVs will have already been got by the croak()
2975 above. IOK is untrue for fractions, so we test them
2976 against UV_MAX_P1. */
f337b084
TH
2977 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2978 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2979 char *in = buf + sizeof(buf);
196b62db 2980 UV auv = SvUV(fromstr);
a6ec74c1
JH
2981
2982 do {
eb160463 2983 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2984 auv >>= 7;
2985 } while (auv);
2986 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2987 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2988 in, (buf + sizeof(buf)) - in);
2989 } else if (SvPOKp(fromstr))
2990 goto w_string;
a6ec74c1 2991 else if (SvNOKp(fromstr)) {
0258719b 2992 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2993 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2994 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2995 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2996 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2997 Some C compilers are strict about integral constant
2998 expressions so we conservatively divide by a slightly
2999 smaller integer instead of multiplying by the exact
3000 floating-point value.
0258719b
NC
3001 */
3002#ifdef NV_MAX_10_EXP
f337b084 3003 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3004 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3005#else
f337b084 3006 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3007 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3008#endif
a6ec74c1
JH
3009 char *in = buf + sizeof(buf);
3010
8b6e33c7 3011 anv = Perl_floor(anv);
a6ec74c1 3012 do {
8b6e33c7 3013 const NV next = Perl_floor(anv / 128);
a6ec74c1 3014 if (in <= buf) /* this cannot happen ;-) */
49704364 3015 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3016 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3017 anv = next;
3018 } while (anv > 0);
a6ec74c1 3019 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3020 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3021 in, (buf + sizeof(buf)) - in);
3022 } else {
8b6e33c7
AL
3023 const char *from;
3024 char *result, *in;
735b914b
JH
3025 SV *norm;
3026 STRLEN len;
3027 bool done;
3028
f337b084 3029 w_string:
735b914b 3030 /* Copy string and check for compliance */
349d4f2f 3031 from = SvPV_const(fromstr, len);
735b914b 3032 if ((norm = is_an_int(from, len)) == NULL)
49704364 3033 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3034
a02a5408 3035 Newx(result, len, char);
735b914b
JH
3036 in = result + len;
3037 done = FALSE;
f337b084 3038 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3039 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3040 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3041 in, (result + len) - in);
735b914b
JH
3042 Safefree(result);
3043 SvREFCNT_dec(norm); /* free norm */
fc241834 3044 }
a6ec74c1
JH
3045 }
3046 break;
3047 case 'i':
49704364 3048 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3049 while (len-- > 0) {
f337b084 3050 int aint;
a6ec74c1
JH
3051 fromstr = NEXTFROM;
3052 aint = SvIV(fromstr);
1109a392 3053 DO_BO_PACK(aint, i);
f337b084 3054 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3055 }
3056 break;
068bd2e7 3057 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
3058 case 'N':
3059 while (len-- > 0) {
f337b084 3060 U32 au32;
a6ec74c1 3061 fromstr = NEXTFROM;
ef108786 3062 au32 = SvUV(fromstr);
ef108786 3063 au32 = PerlSock_htonl(au32);
f337b084 3064 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3065 }
3066 break;
068bd2e7 3067 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
3068 case 'V':
3069 while (len-- > 0) {
f337b084 3070 U32 au32;
a6ec74c1 3071 fromstr = NEXTFROM;
ef108786 3072 au32 = SvUV(fromstr);
ef108786 3073 au32 = htovl(au32);
f337b084 3074 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3075 }
3076 break;
49704364 3077 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3078#if LONGSIZE != SIZE32
fc241834 3079 while (len-- > 0) {
f337b084 3080 unsigned long aulong;
fc241834
RGS
3081 fromstr = NEXTFROM;
3082 aulong = SvUV(fromstr);
3083 DO_BO_PACK(aulong, l);
f337b084 3084 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3085 }
49704364
WL
3086 break;
3087#else
3088 /* Fall though! */
a6ec74c1 3089#endif
49704364 3090 case 'L':
fc241834 3091 while (len-- > 0) {
f337b084 3092 U32 au32;
fc241834
RGS
3093 fromstr = NEXTFROM;
3094 au32 = SvUV(fromstr);
3095 DO_BO_PACK(au32, 32);
f337b084 3096 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3097 }
3098 break;
49704364 3099 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3100#if LONGSIZE != SIZE32
fc241834 3101 while (len-- > 0) {
f337b084 3102 long along;
fc241834
RGS
3103 fromstr = NEXTFROM;
3104 along = SvIV(fromstr);
3105 DO_BO_PACK(along, l);
f337b084 3106 PUSH_VAR(utf8, cur, along);
a6ec74c1 3107 }
49704364
WL
3108 break;
3109#else
3110 /* Fall though! */
a6ec74c1 3111#endif
49704364
WL
3112 case 'l':
3113 while (len-- > 0) {
f337b084 3114 I32 ai32;
49704364 3115 fromstr = NEXTFROM;
ef108786
MHM
3116 ai32 = SvIV(fromstr);
3117 DO_BO_PACK(ai32, 32);
f337b084 3118 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3119 }
3120 break;
3121#ifdef HAS_QUAD
3122 case 'Q':
3123 while (len-- > 0) {
f337b084 3124 Uquad_t auquad;
a6ec74c1 3125 fromstr = NEXTFROM;
f337b084 3126 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3127 DO_BO_PACK(auquad, 64);
f337b084 3128 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3129 }
3130 break;
3131 case 'q':
3132 while (len-- > 0) {
f337b084 3133 Quad_t aquad;
a6ec74c1
JH
3134 fromstr = NEXTFROM;
3135 aquad = (Quad_t)SvIV(fromstr);
1109a392 3136 DO_BO_PACK(aquad, 64);
f337b084 3137 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3138 }
3139 break;
f337b084 3140#endif /* HAS_QUAD */
a6ec74c1
JH
3141 case 'P':
3142 len = 1; /* assume SV is correct length */
f337b084 3143 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3144 /* Fall through! */
a6ec74c1
JH
3145 case 'p':
3146 while (len-- > 0) {
83003860 3147 const char *aptr;
f337b084 3148
a6ec74c1 3149 fromstr = NEXTFROM;
28a4f200
TH
3150 SvGETMAGIC(fromstr);
3151 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3152 else {
a6ec74c1
JH
3153 /* XXX better yet, could spirit away the string to
3154 * a safe spot and hang on to it until the result
3155 * of pack() (and all copies of the result) are
3156 * gone.
3157 */
041457d9 3158 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3159 !SvREADONLY(fromstr)))) {
3160 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3161 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3162 }
3163 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3164 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3165 else
2596d9fe 3166 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3167 }
07409e01 3168 DO_BO_PACK_PC(aptr);
f337b084 3169 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3170 }
3171 break;
fc241834 3172 case 'u': {
f7fe979e 3173 const char *aptr, *aend;
fc241834 3174 bool from_utf8;
f337b084 3175
a6ec74c1 3176 fromstr = NEXTFROM;
fc241834
RGS
3177 if (len <= 2) len = 45;
3178 else len = len / 3 * 3;
3179 if (len >= 64) {
a2a5de95
NC
3180 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3181 "Field too wide in 'u' format in pack");
fc241834
RGS
3182 len = 63;
3183 }
83003860 3184 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3185 from_utf8 = DO_UTF8(fromstr);
3186 if (from_utf8) {
3187 aend = aptr + fromlen;
3f63b0e5 3188 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3189 } else aend = NULL; /* Unused, but keep compilers happy */
3190 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3191 while (fromlen > 0) {
fc241834 3192 U8 *end;
a6ec74c1 3193 I32 todo;
fc241834 3194 U8 hunk[1+63/3*4+1];
a6ec74c1 3195
eb160463 3196 if ((I32)fromlen > len)
a6ec74c1
JH
3197 todo = len;
3198 else
3199 todo = fromlen;
fc241834
RGS
3200 if (from_utf8) {
3201 char buffer[64];
3202 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3203 'u' | TYPE_IS_PACK)) {
3204 *cur = '\0';
b162af07 3205 SvCUR_set(cat, cur - start);
5637ef5b
NC
3206 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3207 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3208 aptr, aend, buffer, (long) todo);
fc241834
RGS
3209 }
3210 end = doencodes(hunk, buffer, todo);
3211 } else {
3212 end = doencodes(hunk, aptr, todo);
3213 aptr += todo;
3214 }
3215 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3216 fromlen -= todo;
3217 }
a6ec74c1
JH
3218 break;
3219 }
f337b084
TH
3220 }
3221 *cur = '\0';
b162af07 3222 SvCUR_set(cat, cur - start);
f337b084 3223 no_change:
49704364 3224 *symptr = lookahead;
a6ec74c1 3225 }
49704364 3226 return beglist;
18529408
IZ
3227}
3228#undef NEXTFROM
3229
3230
3231PP(pp_pack)
3232{
97aff369 3233 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3234 SV *cat = TARG;
18529408 3235 STRLEN fromlen;
349d4f2f 3236 SV *pat_sv = *++MARK;
eb578fdb
KW
3237 const char *pat = SvPV_const(pat_sv, fromlen);
3238 const char *patend = pat + fromlen;
18529408
IZ
3239
3240 MARK++;
76f68e9b 3241 sv_setpvs(cat, "");
f337b084 3242 SvUTF8_off(cat);
18529408 3243
7accc089 3244 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3245
a6ec74c1
JH
3246 SvSETMAGIC(cat);
3247 SP = ORIGMARK;
3248 PUSHs(cat);
3249 RETURN;
3250}
a6ec74c1 3251
73cb7263
NC
3252/*
3253 * Local variables:
3254 * c-indentation-style: bsd
3255 * c-basic-offset: 4
14d04a33 3256 * indent-tabs-mode: nil
73cb7263
NC
3257 * End:
3258 *
14d04a33 3259 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3260 */