This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Eliminate the definitions for DO_BO_PACK_P and DO_BO_UNPACK_P.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
f337b084
TH
132/* Only to be used inside a loop (see the break) */
133#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
134 if (utf8) { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
136 } else { \
137 Copy(s, OFF16(p), SIZE16, char); \
138 (s) += SIZE16; \
139 } \
140} STMT_END
141
142/* Only to be used inside a loop (see the break) */
143#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
144 if (utf8) { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
146 } else { \
147 Copy(s, OFF32(p), SIZE32, char); \
148 (s) += SIZE32; \
149 } \
150} STMT_END
151
152#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
154
155/* Only to be used inside a loop (see the break) */
275663fa 156#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
157STMT_START { \
158 if (utf8) { \
f337b084 159 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 160 (char *) (buf), len, datumtype)) break; \
08ca2aa3 161 } else { \
275663fa
TC
162 Copy(s, (char *) (buf), len, char); \
163 s += len; \
08ca2aa3 164 } \
08ca2aa3
TH
165} STMT_END
166
275663fa
TC
167#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
169
f337b084 170#define PUSH_VAR(utf8, aptr, var) \
230e1fce 171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 172
49704364
WL
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
66c611c5 176/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
177#define FLAG_WAS_UTF8 0x40
178#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 179#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 180#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
181#define FLAG_SLASH 0x04
182#define FLAG_COMMA 0x02
183#define FLAG_PACK 0x01
184
a6ec74c1
JH
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188 STRLEN len;
189 char *s = SvPV(sv, len);
190 char *t;
a6ec74c1 191
7918f24d
NC
192 PERL_ARGS_ASSERT_MUL128;
193
a6ec74c1 194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 195 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
196
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
199 sv = tmpNew;
200 s = SvPV(sv, len);
201 }
202 t = s + len - 1;
203 while (!*t) /* trailing '\0'? */
204 t--;
205 while (t > s) {
f7fe979e 206 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
207 *(t--) = '0' + (char)(i % 10);
208 m = (char)(i / 10);
a6ec74c1
JH
209 }
210 return (sv);
211}
212
213/* Explosives and implosives. */
214
215#if 'I' == 73 && 'J' == 74
216/* On an ASCII/ISO kind of system */
217#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
218#else
219/*
220 Some other sort of character set - use memchr() so we don't match
221 the null byte.
222 */
223#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
224#endif
225
66c611c5 226/* type modifiers */
62f95557 227#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
228#define TYPE_IS_BIG_ENDIAN 0x200
229#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 230#define TYPE_IS_PACK 0x800
1109a392 231#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 232#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
233#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
234
7212898e
NC
235# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
237
238# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
239
240# define DO_BO_UNPACK(var, type) \
1109a392 241 STMT_START { \
66c611c5 242 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
243 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
244 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
245 default: break; \
246 } \
247 } STMT_END
248
7212898e 249# define DO_BO_PACK(var, type) \
1109a392 250 STMT_START { \
66c611c5 251 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
252 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
253 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
254 default: break; \
255 } \
256 } STMT_END
257
07409e01 258# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 259 STMT_START { \
66c611c5 260 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 261 case TYPE_IS_BIG_ENDIAN: \
07409e01 262 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
263 break; \
264 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 265 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
266 break; \
267 default: \
268 break; \
269 } \
270 } STMT_END
271
07409e01 272# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 273 STMT_START { \
66c611c5 274 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 275 case TYPE_IS_BIG_ENDIAN: \
07409e01 276 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
277 break; \
278 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 279 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
280 break; \
281 default: \
282 break; \
283 } \
284 } STMT_END
285
7212898e 286# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
287 STMT_START { \
288 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
289 case TYPE_IS_BIG_ENDIAN: \
290 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
291 "platform", #action, #type); \
292 break; \
293 case TYPE_IS_LITTLE_ENDIAN: \
294 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
295 "platform", #action, #type); \
296 break; \
297 default: \
298 break; \
299 } \
300 } STMT_END
301
7212898e 302# if PTRSIZE == INTSIZE
07409e01
NC
303# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
304# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 305# elif PTRSIZE == LONGSIZE
fa58a56f 306# if LONGSIZE < IVSIZE && IVSIZE == 8
fa58a56f
S
307# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
308# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
309# else
fa58a56f
S
310# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
311# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
312# endif
2b00a750 313# elif PTRSIZE == IVSIZE
2b00a750
JD
314# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
315# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e 316# else
45f723e1
NC
317# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
318# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 319# endif
1109a392 320
7212898e 321# if defined(my_htolen) && defined(my_letohn) && \
1109a392 322 defined(my_htoben) && defined(my_betohn)
7212898e 323# define DO_BO_UNPACK_N(var, type) \
1109a392 324 STMT_START { \
66c611c5 325 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
326 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
327 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
328 default: break; \
329 } \
330 } STMT_END
331
7212898e 332# define DO_BO_PACK_N(var, type) \
1109a392 333 STMT_START { \
66c611c5 334 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
335 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
336 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
337 default: break; \
338 } \
339 } STMT_END
7212898e
NC
340# else
341# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
342# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
343# endif
344
78d46eaa 345#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 346#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
347#define PACK_SIZE_MASK 0x3F
348
298bc19c 349#include "packsizetables.c"
78d46eaa 350
08ca2aa3 351STATIC U8
f7fe979e 352uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 353{
08ca2aa3 354 STRLEN retlen;
0bcc34c2 355 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 356 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 357 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
358 warnings), but these two mean we make no progress in the string and
359 might enter an infinite loop */
360 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
361 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
362 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 363 if (val >= 0x100) {
a2a5de95
NC
364 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
365 "Character in '%c' format wrapped in unpack",
366 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
367 val &= 0xff;
368 }
369 *s += retlen;
fe2774ed 370 return (U8)val;
08ca2aa3
TH
371}
372
f337b084
TH
373#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
374 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
375 *(U8 *)(s)++)
376
377STATIC bool
f7fe979e 378uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
379{
380 UV val;
381 STRLEN retlen;
f7fe979e 382 const char *from = *s;
08ca2aa3 383 int bad = 0;
f7fe979e 384 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
385 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
386 for (;buf_len > 0; buf_len--) {
387 if (from >= end) return FALSE;
f337b084 388 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
389 if (retlen == (STRLEN) -1 || retlen == 0) {
390 from += UTF8SKIP(from);
391 bad |= 1;
392 } else from += retlen;
393 if (val >= 0x100) {
394 bad |= 2;
395 val &= 0xff;
396 }
fe2774ed 397 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
398 }
399 /* We have enough characters for the buffer. Did we have problems ? */
400 if (bad) {
401 if (bad & 1) {
402 /* Rewalk the string fragment while warning */
f7fe979e 403 const char *ptr;
9e27e96a 404 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
405 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
406 if (ptr >= end) break;
f337b084 407 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
408 }
409 if (from > end) from = end;
410 }
a2a5de95
NC
411 if ((bad & 2))
412 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 413 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
414 "Character(s) in '%c' format wrapped in %s",
415 (int) TYPE_NO_MODIFIERS(datumtype),
416 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
417 }
418 *s = from;
419 return TRUE;
420}
421
422STATIC bool
f7fe979e 423next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 424{
97aff369 425 dVAR;
08ca2aa3 426 STRLEN retlen;
0bcc34c2 427 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
428 if (val >= 0x100 || !ISUUCHAR(val) ||
429 retlen == (STRLEN) -1 || retlen == 0) {
430 *out = 0;
431 return FALSE;
432 }
433 *out = PL_uudmap[val] & 077;
f337b084 434 *s += retlen;
08ca2aa3
TH
435 return TRUE;
436}
78d46eaa 437
64844641 438STATIC char *
14333449 439S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 440 const U8 * const end = start + len;
64844641 441
7918f24d
NC
442 PERL_ARGS_ASSERT_BYTES_TO_UNI;
443
f337b084 444 while (start < end) {
48fa4626
TS
445 const UV uv = NATIVE_TO_ASCII(*start);
446 if (UNI_IS_INVARIANT(uv))
447 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
448 else {
449 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
450 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
451 }
452 start++;
453 }
64844641 454 return dest;
f337b084
TH
455}
456
230e1fce
NC
457#define PUSH_BYTES(utf8, cur, buf, len) \
458STMT_START { \
64844641
AL
459 if (utf8) \
460 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
461 else { \
462 Copy(buf, cur, len, char); \
463 (cur) += (len); \
464 } \
f337b084
TH
465} STMT_END
466
467#define GROWING(utf8, cat, start, cur, in_len) \
468STMT_START { \
469 STRLEN glen = (in_len); \
3473cf63 470 if (utf8) glen *= UTF8_EXPAND; \
f337b084 471 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 472 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
473 (cur) = (start) + SvCUR(cat); \
474 } \
475} STMT_END
476
477#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
478STMT_START { \
f7fe979e 479 const STRLEN glen = (in_len); \
f337b084 480 STRLEN gl = glen; \
3473cf63 481 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
482 if ((cur) + gl >= (start) + SvLEN(cat)) { \
483 *cur = '\0'; \
b162af07 484 SvCUR_set((cat), (cur) - (start)); \
0bd48802 485 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
486 (cur) = (start) + SvCUR(cat); \
487 } \
488 PUSH_BYTES(utf8, cur, buf, glen); \
489} STMT_END
490
491#define PUSH_BYTE(utf8, s, byte) \
492STMT_START { \
493 if (utf8) { \
f7fe979e 494 const U8 au8 = (byte); \
64844641 495 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
496 } else *(U8 *)(s)++ = (byte); \
497} STMT_END
498
499/* Only to be used inside a loop (see the break) */
500#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
501STMT_START { \
502 STRLEN retlen; \
503 if (str >= end) break; \
504 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
505 if (retlen == (STRLEN) -1 || retlen == 0) { \
506 *cur = '\0'; \
507 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
508 } \
509 str += retlen; \
510} STMT_END
511
f7fe979e
AL
512static const char *_action( const tempsym_t* symptr )
513{
10edeb5d 514 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
515}
516
206947d2 517/* Returns the sizeof() struct described by pat */
028d1f6d 518STATIC I32
f337b084 519S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 520{
f337b084 521 I32 total = 0;
206947d2 522
7918f24d
NC
523 PERL_ARGS_ASSERT_MEASURE_STRUCT;
524
49704364 525 while (next_symbol(symptr)) {
f337b084 526 I32 len;
f7fe979e 527 int size;
f337b084
TH
528
529 switch (symptr->howlen) {
fc241834 530 case e_star:
49704364 531 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 532 _action( symptr ) );
49704364 533 break;
f337b084
TH
534 default:
535 /* e_no_len and e_number */
536 len = symptr->length;
537 break;
49704364
WL
538 }
539
a7a3cfaa 540 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 541 if (!size) {
f7fe979e 542 int star;
80a13697
NC
543 /* endianness doesn't influence the size of a type */
544 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
545 default:
546 Perl_croak(aTHX_ "Invalid type '%c' in %s",
547 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 548 _action( symptr ) );
28be1210
TH
549 case '.' | TYPE_IS_SHRIEKING:
550 case '@' | TYPE_IS_SHRIEKING:
80a13697 551 case '@':
28be1210 552 case '.':
80a13697
NC
553 case '/':
554 case 'U': /* XXXX Is it correct? */
555 case 'w':
556 case 'u':
557 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 558 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 559 _action( symptr ) );
80a13697
NC
560 case '%':
561 size = 0;
562 break;
563 case '(':
fc241834
RGS
564 {
565 tempsym_t savsym = *symptr;
566 symptr->patptr = savsym.grpbeg;
567 symptr->patend = savsym.grpend;
568 /* XXXX Theoretically, we need to measure many times at
569 different positions, since the subexpression may contain
570 alignment commands, but be not of aligned length.
571 Need to detect this and croak(). */
572 size = measure_struct(symptr);
573 *symptr = savsym;
574 break;
575 }
80a13697
NC
576 case 'X' | TYPE_IS_SHRIEKING:
577 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
578 */
579 if (!len) /* Avoid division by 0 */
580 len = 1;
581 len = total % len; /* Assumed: the start is aligned. */
582 /* FALL THROUGH */
583 case 'X':
584 size = -1;
585 if (total < len)
f7fe979e 586 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
587 break;
588 case 'x' | TYPE_IS_SHRIEKING:
589 if (!len) /* Avoid division by 0 */
590 len = 1;
591 star = total % len; /* Assumed: the start is aligned. */
592 if (star) /* Other portable ways? */
593 len = len - star;
594 else
595 len = 0;
596 /* FALL THROUGH */
597 case 'x':
598 case 'A':
599 case 'Z':
600 case 'a':
80a13697
NC
601 size = 1;
602 break;
603 case 'B':
604 case 'b':
605 len = (len + 7)/8;
606 size = 1;
607 break;
608 case 'H':
609 case 'h':
610 len = (len + 1)/2;
611 size = 1;
612 break;
78d46eaa 613
80a13697
NC
614 case 'P':
615 len = 1;
616 size = sizeof(char*);
78d46eaa
NC
617 break;
618 }
206947d2
IZ
619 }
620 total += len * size;
621 }
622 return total;
623}
624
49704364
WL
625
626/* locate matching closing parenthesis or bracket
627 * returns char pointer to char after match, or NULL
628 */
f7fe979e 629STATIC const char *
5aaab254 630S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 631{
7918f24d
NC
632 PERL_ARGS_ASSERT_GROUP_END;
633
49704364 634 while (patptr < patend) {
f7fe979e 635 const char c = *patptr++;
49704364
WL
636
637 if (isSPACE(c))
638 continue;
639 else if (c == ender)
640 return patptr-1;
641 else if (c == '#') {
642 while (patptr < patend && *patptr != '\n')
643 patptr++;
644 continue;
645 } else if (c == '(')
646 patptr = group_end(patptr, patend, ')') + 1;
647 else if (c == '[')
648 patptr = group_end(patptr, patend, ']') + 1;
18529408 649 }
49704364
WL
650 Perl_croak(aTHX_ "No group ending character '%c' found in template",
651 ender);
652 return 0;
18529408
IZ
653}
654
49704364
WL
655
656/* Convert unsigned decimal number to binary.
657 * Expects a pointer to the first digit and address of length variable
658 * Advances char pointer to 1st non-digit char and returns number
fc241834 659 */
f7fe979e 660STATIC const char *
5aaab254 661S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
662{
663 I32 len = *patptr++ - '0';
7918f24d
NC
664
665 PERL_ARGS_ASSERT_GET_NUM;
666
49704364
WL
667 while (isDIGIT(*patptr)) {
668 if (len >= 0x7FFFFFFF/10)
669 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
670 len = (len * 10) + (*patptr++ - '0');
671 }
672 *lenptr = len;
673 return patptr;
674}
675
676/* The marvellous template parsing routine: Using state stored in *symptr,
677 * locates next template code and count
678 */
679STATIC bool
f337b084 680S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 681{
f7fe979e 682 const char* patptr = symptr->patptr;
0bcc34c2 683 const char* const patend = symptr->patend;
49704364 684
7918f24d
NC
685 PERL_ARGS_ASSERT_NEXT_SYMBOL;
686
49704364
WL
687 symptr->flags &= ~FLAG_SLASH;
688
689 while (patptr < patend) {
690 if (isSPACE(*patptr))
691 patptr++;
692 else if (*patptr == '#') {
693 patptr++;
694 while (patptr < patend && *patptr != '\n')
695 patptr++;
696 if (patptr < patend)
697 patptr++;
698 } else {
fc241834 699 /* We should have found a template code */
49704364 700 I32 code = *patptr++ & 0xFF;
66c611c5 701 U32 inherited_modifiers = 0;
49704364
WL
702
703 if (code == ','){ /* grandfather in commas but with a warning */
704 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
705 symptr->flags |= FLAG_COMMA;
706 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 707 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
708 }
709 continue;
710 }
fc241834 711
49704364 712 /* for '(', skip to ')' */
fc241834 713 if (code == '(') {
49704364
WL
714 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
715 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 716 _action( symptr ) );
49704364
WL
717 symptr->grpbeg = patptr;
718 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
719 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
720 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 721 _action( symptr ) );
49704364
WL
722 }
723
66c611c5
MHM
724 /* look for group modifiers to inherit */
725 if (TYPE_ENDIANNESS(symptr->flags)) {
726 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
727 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
728 }
729
1109a392
MHM
730 /* look for modifiers */
731 while (patptr < patend) {
b7787f18
AL
732 const char *allowed;
733 I32 modifier;
1109a392
MHM
734 switch (*patptr) {
735 case '!':
736 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 737 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
738 break;
739 case '>':
740 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 741 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
742 break;
743 case '<':
744 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 745 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
746 break;
747 default:
b7787f18
AL
748 allowed = "";
749 modifier = 0;
1109a392
MHM
750 break;
751 }
66c611c5 752
1109a392
MHM
753 if (modifier == 0)
754 break;
66c611c5 755
1109a392
MHM
756 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
757 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 758 allowed, _action( symptr ) );
66c611c5
MHM
759
760 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 761 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 762 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
763 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
764 TYPE_ENDIANNESS_MASK)
765 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 766 *patptr, _action( symptr ) );
66c611c5 767
a2a5de95
NC
768 if ((code & modifier)) {
769 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
770 "Duplicate modifier '%c' after '%c' in %s",
771 *patptr, (int) TYPE_NO_MODIFIERS(code),
772 _action( symptr ) );
1109a392 773 }
66c611c5 774
1109a392
MHM
775 code |= modifier;
776 patptr++;
49704364
WL
777 }
778
66c611c5
MHM
779 /* inherit modifiers */
780 code |= inherited_modifiers;
781
fc241834 782 /* look for count and/or / */
49704364
WL
783 if (patptr < patend) {
784 if (isDIGIT(*patptr)) {
785 patptr = get_num( patptr, &symptr->length );
786 symptr->howlen = e_number;
787
788 } else if (*patptr == '*') {
789 patptr++;
790 symptr->howlen = e_star;
791
792 } else if (*patptr == '[') {
f7fe979e 793 const char* lenptr = ++patptr;
49704364
WL
794 symptr->howlen = e_number;
795 patptr = group_end( patptr, patend, ']' ) + 1;
796 /* what kind of [] is it? */
797 if (isDIGIT(*lenptr)) {
798 lenptr = get_num( lenptr, &symptr->length );
799 if( *lenptr != ']' )
800 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 801 _action( symptr ) );
49704364
WL
802 } else {
803 tempsym_t savsym = *symptr;
804 symptr->patend = patptr-1;
805 symptr->patptr = lenptr;
806 savsym.length = measure_struct(symptr);
807 *symptr = savsym;
808 }
809 } else {
810 symptr->howlen = e_no_len;
811 symptr->length = 1;
812 }
813
814 /* try to find / */
815 while (patptr < patend) {
816 if (isSPACE(*patptr))
817 patptr++;
818 else if (*patptr == '#') {
819 patptr++;
820 while (patptr < patend && *patptr != '\n')
821 patptr++;
822 if (patptr < patend)
823 patptr++;
824 } else {
66c611c5 825 if (*patptr == '/') {
49704364
WL
826 symptr->flags |= FLAG_SLASH;
827 patptr++;
66c611c5
MHM
828 if (patptr < patend &&
829 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 830 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 831 _action( symptr ) );
49704364
WL
832 }
833 break;
834 }
18529408 835 }
49704364
WL
836 } else {
837 /* at end - no count, no / */
838 symptr->howlen = e_no_len;
839 symptr->length = 1;
840 }
841
842 symptr->code = code;
fc241834 843 symptr->patptr = patptr;
49704364 844 return TRUE;
18529408 845 }
49704364 846 }
fc241834 847 symptr->patptr = patptr;
49704364 848 return FALSE;
18529408
IZ
849}
850
18529408 851/*
fc241834 852 There is no way to cleanly handle the case where we should process the
08ca2aa3 853 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
854 (e.g. estimates like strend-s as an upper bound for the number of
855 characters left wouldn't work). So if we foresee the need of this
856 (pattern starts with U or contains U0), we want to work on the encoded
857 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
858 themselves if they need to do a lot of unpacks like this on it
859*/
fc241834 860STATIC bool
08ca2aa3
TH
861need_utf8(const char *pat, const char *patend)
862{
863 bool first = TRUE;
7918f24d
NC
864
865 PERL_ARGS_ASSERT_NEED_UTF8;
866
08ca2aa3
TH
867 while (pat < patend) {
868 if (pat[0] == '#') {
869 pat++;
f7fe979e 870 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
871 if (!pat) return FALSE;
872 } else if (pat[0] == 'U') {
873 if (first || pat[1] == '0') return TRUE;
874 } else first = FALSE;
875 pat++;
876 }
877 return FALSE;
878}
879
880STATIC char
881first_symbol(const char *pat, const char *patend) {
7918f24d
NC
882 PERL_ARGS_ASSERT_FIRST_SYMBOL;
883
08ca2aa3
TH
884 while (pat < patend) {
885 if (pat[0] != '#') return pat[0];
886 pat++;
f7fe979e 887 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
888 if (!pat) return 0;
889 pat++;
890 }
891 return 0;
892}
893
894/*
7accc089
JH
895=for apidoc unpackstring
896
21ebfc7a
DM
897The engine implementing the unpack() Perl function.
898
899Using the template pat..patend, this function unpacks the string
900s..strend into a number of mortal SVs, which it pushes onto the perl
901argument (@_) stack (so you will need to issue a C<PUTBACK> before and
902C<SPAGAIN> after the call to this function). It returns the number of
903pushed elements.
904
905The strend and patend pointers should point to the byte following the last
906character of each string.
907
908Although this function returns its values on the perl argument stack, it
909doesn't take any parameters from that stack (and thus in particular
910there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
911example).
7accc089
JH
912
913=cut */
914
915I32
f7fe979e 916Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 917{
f7fe979e 918 tempsym_t sym;
08ca2aa3 919
7918f24d
NC
920 PERL_ARGS_ASSERT_UNPACKSTRING;
921
f337b084 922 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
923 else if (need_utf8(pat, patend)) {
924 /* We probably should try to avoid this in case a scalar context call
925 wouldn't get to the "U0" */
926 STRLEN len = strend - s;
230e1fce 927 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
928 SAVEFREEPV(s);
929 strend = s + len;
f337b084 930 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
931 }
932
f337b084
TH
933 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
934 flags |= FLAG_PARSE_UTF8;
08ca2aa3 935
f7fe979e 936 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
937
938 return unpack_rec(&sym, s, s, strend, NULL );
939}
940
4136a0f7 941STATIC I32
f7fe979e 942S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 943{
27da23d5 944 dVAR; dSP;
3297d27d 945 SV *sv = NULL;
f7fe979e 946 const I32 start_sp_offset = SP - PL_stack_base;
49704364 947 howlen_t howlen;
a6ec74c1 948 I32 checksum = 0;
92d41999 949 UV cuv = 0;
a6ec74c1 950 NV cdouble = 0.0;
f337b084 951 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 952 bool beyond = FALSE;
21c16052 953 bool explicit_length;
9e27e96a 954 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 955 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
956
957 PERL_ARGS_ASSERT_UNPACK_REC;
958
28be1210 959 symptr->strbeg = s - strbeg;
49704364 960
49704364 961 while (next_symbol(symptr)) {
a7a3cfaa 962 packprops_t props;
9e27e96a 963 I32 len;
f337b084 964 I32 datumtype = symptr->code;
206947d2 965 /* do first one only unless in list context
08ca2aa3 966 / is implemented by unpacking the count, then popping it from the
206947d2 967 stack, so must check that we're not in the middle of a / */
49704364 968 if ( unpack_only_one
206947d2 969 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 970 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 971 break;
49704364 972
f337b084 973 switch (howlen = symptr->howlen) {
fc241834
RGS
974 case e_star:
975 len = strend - strbeg; /* long enough */
49704364 976 break;
f337b084
TH
977 default:
978 /* e_no_len and e_number */
979 len = symptr->length;
980 break;
49704364 981 }
18529408 982
21c16052 983 explicit_length = TRUE;
a6ec74c1 984 redo_switch:
49704364 985 beyond = s >= strend;
a7a3cfaa
TH
986
987 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
988 if (props) {
989 /* props nonzero means we can process this letter. */
9e27e96a
AL
990 const long size = props & PACK_SIZE_MASK;
991 const long howmany = (strend - s) / size;
a7a3cfaa
TH
992 if (len > howmany)
993 len = howmany;
994
995 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
996 if (len && unpack_only_one) len = 1;
997 EXTEND(SP, len);
998 EXTEND_MORTAL(len);
78d46eaa
NC
999 }
1000 }
a7a3cfaa 1001
1109a392 1002 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1003 default:
1109a392 1004 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1005
a6ec74c1 1006 case '%':
49704364 1007 if (howlen == e_no_len)
18529408 1008 len = 16; /* len is not specified */
a6ec74c1 1009 checksum = len;
92d41999 1010 cuv = 0;
a6ec74c1 1011 cdouble = 0;
18529408 1012 continue;
a6ec74c1 1013 break;
18529408
IZ
1014 case '(':
1015 {
49704364 1016 tempsym_t savsym = *symptr;
9e27e96a 1017 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1018 symptr->flags |= group_modifiers;
49704364 1019 symptr->patend = savsym.grpend;
28be1210 1020 symptr->previous = &savsym;
49704364 1021 symptr->level++;
18529408 1022 PUTBACK;
c6f750d1 1023 if (len && unpack_only_one) len = 1;
18529408 1024 while (len--) {
49704364 1025 symptr->patptr = savsym.grpbeg;
f337b084
TH
1026 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1027 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1028 unpack_rec(symptr, s, strbeg, strend, &s);
1029 if (s == strend && savsym.howlen == e_star)
49704364 1030 break; /* No way to continue */
18529408
IZ
1031 }
1032 SPAGAIN;
28be1210 1033 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1034 *symptr = savsym;
18529408
IZ
1035 break;
1036 }
28be1210 1037 case '.' | TYPE_IS_SHRIEKING:
28be1210 1038 case '.': {
9e27e96a 1039 const char *from;
28be1210 1040 SV *sv;
9e27e96a 1041 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
1042 if (howlen == e_star) from = strbeg;
1043 else if (len <= 0) from = s;
1044 else {
1045 tempsym_t *group = symptr;
1046
1047 while (--len && group) group = group->previous;
1048 from = group ? strbeg + group->strbeg : strbeg;
1049 }
1050 sv = from <= s ?
00646304
CB
1051 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1052 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1053 mXPUSHs(sv);
28be1210
TH
1054 break;
1055 }
28be1210 1056 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 1057 case '@':
28be1210 1058 s = strbeg + symptr->strbeg;
28be1210 1059 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 1060 {
08ca2aa3
TH
1061 while (len > 0) {
1062 if (s >= strend)
1063 Perl_croak(aTHX_ "'@' outside of string in unpack");
1064 s += UTF8SKIP(s);
1065 len--;
1066 }
1067 if (s > strend)
1068 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1069 } else {
28be1210 1070 if (strend-s < len)
fc241834 1071 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1072 s += len;
08ca2aa3 1073 }
a6ec74c1 1074 break;
62f95557
IZ
1075 case 'X' | TYPE_IS_SHRIEKING:
1076 if (!len) /* Avoid division by 0 */
1077 len = 1;
08ca2aa3 1078 if (utf8) {
f7fe979e 1079 const char *hop, *last;
f337b084
TH
1080 I32 l = len;
1081 hop = last = strbeg;
1082 while (hop < s) {
1083 hop += UTF8SKIP(hop);
1084 if (--l == 0) {
08ca2aa3 1085 last = hop;
f337b084
TH
1086 l = len;
1087 }
fc241834 1088 }
f337b084
TH
1089 if (last > s)
1090 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1091 s = last;
1092 break;
f337b084
TH
1093 }
1094 len = (s - strbeg) % len;
62f95557 1095 /* FALL THROUGH */
a6ec74c1 1096 case 'X':
08ca2aa3
TH
1097 if (utf8) {
1098 while (len > 0) {
1099 if (s <= strbeg)
1100 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1101 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1102 if (s <= strbeg)
1103 Perl_croak(aTHX_ "'X' outside of string in unpack");
1104 }
1105 len--;
1106 }
1107 } else {
fc241834
RGS
1108 if (len > s - strbeg)
1109 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1110 s -= len;
08ca2aa3 1111 }
a6ec74c1 1112 break;
9e27e96a
AL
1113 case 'x' | TYPE_IS_SHRIEKING: {
1114 I32 ai32;
62f95557
IZ
1115 if (!len) /* Avoid division by 0 */
1116 len = 1;
230e1fce
NC
1117 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1118 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1119 if (ai32 == 0) break;
1120 len -= ai32;
9e27e96a 1121 }
62f95557 1122 /* FALL THROUGH */
a6ec74c1 1123 case 'x':
08ca2aa3
TH
1124 if (utf8) {
1125 while (len>0) {
1126 if (s >= strend)
1127 Perl_croak(aTHX_ "'x' outside of string in unpack");
1128 s += UTF8SKIP(s);
1129 len--;
1130 }
1131 } else {
fc241834
RGS
1132 if (len > strend - s)
1133 Perl_croak(aTHX_ "'x' outside of string in unpack");
1134 s += len;
f337b084 1135 }
a6ec74c1
JH
1136 break;
1137 case '/':
49704364
WL
1138 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1139 break;
a6ec74c1
JH
1140 case 'A':
1141 case 'Z':
1142 case 'a':
08ca2aa3
TH
1143 if (checksum) {
1144 /* Preliminary length estimate is assumed done in 'W' */
1145 if (len > strend - s) len = strend - s;
1146 goto W_checksum;
1147 }
1148 if (utf8) {
1149 I32 l;
f7fe979e 1150 const char *hop;
08ca2aa3
TH
1151 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1152 if (hop >= strend) {
1153 if (hop > strend)
1154 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1155 break;
fc241834 1156 }
a6ec74c1 1157 }
08ca2aa3
TH
1158 if (hop > strend)
1159 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1160 len = hop - s;
1161 } else if (len > strend - s)
1162 len = strend - s;
1163
1164 if (datumtype == 'Z') {
1165 /* 'Z' strips stuff after first null */
f7fe979e 1166 const char *ptr, *end;
f337b084
TH
1167 end = s + len;
1168 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1169 sv = newSVpvn(s, ptr-s);
1170 if (howlen == e_star) /* exact for 'Z*' */
1171 len = ptr-s + (ptr != strend ? 1 : 0);
1172 } else if (datumtype == 'A') {
1173 /* 'A' strips both nulls and spaces */
f7fe979e 1174 const char *ptr;
18bdf90a
TH
1175 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1176 for (ptr = s+len-1; ptr >= s; ptr--)
1177 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1178 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1179 if (ptr >= s) ptr += UTF8SKIP(ptr);
1180 else ptr++;
28be1210 1181 if (ptr > s+len)
18bdf90a
TH
1182 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1183 } else {
1184 for (ptr = s+len-1; ptr >= s; ptr--)
1185 if (*ptr != 0 && !isSPACE(*ptr)) break;
1186 ptr++;
1187 }
08ca2aa3
TH
1188 sv = newSVpvn(s, ptr-s);
1189 } else sv = newSVpvn(s, len);
1190
1191 if (utf8) {
1192 SvUTF8_on(sv);
1193 /* Undo any upgrade done due to need_utf8() */
f337b084 1194 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1195 sv_utf8_downgrade(sv, 0);
a6ec74c1 1196 }
6e449a3a 1197 mXPUSHs(sv);
08ca2aa3 1198 s += len;
a6ec74c1
JH
1199 break;
1200 case 'B':
08ca2aa3
TH
1201 case 'b': {
1202 char *str;
49704364 1203 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1204 len = (strend - s) * 8;
1205 if (checksum) {
f337b084 1206 if (utf8)
08ca2aa3 1207 while (len >= 8 && s < strend) {
f337b084 1208 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1209 len -= 8;
1210 }
f337b084 1211 else
fc241834 1212 while (len >= 8) {
08ca2aa3 1213 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1214 len -= 8;
1215 }
08ca2aa3
TH
1216 if (len && s < strend) {
1217 U8 bits;
f337b084
TH
1218 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1219 if (datumtype == 'b')
a6ec74c1 1220 while (len-- > 0) {
92d41999 1221 if (bits & 1) cuv++;
a6ec74c1
JH
1222 bits >>= 1;
1223 }
f337b084 1224 else
a6ec74c1 1225 while (len-- > 0) {
08ca2aa3 1226 if (bits & 0x80) cuv++;
a6ec74c1
JH
1227 bits <<= 1;
1228 }
fc241834 1229 }
a6ec74c1
JH
1230 break;
1231 }
08ca2aa3 1232
561b68a9 1233 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1234 SvPOK_on(sv);
1235 str = SvPVX(sv);
1236 if (datumtype == 'b') {
f337b084 1237 U8 bits = 0;
f7fe979e 1238 const I32 ai32 = len;
08ca2aa3
TH
1239 for (len = 0; len < ai32; len++) {
1240 if (len & 7) bits >>= 1;
1241 else if (utf8) {
1242 if (s >= strend) break;
f337b084 1243 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1244 } else bits = *(U8 *) s++;
1245 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1246 }
08ca2aa3 1247 } else {
f337b084 1248 U8 bits = 0;
f7fe979e 1249 const I32 ai32 = len;
08ca2aa3
TH
1250 for (len = 0; len < ai32; len++) {
1251 if (len & 7) bits <<= 1;
1252 else if (utf8) {
1253 if (s >= strend) break;
f337b084 1254 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1255 } else bits = *(U8 *) s++;
1256 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1257 }
1258 }
1259 *str = '\0';
aa07b2f6 1260 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1261 XPUSHs(sv);
a6ec74c1 1262 break;
08ca2aa3 1263 }
a6ec74c1 1264 case 'H':
08ca2aa3 1265 case 'h': {
3297d27d 1266 char *str = NULL;
fc241834 1267 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1268 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1269 len = (strend - s) * 2;
858fe5e1
TC
1270 if (!checksum) {
1271 sv = sv_2mortal(newSV(len ? len : 1));
1272 SvPOK_on(sv);
1273 str = SvPVX(sv);
1274 }
a6ec74c1 1275 if (datumtype == 'h') {
f337b084 1276 U8 bits = 0;
9e27e96a 1277 I32 ai32 = len;
fc241834
RGS
1278 for (len = 0; len < ai32; len++) {
1279 if (len & 1) bits >>= 4;
1280 else if (utf8) {
1281 if (s >= strend) break;
f337b084 1282 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1283 } else bits = * (U8 *) s++;
858fe5e1
TC
1284 if (!checksum)
1285 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1286 }
08ca2aa3 1287 } else {
f337b084 1288 U8 bits = 0;
f7fe979e 1289 const I32 ai32 = len;
08ca2aa3
TH
1290 for (len = 0; len < ai32; len++) {
1291 if (len & 1) bits <<= 4;
1292 else if (utf8) {
1293 if (s >= strend) break;
f337b084 1294 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1295 } else bits = *(U8 *) s++;
858fe5e1
TC
1296 if (!checksum)
1297 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1298 }
1299 }
858fe5e1
TC
1300 if (!checksum) {
1301 *str = '\0';
1302 SvCUR_set(sv, str - SvPVX_const(sv));
1303 XPUSHs(sv);
1304 }
a6ec74c1 1305 break;
08ca2aa3 1306 }
1651fc44
ML
1307 case 'C':
1308 if (len == 0) {
1309 if (explicit_length)
1310 /* Switch to "character" mode */
1311 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1312 break;
1313 }
1314 /* FALL THROUGH */
a6ec74c1 1315 case 'c':
1651fc44
ML
1316 while (len-- > 0 && s < strend) {
1317 int aint;
1318 if (utf8)
1319 {
1320 STRLEN retlen;
1321 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1322 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1325 s += retlen;
1326 }
1327 else
1328 aint = *(U8 *)(s)++;
1329 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1330 aint -= 256;
08ca2aa3 1331 if (!checksum)
6e449a3a 1332 mPUSHi(aint);
73cb7263
NC
1333 else if (checksum > bits_in_uv)
1334 cdouble += (NV)aint;
1335 else
1336 cuv += aint;
a6ec74c1
JH
1337 }
1338 break;
08ca2aa3
TH
1339 case 'W':
1340 W_checksum:
1651fc44 1341 if (utf8) {
08ca2aa3 1342 while (len-- > 0 && s < strend) {
08ca2aa3 1343 STRLEN retlen;
f7fe979e 1344 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1345 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1346 if (retlen == (STRLEN) -1 || retlen == 0)
1347 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1348 s += retlen;
1349 if (!checksum)
6e449a3a 1350 mPUSHu(val);
08ca2aa3
TH
1351 else if (checksum > bits_in_uv)
1352 cdouble += (NV) val;
d6d3e8bd 1353 else
08ca2aa3 1354 cuv += val;
fc241834 1355 }
08ca2aa3 1356 } else if (!checksum)
a6ec74c1 1357 while (len-- > 0) {
f7fe979e 1358 const U8 ch = *(U8 *) s++;
6e449a3a 1359 mPUSHu(ch);
a6ec74c1 1360 }
08ca2aa3
TH
1361 else if (checksum > bits_in_uv)
1362 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1363 else
1364 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1365 break;
1366 case 'U':
35bcd338 1367 if (len == 0) {
c5333953 1368 if (explicit_length && howlen != e_star) {
08ca2aa3 1369 /* Switch to "bytes in UTF-8" mode */
f337b084 1370 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1371 else
1372 /* Should be impossible due to the need_utf8() test */
1373 Perl_croak(aTHX_ "U0 mode on a byte string");
1374 }
35bcd338
JH
1375 break;
1376 }
08ca2aa3 1377 if (len > strend - s) len = strend - s;
fc241834 1378 if (!checksum) {
08ca2aa3
TH
1379 if (len && unpack_only_one) len = 1;
1380 EXTEND(SP, len);
1381 EXTEND_MORTAL(len);
fc241834 1382 }
08ca2aa3
TH
1383 while (len-- > 0 && s < strend) {
1384 STRLEN retlen;
1385 UV auv;
1386 if (utf8) {
1387 U8 result[UTF8_MAXLEN];
f7fe979e 1388 const char *ptr = s;
08ca2aa3 1389 STRLEN len;
08ca2aa3
TH
1390 /* Bug: warns about bad utf8 even if we are short on bytes
1391 and will break out of the loop */
230e1fce
NC
1392 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1393 'U'))
08ca2aa3
TH
1394 break;
1395 len = UTF8SKIP(result);
fc241834 1396 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1397 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1398 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1399 s = ptr;
1400 } else {
cde84f2a 1401 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1402 if (retlen == (STRLEN) -1 || retlen == 0)
1403 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1404 s += retlen;
1405 }
1406 if (!checksum)
6e449a3a 1407 mPUSHu(auv);
73cb7263 1408 else if (checksum > bits_in_uv)
08ca2aa3 1409 cdouble += (NV) auv;
73cb7263 1410 else
08ca2aa3 1411 cuv += auv;
a6ec74c1
JH
1412 }
1413 break;
49704364
WL
1414 case 's' | TYPE_IS_SHRIEKING:
1415#if SHORTSIZE != SIZE16
73cb7263 1416 while (len-- > 0) {
08ca2aa3 1417 short ashort;
f337b084
TH
1418 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1419 DO_BO_UNPACK(ashort, s);
08ca2aa3 1420 if (!checksum)
6e449a3a 1421 mPUSHi(ashort);
73cb7263
NC
1422 else if (checksum > bits_in_uv)
1423 cdouble += (NV)ashort;
1424 else
1425 cuv += ashort;
49704364
WL
1426 }
1427 break;
1428#else
1429 /* Fallthrough! */
a6ec74c1 1430#endif
49704364 1431 case 's':
73cb7263 1432 while (len-- > 0) {
08ca2aa3
TH
1433 I16 ai16;
1434
1435#if U16SIZE > SIZE16
1436 ai16 = 0;
1437#endif
f337b084 1438 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1439 DO_BO_UNPACK(ai16, 16);
1109a392 1440#if U16SIZE > SIZE16
73cb7263
NC
1441 if (ai16 > 32767)
1442 ai16 -= 65536;
a6ec74c1 1443#endif
08ca2aa3 1444 if (!checksum)
6e449a3a 1445 mPUSHi(ai16);
73cb7263
NC
1446 else if (checksum > bits_in_uv)
1447 cdouble += (NV)ai16;
1448 else
1449 cuv += ai16;
a6ec74c1
JH
1450 }
1451 break;
49704364
WL
1452 case 'S' | TYPE_IS_SHRIEKING:
1453#if SHORTSIZE != SIZE16
73cb7263 1454 while (len-- > 0) {
08ca2aa3 1455 unsigned short aushort;
f337b084
TH
1456 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1457 DO_BO_UNPACK(aushort, s);
08ca2aa3 1458 if (!checksum)
6e449a3a 1459 mPUSHu(aushort);
73cb7263
NC
1460 else if (checksum > bits_in_uv)
1461 cdouble += (NV)aushort;
1462 else
1463 cuv += aushort;
49704364
WL
1464 }
1465 break;
1466#else
486ec47a 1467 /* Fallthrough! */
49704364 1468#endif
a6ec74c1
JH
1469 case 'v':
1470 case 'n':
1471 case 'S':
73cb7263 1472 while (len-- > 0) {
08ca2aa3
TH
1473 U16 au16;
1474#if U16SIZE > SIZE16
1475 au16 = 0;
1476#endif
f337b084 1477 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1478 DO_BO_UNPACK(au16, 16);
a6ec74c1 1479#ifdef HAS_NTOHS
73cb7263
NC
1480 if (datumtype == 'n')
1481 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1482#endif
1483#ifdef HAS_VTOHS
73cb7263
NC
1484 if (datumtype == 'v')
1485 au16 = vtohs(au16);
a6ec74c1 1486#endif
08ca2aa3 1487 if (!checksum)
6e449a3a 1488 mPUSHu(au16);
73cb7263 1489 else if (checksum > bits_in_uv)
f337b084 1490 cdouble += (NV) au16;
73cb7263
NC
1491 else
1492 cuv += au16;
a6ec74c1
JH
1493 }
1494 break;
068bd2e7
MHM
1495 case 'v' | TYPE_IS_SHRIEKING:
1496 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1497 while (len-- > 0) {
08ca2aa3
TH
1498 I16 ai16;
1499# if U16SIZE > SIZE16
1500 ai16 = 0;
1501# endif
f337b084 1502 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1503# ifdef HAS_NTOHS
73cb7263 1504 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1505 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1506# endif /* HAS_NTOHS */
1507# ifdef HAS_VTOHS
73cb7263 1508 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1509 ai16 = (I16) vtohs((U16) ai16);
1510# endif /* HAS_VTOHS */
1511 if (!checksum)
6e449a3a 1512 mPUSHi(ai16);
73cb7263 1513 else if (checksum > bits_in_uv)
08ca2aa3 1514 cdouble += (NV) ai16;
73cb7263
NC
1515 else
1516 cuv += ai16;
068bd2e7
MHM
1517 }
1518 break;
a6ec74c1 1519 case 'i':
49704364 1520 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1521 while (len-- > 0) {
08ca2aa3 1522 int aint;
f337b084
TH
1523 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1524 DO_BO_UNPACK(aint, i);
08ca2aa3 1525 if (!checksum)
6e449a3a 1526 mPUSHi(aint);
73cb7263
NC
1527 else if (checksum > bits_in_uv)
1528 cdouble += (NV)aint;
1529 else
1530 cuv += aint;
a6ec74c1
JH
1531 }
1532 break;
1533 case 'I':
49704364 1534 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1535 while (len-- > 0) {
08ca2aa3 1536 unsigned int auint;
f337b084
TH
1537 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1538 DO_BO_UNPACK(auint, i);
08ca2aa3 1539 if (!checksum)
6e449a3a 1540 mPUSHu(auint);
73cb7263
NC
1541 else if (checksum > bits_in_uv)
1542 cdouble += (NV)auint;
1543 else
1544 cuv += auint;
a6ec74c1
JH
1545 }
1546 break;
92d41999 1547 case 'j':
73cb7263 1548 while (len-- > 0) {
08ca2aa3 1549 IV aiv;
f337b084 1550 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1551#if IVSIZE == INTSIZE
f337b084 1552 DO_BO_UNPACK(aiv, i);
1109a392 1553#elif IVSIZE == LONGSIZE
f337b084 1554 DO_BO_UNPACK(aiv, l);
1109a392 1555#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1556 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1557#else
1558 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1559#endif
08ca2aa3 1560 if (!checksum)
6e449a3a 1561 mPUSHi(aiv);
73cb7263
NC
1562 else if (checksum > bits_in_uv)
1563 cdouble += (NV)aiv;
1564 else
1565 cuv += aiv;
92d41999
JH
1566 }
1567 break;
1568 case 'J':
73cb7263 1569 while (len-- > 0) {
08ca2aa3 1570 UV auv;
f337b084 1571 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1572#if IVSIZE == INTSIZE
f337b084 1573 DO_BO_UNPACK(auv, i);
08ca2aa3 1574#elif IVSIZE == LONGSIZE
f337b084 1575 DO_BO_UNPACK(auv, l);
08ca2aa3 1576#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1577 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1578#else
1579 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1580#endif
08ca2aa3 1581 if (!checksum)
6e449a3a 1582 mPUSHu(auv);
73cb7263
NC
1583 else if (checksum > bits_in_uv)
1584 cdouble += (NV)auv;
1585 else
1586 cuv += auv;
92d41999
JH
1587 }
1588 break;
49704364
WL
1589 case 'l' | TYPE_IS_SHRIEKING:
1590#if LONGSIZE != SIZE32
73cb7263 1591 while (len-- > 0) {
08ca2aa3 1592 long along;
f337b084
TH
1593 SHIFT_VAR(utf8, s, strend, along, datumtype);
1594 DO_BO_UNPACK(along, l);
08ca2aa3 1595 if (!checksum)
6e449a3a 1596 mPUSHi(along);
73cb7263
NC
1597 else if (checksum > bits_in_uv)
1598 cdouble += (NV)along;
1599 else
1600 cuv += along;
49704364
WL
1601 }
1602 break;
1603#else
1604 /* Fallthrough! */
a6ec74c1 1605#endif
49704364 1606 case 'l':
73cb7263 1607 while (len-- > 0) {
08ca2aa3
TH
1608 I32 ai32;
1609#if U32SIZE > SIZE32
1610 ai32 = 0;
1611#endif
f337b084 1612 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1613 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1614#if U32SIZE > SIZE32
08ca2aa3 1615 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1616#endif
08ca2aa3 1617 if (!checksum)
6e449a3a 1618 mPUSHi(ai32);
73cb7263
NC
1619 else if (checksum > bits_in_uv)
1620 cdouble += (NV)ai32;
1621 else
1622 cuv += ai32;
a6ec74c1
JH
1623 }
1624 break;
49704364
WL
1625 case 'L' | TYPE_IS_SHRIEKING:
1626#if LONGSIZE != SIZE32
73cb7263 1627 while (len-- > 0) {
08ca2aa3 1628 unsigned long aulong;
f337b084
TH
1629 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1630 DO_BO_UNPACK(aulong, l);
08ca2aa3 1631 if (!checksum)
6e449a3a 1632 mPUSHu(aulong);
73cb7263
NC
1633 else if (checksum > bits_in_uv)
1634 cdouble += (NV)aulong;
1635 else
1636 cuv += aulong;
49704364
WL
1637 }
1638 break;
1639#else
1640 /* Fall through! */
1641#endif
a6ec74c1
JH
1642 case 'V':
1643 case 'N':
1644 case 'L':
73cb7263 1645 while (len-- > 0) {
08ca2aa3
TH
1646 U32 au32;
1647#if U32SIZE > SIZE32
1648 au32 = 0;
1649#endif
f337b084 1650 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1651 DO_BO_UNPACK(au32, 32);
a6ec74c1 1652#ifdef HAS_NTOHL
73cb7263
NC
1653 if (datumtype == 'N')
1654 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1655#endif
1656#ifdef HAS_VTOHL
73cb7263
NC
1657 if (datumtype == 'V')
1658 au32 = vtohl(au32);
a6ec74c1 1659#endif
08ca2aa3 1660 if (!checksum)
6e449a3a 1661 mPUSHu(au32);
fc241834
RGS
1662 else if (checksum > bits_in_uv)
1663 cdouble += (NV)au32;
1664 else
1665 cuv += au32;
a6ec74c1
JH
1666 }
1667 break;
068bd2e7
MHM
1668 case 'V' | TYPE_IS_SHRIEKING:
1669 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1670 while (len-- > 0) {
08ca2aa3 1671 I32 ai32;
f8e5a5db 1672#if U32SIZE > SIZE32
08ca2aa3 1673 ai32 = 0;
f8e5a5db 1674#endif
f337b084 1675 SHIFT32(utf8, s, strend, &ai32, datumtype);
f8e5a5db 1676#ifdef HAS_NTOHL
73cb7263
NC
1677 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1678 ai32 = (I32)PerlSock_ntohl((U32)ai32);
f8e5a5db
NC
1679#endif
1680#ifdef HAS_VTOHL
73cb7263
NC
1681 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1682 ai32 = (I32)vtohl((U32)ai32);
f8e5a5db 1683#endif
08ca2aa3 1684 if (!checksum)
6e449a3a 1685 mPUSHi(ai32);
73cb7263
NC
1686 else if (checksum > bits_in_uv)
1687 cdouble += (NV)ai32;
1688 else
1689 cuv += ai32;
068bd2e7
MHM
1690 }
1691 break;
a6ec74c1 1692 case 'p':
a6ec74c1 1693 while (len-- > 0) {
f7fe979e 1694 const char *aptr;
f337b084 1695 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1696 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1697 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1698 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1699 }
1700 break;
1701 case 'w':
a6ec74c1
JH
1702 {
1703 UV auv = 0;
1704 U32 bytes = 0;
fc241834 1705
08ca2aa3
TH
1706 while (len > 0 && s < strend) {
1707 U8 ch;
f337b084 1708 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1709 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1710 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1711 if (ch < 0x80) {
a6ec74c1 1712 bytes = 0;
6e449a3a 1713 mPUSHu(auv);
a6ec74c1
JH
1714 len--;
1715 auv = 0;
08ca2aa3 1716 continue;
a6ec74c1 1717 }
08ca2aa3 1718 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1719 const char *t;
a6ec74c1 1720
f5992bc4 1721 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1722 while (s < strend) {
f337b084 1723 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1724 sv = mul128(sv, (U8)(ch & 0x7f));
1725 if (!(ch & 0x80)) {
a6ec74c1
JH
1726 bytes = 0;
1727 break;
1728 }
1729 }
10516c54 1730 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1731 while (*t == '0')
1732 t++;
1733 sv_chop(sv, t);
6e449a3a 1734 mPUSHs(sv);
a6ec74c1
JH
1735 len--;
1736 auv = 0;
1737 }
1738 }
1739 if ((s >= strend) && bytes)
49704364 1740 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1741 }
1742 break;
1743 case 'P':
49704364
WL
1744 if (symptr->howlen == e_star)
1745 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1746 EXTEND(SP, 1);
2d3e0934 1747 if (s + sizeof(char*) <= strend) {
08ca2aa3 1748 char *aptr;
f337b084 1749 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1750 DO_BO_UNPACK_PC(aptr);
fc241834 1751 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1752 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1753 }
a6ec74c1
JH
1754 break;
1755#ifdef HAS_QUAD
1756 case 'q':
73cb7263 1757 while (len-- > 0) {
08ca2aa3 1758 Quad_t aquad;
f337b084
TH
1759 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1760 DO_BO_UNPACK(aquad, 64);
08ca2aa3 1761 if (!checksum)
6e449a3a
MHM
1762 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1763 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1764 else if (checksum > bits_in_uv)
1765 cdouble += (NV)aquad;
1766 else
1767 cuv += aquad;
1768 }
a6ec74c1
JH
1769 break;
1770 case 'Q':
73cb7263 1771 while (len-- > 0) {
08ca2aa3 1772 Uquad_t auquad;
f337b084
TH
1773 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1774 DO_BO_UNPACK(auquad, 64);
08ca2aa3 1775 if (!checksum)
6e449a3a
MHM
1776 mPUSHs(auquad <= UV_MAX ?
1777 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1778 else if (checksum > bits_in_uv)
1779 cdouble += (NV)auquad;
1780 else
1781 cuv += auquad;
a6ec74c1
JH
1782 }
1783 break;
08ca2aa3 1784#endif /* HAS_QUAD */
a6ec74c1
JH
1785 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1786 case 'f':
73cb7263 1787 while (len-- > 0) {
08ca2aa3 1788 float afloat;
f337b084 1789 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1790 DO_BO_UNPACK_N(afloat, float);
1791 if (!checksum)
6e449a3a 1792 mPUSHn(afloat);
08ca2aa3 1793 else
73cb7263 1794 cdouble += afloat;
fc241834 1795 }
a6ec74c1
JH
1796 break;
1797 case 'd':
73cb7263 1798 while (len-- > 0) {
08ca2aa3 1799 double adouble;
f337b084 1800 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
1801 DO_BO_UNPACK_N(adouble, double);
1802 if (!checksum)
6e449a3a 1803 mPUSHn(adouble);
08ca2aa3 1804 else
73cb7263 1805 cdouble += adouble;
fc241834 1806 }
a6ec74c1 1807 break;
92d41999 1808 case 'F':
73cb7263 1809 while (len-- > 0) {
275663fa
TC
1810 NV_bytes anv;
1811 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1812 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 1813 if (!checksum)
275663fa 1814 mPUSHn(anv.nv);
08ca2aa3 1815 else
275663fa 1816 cdouble += anv.nv;
fc241834 1817 }
92d41999
JH
1818 break;
1819#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1820 case 'D':
73cb7263 1821 while (len-- > 0) {
275663fa
TC
1822 ld_bytes aldouble;
1823 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1824 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 1825 if (!checksum)
275663fa 1826 mPUSHn(aldouble.ld);
08ca2aa3 1827 else
275663fa 1828 cdouble += aldouble.ld;
92d41999
JH
1829 }
1830 break;
1831#endif
a6ec74c1 1832 case 'u':
858fe5e1 1833 if (!checksum) {
f7fe979e 1834 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1835 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1836 if (l) SvPOK_on(sv);
1837 }
1838 if (utf8) {
1839 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1840 I32 a, b, c, d;
db187877 1841 char hunk[3];
08ca2aa3 1842
08ca2aa3
TH
1843 while (len > 0) {
1844 next_uni_uu(aTHX_ &s, strend, &a);
1845 next_uni_uu(aTHX_ &s, strend, &b);
1846 next_uni_uu(aTHX_ &s, strend, &c);
1847 next_uni_uu(aTHX_ &s, strend, &d);
1848 hunk[0] = (char)((a << 2) | (b >> 4));
1849 hunk[1] = (char)((b << 4) | (c >> 2));
1850 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1851 if (!checksum)
1852 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1853 len -= 3;
1854 }
1855 if (s < strend) {
f7fe979e
AL
1856 if (*s == '\n') {
1857 s++;
1858 }
08ca2aa3
TH
1859 else {
1860 /* possible checksum byte */
f7fe979e
AL
1861 const char *skip = s+UTF8SKIP(s);
1862 if (skip < strend && *skip == '\n')
1863 s = skip+1;
08ca2aa3
TH
1864 }
1865 }
1866 }
1867 } else {
fc241834
RGS
1868 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1869 I32 a, b, c, d;
db187877 1870 char hunk[3];
a6ec74c1 1871
fc241834
RGS
1872 len = PL_uudmap[*(U8*)s++] & 077;
1873 while (len > 0) {
1874 if (s < strend && ISUUCHAR(*s))
1875 a = PL_uudmap[*(U8*)s++] & 077;
1876 else
1877 a = 0;
1878 if (s < strend && ISUUCHAR(*s))
1879 b = PL_uudmap[*(U8*)s++] & 077;
1880 else
1881 b = 0;
1882 if (s < strend && ISUUCHAR(*s))
1883 c = PL_uudmap[*(U8*)s++] & 077;
1884 else
1885 c = 0;
1886 if (s < strend && ISUUCHAR(*s))
1887 d = PL_uudmap[*(U8*)s++] & 077;
1888 else
1889 d = 0;
1890 hunk[0] = (char)((a << 2) | (b >> 4));
1891 hunk[1] = (char)((b << 4) | (c >> 2));
1892 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1893 if (!checksum)
1894 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1895 len -= 3;
1896 }
1897 if (*s == '\n')
1898 s++;
1899 else /* possible checksum byte */
1900 if (s + 1 < strend && s[1] == '\n')
1901 s += 2;
a6ec74c1 1902 }
08ca2aa3 1903 }
858fe5e1
TC
1904 if (!checksum)
1905 XPUSHs(sv);
a6ec74c1
JH
1906 break;
1907 }
49704364 1908
a6ec74c1 1909 if (checksum) {
1109a392 1910 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1911 (checksum > bits_in_uv &&
08ca2aa3
TH
1912 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1913 NV trouble, anv;
a6ec74c1 1914
08ca2aa3 1915 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1916 while (checksum >= 16) {
1917 checksum -= 16;
08ca2aa3 1918 anv *= 65536.0;
a6ec74c1 1919 }
a6ec74c1 1920 while (cdouble < 0.0)
08ca2aa3
TH
1921 cdouble += anv;
1922 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1923 sv = newSVnv(cdouble);
a6ec74c1
JH
1924 }
1925 else {
fa8ec7c1
NC
1926 if (checksum < bits_in_uv) {
1927 UV mask = ((UV)1 << checksum) - 1;
92d41999 1928 cuv &= mask;
a6ec74c1 1929 }
c4c5f44a 1930 sv = newSVuv(cuv);
a6ec74c1 1931 }
6e449a3a 1932 mXPUSHs(sv);
a6ec74c1
JH
1933 checksum = 0;
1934 }
fc241834 1935
49704364
WL
1936 if (symptr->flags & FLAG_SLASH){
1937 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1938 break;
49704364
WL
1939 if( next_symbol(symptr) ){
1940 if( symptr->howlen == e_number )
1941 Perl_croak(aTHX_ "Count after length/code in unpack" );
1942 if( beyond ){
1943 /* ...end of char buffer then no decent length available */
1944 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1945 } else {
1946 /* take top of stack (hope it's numeric) */
1947 len = POPi;
1948 if( len < 0 )
1949 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1950 }
1951 } else {
1952 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1953 }
1954 datumtype = symptr->code;
21c16052 1955 explicit_length = FALSE;
49704364
WL
1956 goto redo_switch;
1957 }
a6ec74c1 1958 }
49704364 1959
18529408
IZ
1960 if (new_s)
1961 *new_s = s;
1962 PUTBACK;
1963 return SP - PL_stack_base - start_sp_offset;
1964}
1965
1966PP(pp_unpack)
1967{
97aff369 1968 dVAR;
18529408 1969 dSP;
bab9c0ac 1970 dPOPPOPssrl;
18529408
IZ
1971 I32 gimme = GIMME_V;
1972 STRLEN llen;
1973 STRLEN rlen;
5c144d81
NC
1974 const char *pat = SvPV_const(left, llen);
1975 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1976 const char *strend = s + rlen;
1977 const char *patend = pat + llen;
08ca2aa3 1978 I32 cnt;
18529408
IZ
1979
1980 PUTBACK;
7accc089 1981 cnt = unpackstring(pat, patend, s, strend,
49704364 1982 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1983 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1984
18529408
IZ
1985 SPAGAIN;
1986 if ( !cnt && gimme == G_SCALAR )
1987 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1988 RETURN;
1989}
1990
f337b084 1991STATIC U8 *
f7fe979e 1992doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1993{
f337b084 1994 *h++ = PL_uuemap[len];
a6ec74c1 1995 while (len > 2) {
f337b084
TH
1996 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1997 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1998 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1999 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2000 s += 3;
2001 len -= 3;
2002 }
2003 if (len > 0) {
f7fe979e 2004 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2005 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2006 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2007 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2008 *h++ = PL_uuemap[0];
a6ec74c1 2009 }
f337b084
TH
2010 *h++ = '\n';
2011 return h;
a6ec74c1
JH
2012}
2013
2014STATIC SV *
f7fe979e 2015S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2016{
8b6e33c7
AL
2017 SV *result = newSVpvn(s, l);
2018 char *const result_c = SvPV_nolen(result); /* convenience */
2019 char *out = result_c;
2020 bool skip = 1;
2021 bool ignore = 0;
a6ec74c1 2022
7918f24d
NC
2023 PERL_ARGS_ASSERT_IS_AN_INT;
2024
a6ec74c1
JH
2025 while (*s) {
2026 switch (*s) {
2027 case ' ':
2028 break;
2029 case '+':
2030 if (!skip) {
2031 SvREFCNT_dec(result);
2032 return (NULL);
2033 }
2034 break;
2035 case '0':
2036 case '1':
2037 case '2':
2038 case '3':
2039 case '4':
2040 case '5':
2041 case '6':
2042 case '7':
2043 case '8':
2044 case '9':
2045 skip = 0;
2046 if (!ignore) {
2047 *(out++) = *s;
2048 }
2049 break;
2050 case '.':
2051 ignore = 1;
2052 break;
2053 default:
2054 SvREFCNT_dec(result);
2055 return (NULL);
2056 }
2057 s++;
2058 }
2059 *(out++) = '\0';
2060 SvCUR_set(result, out - result_c);
2061 return (result);
2062}
2063
2064/* pnum must be '\0' terminated */
2065STATIC int
2066S_div128(pTHX_ SV *pnum, bool *done)
2067{
8b6e33c7
AL
2068 STRLEN len;
2069 char * const s = SvPV(pnum, len);
2070 char *t = s;
2071 int m = 0;
2072
7918f24d
NC
2073 PERL_ARGS_ASSERT_DIV128;
2074
8b6e33c7
AL
2075 *done = 1;
2076 while (*t) {
2077 const int i = m * 10 + (*t - '0');
2078 const int r = (i >> 7); /* r < 10 */
2079 m = i & 0x7F;
2080 if (r) {
2081 *done = 0;
2082 }
2083 *(t++) = '0' + r;
a6ec74c1 2084 }
8b6e33c7
AL
2085 *(t++) = '\0';
2086 SvCUR_set(pnum, (STRLEN) (t - s));
2087 return (m);
a6ec74c1
JH
2088}
2089
18529408 2090/*
7accc089
JH
2091=for apidoc packlist
2092
2093The engine implementing pack() Perl function.
2094
bfce84ec
AL
2095=cut
2096*/
7accc089
JH
2097
2098void
5aaab254 2099Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2100{
97aff369 2101 dVAR;
aadb217d
JH
2102 tempsym_t sym;
2103
7918f24d
NC
2104 PERL_ARGS_ASSERT_PACKLIST;
2105
f7fe979e 2106 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2107
f337b084
TH
2108 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2109 Also make sure any UTF8 flag is loaded */
56eb0262 2110 SvPV_force_nolen(cat);
bfce84ec
AL
2111 if (DO_UTF8(cat))
2112 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2113
49704364
WL
2114 (void)pack_rec( cat, &sym, beglist, endlist );
2115}
2116
f337b084
TH
2117/* like sv_utf8_upgrade, but also repoint the group start markers */
2118STATIC void
2119marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2120 STRLEN len;
2121 tempsym_t *group;
f7fe979e
AL
2122 const char *from_ptr, *from_start, *from_end, **marks, **m;
2123 char *to_start, *to_ptr;
f337b084
TH
2124
2125 if (SvUTF8(sv)) return;
2126
aa07b2f6 2127 from_start = SvPVX_const(sv);
f337b084
TH
2128 from_end = from_start + SvCUR(sv);
2129 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2130 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2131 if (from_ptr == from_end) {
2132 /* Simple case: no character needs to be changed */
2133 SvUTF8_on(sv);
2134 return;
2135 }
2136
3473cf63 2137 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2138 Newx(to_start, len, char);
f337b084
TH
2139 Copy(from_start, to_start, from_ptr-from_start, char);
2140 to_ptr = to_start + (from_ptr-from_start);
2141
a02a5408 2142 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2143 for (group=sym_ptr; group; group = group->previous)
2144 marks[group->level] = from_start + group->strbeg;
2145 marks[sym_ptr->level+1] = from_end+1;
2146 for (m = marks; *m < from_ptr; m++)
2147 *m = to_start + (*m-from_start);
2148
2149 for (;from_ptr < from_end; from_ptr++) {
2150 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2151 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2152 }
2153 *to_ptr = 0;
2154
2155 while (*m == from_ptr) *m++ = to_ptr;
2156 if (m != marks + sym_ptr->level+1) {
2157 Safefree(marks);
2158 Safefree(to_start);
5637ef5b
NC
2159 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2160 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2161 }
2162 for (group=sym_ptr; group; group = group->previous)
2163 group->strbeg = marks[group->level] - to_start;
2164 Safefree(marks);
2165
2166 if (SvOOK(sv)) {
2167 if (SvIVX(sv)) {
b162af07 2168 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2169 from_start -= SvIVX(sv);
2170 SvIV_set(sv, 0);
2171 }
2172 SvFLAGS(sv) &= ~SVf_OOK;
2173 }
2174 if (SvLEN(sv) != 0)
2175 Safefree(from_start);
f880fe2f 2176 SvPV_set(sv, to_start);
b162af07
SP
2177 SvCUR_set(sv, to_ptr - to_start);
2178 SvLEN_set(sv, len);
f337b084
TH
2179 SvUTF8_on(sv);
2180}
2181
2182/* Exponential string grower. Makes string extension effectively O(n)
2183 needed says how many extra bytes we need (not counting the final '\0')
2184 Only grows the string if there is an actual lack of space
2185*/
2186STATIC char *
0bd48802 2187S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2188 const STRLEN cur = SvCUR(sv);
2189 const STRLEN len = SvLEN(sv);
f337b084 2190 STRLEN extend;
7918f24d
NC
2191
2192 PERL_ARGS_ASSERT_SV_EXP_GROW;
2193
f337b084
TH
2194 if (len - cur > needed) return SvPVX(sv);
2195 extend = needed > len ? needed : len;
2196 return SvGROW(sv, len+extend+1);
2197}
49704364
WL
2198
2199STATIC
2200SV **
f337b084 2201S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2202{
97aff369 2203 dVAR;
49704364 2204 tempsym_t lookahead;
f337b084
TH
2205 I32 items = endlist - beglist;
2206 bool found = next_symbol(symptr);
2207 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2208 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2209
7918f24d
NC
2210 PERL_ARGS_ASSERT_PACK_REC;
2211
f337b084
TH
2212 if (symptr->level == 0 && found && symptr->code == 'U') {
2213 marked_upgrade(aTHX_ cat, symptr);
2214 symptr->flags |= FLAG_DO_UTF8;
2215 utf8 = 0;
49704364 2216 }
f337b084 2217 symptr->strbeg = SvCUR(cat);
49704364
WL
2218
2219 while (found) {
f337b084
TH
2220 SV *fromstr;
2221 STRLEN fromlen;
2222 I32 len;
a0714e2c 2223 SV *lengthcode = NULL;
49704364 2224 I32 datumtype = symptr->code;
f337b084
TH
2225 howlen_t howlen = symptr->howlen;
2226 char *start = SvPVX(cat);
2227 char *cur = start + SvCUR(cat);
49704364 2228
f337b084
TH
2229#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2230
2231 switch (howlen) {
fc241834 2232 case e_star:
f337b084
TH
2233 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2234 0 : items;
2235 break;
2236 default:
2237 /* e_no_len and e_number */
2238 len = symptr->length;
49704364
WL
2239 break;
2240 }
2241
f337b084 2242 if (len) {
a7a3cfaa 2243 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2244
a7a3cfaa
TH
2245 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2246 /* We can process this letter. */
2247 STRLEN size = props & PACK_SIZE_MASK;
2248 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2249 }
f337b084
TH
2250 }
2251
49704364
WL
2252 /* Look ahead for next symbol. Do we have code/code? */
2253 lookahead = *symptr;
2254 found = next_symbol(&lookahead);
246f24af
TH
2255 if (symptr->flags & FLAG_SLASH) {
2256 IV count;
f337b084 2257 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2258 if (strchr("aAZ", lookahead.code)) {
2259 if (lookahead.howlen == e_number) count = lookahead.length;
2260 else {
ce399ba6 2261 if (items > 0) {
48a5da33 2262 count = sv_len_utf8(*beglist);
ce399ba6 2263 }
246f24af
TH
2264 else count = 0;
2265 if (lookahead.code == 'Z') count++;
2266 }
2267 } else {
2268 if (lookahead.howlen == e_number && lookahead.length < items)
2269 count = lookahead.length;
2270 else count = items;
2271 }
2272 lookahead.howlen = e_number;
2273 lookahead.length = count;
2274 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2275 }
49704364 2276
fc241834
RGS
2277 /* Code inside the switch must take care to properly update
2278 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2279 doesn't simply leave using break */
1109a392 2280 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2281 default:
f337b084
TH
2282 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2283 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2284 case '%':
49704364 2285 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2286 {
2287 char *from;
28be1210 2288 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2289 case '.':
2290 if (howlen == e_star) from = start;
2291 else if (len == 0) from = cur;
2292 else {
2293 tempsym_t *group = symptr;
2294
2295 while (--len && group) group = group->previous;
2296 from = group ? start + group->strbeg : start;
2297 }
2298 fromstr = NEXTFROM;
2299 len = SvIV(fromstr);
2300 goto resize;
28be1210 2301 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2302 case '@':
28be1210
TH
2303 from = start + symptr->strbeg;
2304 resize:
28be1210 2305 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2306 if (len >= 0) {
2307 while (len && from < cur) {
2308 from += UTF8SKIP(from);
2309 len--;
2310 }
2311 if (from > cur)
2312 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2313 if (len) {
2314 /* Here we know from == cur */
2315 grow:
2316 GROWING(0, cat, start, cur, len);
2317 Zero(cur, len, char);
2318 cur += len;
2319 } else if (from < cur) {
2320 len = cur - from;
2321 goto shrink;
2322 } else goto no_change;
2323 } else {
2324 cur = from;
2325 len = -len;
2326 goto utf8_shrink;
f337b084 2327 }
28be1210
TH
2328 else {
2329 len -= cur - from;
f337b084 2330 if (len > 0) goto grow;
28be1210 2331 if (len == 0) goto no_change;
fc241834 2332 len = -len;
28be1210 2333 goto shrink;
f337b084 2334 }
a6ec74c1 2335 break;
28be1210 2336 }
fc241834 2337 case '(': {
49704364 2338 tempsym_t savsym = *symptr;
66c611c5
MHM
2339 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2340 symptr->flags |= group_modifiers;
49704364
WL
2341 symptr->patend = savsym.grpend;
2342 symptr->level++;
f337b084 2343 symptr->previous = &lookahead;
18529408 2344 while (len--) {
f337b084
TH
2345 U32 was_utf8;
2346 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2347 else symptr->flags &= ~FLAG_PARSE_UTF8;
2348 was_utf8 = SvUTF8(cat);
49704364 2349 symptr->patptr = savsym.grpbeg;
f337b084
TH
2350 beglist = pack_rec(cat, symptr, beglist, endlist);
2351 if (SvUTF8(cat) != was_utf8)
2352 /* This had better be an upgrade while in utf8==0 mode */
2353 utf8 = 1;
2354
49704364 2355 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2356 break; /* No way to continue */
2357 }
ee790063 2358 items = endlist - beglist;
f337b084
TH
2359 lookahead.flags = symptr->flags & ~group_modifiers;
2360 goto no_change;
18529408 2361 }
62f95557
IZ
2362 case 'X' | TYPE_IS_SHRIEKING:
2363 if (!len) /* Avoid division by 0 */
2364 len = 1;
f337b084
TH
2365 if (utf8) {
2366 char *hop, *last;
2367 I32 l = len;
2368 hop = last = start;
2369 while (hop < cur) {
2370 hop += UTF8SKIP(hop);
2371 if (--l == 0) {
2372 last = hop;
2373 l = len;
2374 }
2375 }
2376 if (last > cur)
2377 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2378 cur = last;
2379 break;
2380 }
2381 len = (cur-start) % len;
62f95557 2382 /* FALL THROUGH */
a6ec74c1 2383 case 'X':
f337b084
TH
2384 if (utf8) {
2385 if (len < 1) goto no_change;
28be1210 2386 utf8_shrink:
f337b084
TH
2387 while (len > 0) {
2388 if (cur <= start)
28be1210
TH
2389 Perl_croak(aTHX_ "'%c' outside of string in pack",
2390 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2391 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2392 if (cur <= start)
28be1210
TH
2393 Perl_croak(aTHX_ "'%c' outside of string in pack",
2394 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2395 }
2396 len--;
2397 }
2398 } else {
fc241834 2399 shrink:
f337b084 2400 if (cur - start < len)
28be1210
TH
2401 Perl_croak(aTHX_ "'%c' outside of string in pack",
2402 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2403 cur -= len;
2404 }
2405 if (cur < start+symptr->strbeg) {
2406 /* Make sure group starts don't point into the void */
2407 tempsym_t *group;
9e27e96a 2408 const STRLEN length = cur-start;
f337b084
TH
2409 for (group = symptr;
2410 group && length < group->strbeg;
2411 group = group->previous) group->strbeg = length;
2412 lookahead.strbeg = length;
2413 }
a6ec74c1 2414 break;
fc241834
RGS
2415 case 'x' | TYPE_IS_SHRIEKING: {
2416 I32 ai32;
62f95557
IZ
2417 if (!len) /* Avoid division by 0 */
2418 len = 1;
230e1fce 2419 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2420 else ai32 = (cur - start) % len;
2421 if (ai32 == 0) goto no_change;
2422 len -= ai32;
2423 }
2424 /* FALL THROUGH */
a6ec74c1 2425 case 'x':
f337b084 2426 goto grow;
a6ec74c1
JH
2427 case 'A':
2428 case 'Z':
f337b084 2429 case 'a': {
f7fe979e 2430 const char *aptr;
f337b084 2431
a6ec74c1 2432 fromstr = NEXTFROM;
e62f0680 2433 aptr = SvPV_const(fromstr, fromlen);
f337b084 2434 if (DO_UTF8(fromstr)) {
f7fe979e 2435 const char *end, *s;
f337b084
TH
2436
2437 if (!utf8 && !SvUTF8(cat)) {
2438 marked_upgrade(aTHX_ cat, symptr);
2439 lookahead.flags |= FLAG_DO_UTF8;
2440 lookahead.strbeg = symptr->strbeg;
2441 utf8 = 1;
2442 start = SvPVX(cat);
2443 cur = start + SvCUR(cat);
2444 }
fc241834 2445 if (howlen == e_star) {
f337b084
TH
2446 if (utf8) goto string_copy;
2447 len = fromlen+1;
2448 }
2449 s = aptr;
2450 end = aptr + fromlen;
2451 fromlen = datumtype == 'Z' ? len-1 : len;
2452 while ((I32) fromlen > 0 && s < end) {
2453 s += UTF8SKIP(s);
2454 fromlen--;
2455 }
2456 if (s > end)
2457 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2458 if (utf8) {
fc241834 2459 len = fromlen;
f337b084
TH
2460 if (datumtype == 'Z') len++;
2461 fromlen = s-aptr;
2462 len += fromlen;
fc241834 2463
f337b084 2464 goto string_copy;
fc241834 2465 }
f337b084
TH
2466 fromlen = len - fromlen;
2467 if (datumtype == 'Z') fromlen--;
2468 if (howlen == e_star) {
2469 len = fromlen;
2470 if (datumtype == 'Z') len++;
fc241834 2471 }
f337b084 2472 GROWING(0, cat, start, cur, len);
fc241834 2473 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2474 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2475 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2476 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2477 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2478 cur += fromlen;
a6ec74c1 2479 len -= fromlen;
f337b084
TH
2480 } else if (utf8) {
2481 if (howlen == e_star) {
2482 len = fromlen;
2483 if (datumtype == 'Z') len++;
a6ec74c1 2484 }
f337b084
TH
2485 if (len <= (I32) fromlen) {
2486 fromlen = len;
2487 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2488 }
fc241834 2489 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2490 upgrade, so:
2491 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2492 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2493 len -= fromlen;
2494 while (fromlen > 0) {
230e1fce 2495 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2496 aptr++;
2497 fromlen--;
fc241834 2498 }
f337b084
TH
2499 } else {
2500 string_copy:
2501 if (howlen == e_star) {
2502 len = fromlen;
2503 if (datumtype == 'Z') len++;
2504 }
2505 if (len <= (I32) fromlen) {
2506 fromlen = len;
2507 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2508 }
f337b084
TH
2509 GROWING(0, cat, start, cur, len);
2510 Copy(aptr, cur, fromlen, char);
2511 cur += fromlen;
2512 len -= fromlen;
a6ec74c1 2513 }
f337b084
TH
2514 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2515 cur += len;
3c4fb04a 2516 SvTAINT(cat);
a6ec74c1 2517 break;
f337b084 2518 }
a6ec74c1 2519 case 'B':
f337b084 2520 case 'b': {
b83604b4 2521 const char *str, *end;
f337b084
TH
2522 I32 l, field_len;
2523 U8 bits;
2524 bool utf8_source;
2525 U32 utf8_flags;
a6ec74c1 2526
fc241834 2527 fromstr = NEXTFROM;
b83604b4 2528 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2529 end = str + fromlen;
2530 if (DO_UTF8(fromstr)) {
2531 utf8_source = TRUE;
041457d9 2532 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2533 } else {
2534 utf8_source = FALSE;
2535 utf8_flags = 0; /* Unused, but keep compilers happy */
2536 }
2537 if (howlen == e_star) len = fromlen;
2538 field_len = (len+7)/8;
2539 GROWING(utf8, cat, start, cur, field_len);
2540 if (len > (I32)fromlen) len = fromlen;
2541 bits = 0;
2542 l = 0;
2543 if (datumtype == 'B')
2544 while (l++ < len) {
2545 if (utf8_source) {
95b63a38 2546 UV val = 0;
f337b084
TH
2547 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2548 bits |= val & 1;
2549 } else bits |= *str++ & 1;
2550 if (l & 7) bits <<= 1;
fc241834 2551 else {
f337b084
TH
2552 PUSH_BYTE(utf8, cur, bits);
2553 bits = 0;
a6ec74c1
JH
2554 }
2555 }
f337b084
TH
2556 else
2557 /* datumtype == 'b' */
2558 while (l++ < len) {
2559 if (utf8_source) {
95b63a38 2560 UV val = 0;
f337b084
TH
2561 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2562 if (val & 1) bits |= 0x80;
2563 } else if (*str++ & 1)
2564 bits |= 0x80;
2565 if (l & 7) bits >>= 1;
fc241834 2566 else {
f337b084
TH
2567 PUSH_BYTE(utf8, cur, bits);
2568 bits = 0;
a6ec74c1
JH
2569 }
2570 }
f337b084
TH
2571 l--;
2572 if (l & 7) {
fc241834 2573 if (datumtype == 'B')
f337b084 2574 bits <<= 7 - (l & 7);
fc241834 2575 else
f337b084
TH
2576 bits >>= 7 - (l & 7);
2577 PUSH_BYTE(utf8, cur, bits);
2578 l += 7;
a6ec74c1 2579 }
f337b084
TH
2580 /* Determine how many chars are left in the requested field */
2581 l /= 8;
2582 if (howlen == e_star) field_len = 0;
2583 else field_len -= l;
2584 Zero(cur, field_len, char);
2585 cur += field_len;
a6ec74c1 2586 break;
f337b084 2587 }
a6ec74c1 2588 case 'H':
f337b084 2589 case 'h': {
10516c54 2590 const char *str, *end;
f337b084
TH
2591 I32 l, field_len;
2592 U8 bits;
2593 bool utf8_source;
2594 U32 utf8_flags;
a6ec74c1 2595
fc241834 2596 fromstr = NEXTFROM;
10516c54 2597 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2598 end = str + fromlen;
2599 if (DO_UTF8(fromstr)) {
2600 utf8_source = TRUE;
041457d9 2601 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2602 } else {
2603 utf8_source = FALSE;
2604 utf8_flags = 0; /* Unused, but keep compilers happy */
2605 }
2606 if (howlen == e_star) len = fromlen;
2607 field_len = (len+1)/2;
2608 GROWING(utf8, cat, start, cur, field_len);
2609 if (!utf8 && len > (I32)fromlen) len = fromlen;
2610 bits = 0;
2611 l = 0;
2612 if (datumtype == 'H')
2613 while (l++ < len) {
2614 if (utf8_source) {
95b63a38 2615 UV val = 0;
f337b084
TH
2616 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2617 if (val < 256 && isALPHA(val))
2618 bits |= (val + 9) & 0xf;
a6ec74c1 2619 else
f337b084
TH
2620 bits |= val & 0xf;
2621 } else if (isALPHA(*str))
2622 bits |= (*str++ + 9) & 0xf;
2623 else
2624 bits |= *str++ & 0xf;
2625 if (l & 1) bits <<= 4;
fc241834 2626 else {
f337b084
TH
2627 PUSH_BYTE(utf8, cur, bits);
2628 bits = 0;
a6ec74c1
JH
2629 }
2630 }
f337b084
TH
2631 else
2632 while (l++ < len) {
2633 if (utf8_source) {
95b63a38 2634 UV val = 0;
f337b084
TH
2635 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2636 if (val < 256 && isALPHA(val))
2637 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2638 else
f337b084
TH
2639 bits |= (val & 0xf) << 4;
2640 } else if (isALPHA(*str))
2641 bits |= ((*str++ + 9) & 0xf) << 4;
2642 else
2643 bits |= (*str++ & 0xf) << 4;
2644 if (l & 1) bits >>= 4;
fc241834 2645 else {
f337b084
TH
2646 PUSH_BYTE(utf8, cur, bits);
2647 bits = 0;
a6ec74c1 2648 }
fc241834 2649 }
f337b084
TH
2650 l--;
2651 if (l & 1) {
2652 PUSH_BYTE(utf8, cur, bits);
2653 l++;
2654 }
2655 /* Determine how many chars are left in the requested field */
2656 l /= 2;
2657 if (howlen == e_star) field_len = 0;
2658 else field_len -= l;
2659 Zero(cur, field_len, char);
2660 cur += field_len;
2661 break;
fc241834
RGS
2662 }
2663 case 'c':
f337b084
TH
2664 while (len-- > 0) {
2665 IV aiv;
2666 fromstr = NEXTFROM;
2667 aiv = SvIV(fromstr);
a2a5de95
NC
2668 if ((-128 > aiv || aiv > 127))
2669 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2670 "Character in 'c' format wrapped in pack");
585ec06d 2671 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2672 }
2673 break;
2674 case 'C':
f337b084
TH
2675 if (len == 0) {
2676 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2677 break;
2678 }
a6ec74c1 2679 while (len-- > 0) {
f337b084 2680 IV aiv;
a6ec74c1 2681 fromstr = NEXTFROM;
f337b084 2682 aiv = SvIV(fromstr);
a2a5de95
NC
2683 if ((0 > aiv || aiv > 0xff))
2684 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2685 "Character in 'C' format wrapped in pack");
1651fc44 2686 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2687 }
fc241834
RGS
2688 break;
2689 case 'W': {
2690 char *end;
670f1322 2691 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2692
2693 end = start+SvLEN(cat)-1;
2694 if (utf8) end -= UTF8_MAXLEN-1;
2695 while (len-- > 0) {
2696 UV auv;
2697 fromstr = NEXTFROM;
2698 auv = SvUV(fromstr);
2699 if (in_bytes) auv = auv % 0x100;
2700 if (utf8) {
2701 W_utf8:
2702 if (cur > end) {
2703 *cur = '\0';
b162af07 2704 SvCUR_set(cat, cur - start);
fc241834
RGS
2705
2706 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2707 end = start+SvLEN(cat)-UTF8_MAXLEN;
2708 }
230e1fce
NC
2709 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2710 NATIVE_TO_UNI(auv),
041457d9 2711 warn_utf8 ?
230e1fce 2712 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2713 } else {
2714 if (auv >= 0x100) {
2715 if (!SvUTF8(cat)) {
2716 *cur = '\0';
b162af07 2717 SvCUR_set(cat, cur - start);
fc241834
RGS
2718 marked_upgrade(aTHX_ cat, symptr);
2719 lookahead.flags |= FLAG_DO_UTF8;
2720 lookahead.strbeg = symptr->strbeg;
2721 utf8 = 1;
2722 start = SvPVX(cat);
2723 cur = start + SvCUR(cat);
2724 end = start+SvLEN(cat)-UTF8_MAXLEN;
2725 goto W_utf8;
2726 }
a2a5de95
NC
2727 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2728 "Character in 'W' format wrapped in pack");
fc241834
RGS
2729 auv &= 0xff;
2730 }
2731 if (cur >= end) {
2732 *cur = '\0';
b162af07 2733 SvCUR_set(cat, cur - start);
fc241834
RGS
2734 GROWING(0, cat, start, cur, len+1);
2735 end = start+SvLEN(cat)-1;
2736 }
fe2774ed 2737 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2738 }
2739 }
2740 break;
fc241834
RGS
2741 }
2742 case 'U': {
2743 char *end;
2744
2745 if (len == 0) {
2746 if (!(symptr->flags & FLAG_DO_UTF8)) {
2747 marked_upgrade(aTHX_ cat, symptr);
2748 lookahead.flags |= FLAG_DO_UTF8;
2749 lookahead.strbeg = symptr->strbeg;
2750 }
2751 utf8 = 0;
2752 goto no_change;
2753 }
2754
2755 end = start+SvLEN(cat);
2756 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2757 while (len-- > 0) {
fc241834 2758 UV auv;
a6ec74c1 2759 fromstr = NEXTFROM;
fc241834
RGS
2760 auv = SvUV(fromstr);
2761 if (utf8) {
230e1fce 2762 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2763 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2764 warn_utf8 ?
fc241834
RGS
2765 0 : UNICODE_ALLOW_ANY);
2766 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2767 *cur = '\0';
b162af07 2768 SvCUR_set(cat, cur - start);
fc241834
RGS
2769 GROWING(0, cat, start, cur,
2770 len+(endb-buffer)*UTF8_EXPAND);
2771 end = start+SvLEN(cat);
2772 }
64844641 2773 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2774 } else {
2775 if (cur >= end) {
2776 *cur = '\0';
b162af07 2777 SvCUR_set(cat, cur - start);
fc241834
RGS
2778 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2779 end = start+SvLEN(cat)-UTF8_MAXLEN;
2780 }
230e1fce 2781 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2782 warn_utf8 ?
230e1fce 2783 0 : UNICODE_ALLOW_ANY);
fc241834 2784 }
a6ec74c1 2785 }
a6ec74c1 2786 break;
fc241834 2787 }
a6ec74c1
JH
2788 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2789 case 'f':
a6ec74c1 2790 while (len-- > 0) {
f337b084
TH
2791 float afloat;
2792 NV anv;
a6ec74c1 2793 fromstr = NEXTFROM;
f337b084 2794 anv = SvNV(fromstr);
85bba25f 2795# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2796 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2797 * on Alpha; fake it if we don't have them.
2798 */
f337b084 2799 if (anv > FLT_MAX)
fc241834 2800 afloat = FLT_MAX;
f337b084 2801 else if (anv < -FLT_MAX)
fc241834 2802 afloat = -FLT_MAX;
f337b084 2803 else afloat = (float)anv;
baf3cf9c 2804# else
f337b084 2805 afloat = (float)anv;
baf3cf9c 2806# endif
1109a392 2807 DO_BO_PACK_N(afloat, float);
f337b084 2808 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2809 }
2810 break;
2811 case 'd':
a6ec74c1 2812 while (len-- > 0) {
f337b084
TH
2813 double adouble;
2814 NV anv;
a6ec74c1 2815 fromstr = NEXTFROM;
f337b084 2816 anv = SvNV(fromstr);
85bba25f 2817# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2818 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2819 * on Alpha; fake it if we don't have them.
2820 */
f337b084 2821 if (anv > DBL_MAX)
fc241834 2822 adouble = DBL_MAX;
f337b084 2823 else if (anv < -DBL_MAX)
fc241834 2824 adouble = -DBL_MAX;
f337b084 2825 else adouble = (double)anv;
baf3cf9c 2826# else
f337b084 2827 adouble = (double)anv;
baf3cf9c 2828# endif
1109a392 2829 DO_BO_PACK_N(adouble, double);
f337b084 2830 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2831 }
2832 break;
fc241834 2833 case 'F': {
275663fa 2834 NV_bytes anv;
1109a392 2835 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2836 while (len-- > 0) {
2837 fromstr = NEXTFROM;
cd07c537
DM
2838#ifdef __GNUC__
2839 /* to work round a gcc/x86 bug; don't use SvNV */
2840 anv.nv = sv_2nv(fromstr);
2841#else
275663fa 2842 anv.nv = SvNV(fromstr);
cd07c537 2843#endif
1109a392 2844 DO_BO_PACK_N(anv, NV);
275663fa 2845 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2846 }
2847 break;
fc241834 2848 }
92d41999 2849#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2850 case 'D': {
275663fa 2851 ld_bytes aldouble;
1109a392
MHM
2852 /* long doubles can have unused bits, which may be nonzero */
2853 Zero(&aldouble, 1, long double);
92d41999
JH
2854 while (len-- > 0) {
2855 fromstr = NEXTFROM;
cd07c537
DM
2856# ifdef __GNUC__
2857 /* to work round a gcc/x86 bug; don't use SvNV */
2858 aldouble.ld = (long double)sv_2nv(fromstr);
2859# else
275663fa 2860 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2861# endif
1109a392 2862 DO_BO_PACK_N(aldouble, long double);
275663fa 2863 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2864 }
2865 break;
fc241834 2866 }
92d41999 2867#endif
068bd2e7 2868 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2869 case 'n':
2870 while (len-- > 0) {
f337b084 2871 I16 ai16;
a6ec74c1 2872 fromstr = NEXTFROM;
ef108786 2873 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2874#ifdef HAS_HTONS
ef108786 2875 ai16 = PerlSock_htons(ai16);
a6ec74c1 2876#endif
f337b084 2877 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2878 }
2879 break;
068bd2e7 2880 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2881 case 'v':
2882 while (len-- > 0) {
f337b084 2883 I16 ai16;
a6ec74c1 2884 fromstr = NEXTFROM;
ef108786 2885 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2886#ifdef HAS_HTOVS
ef108786 2887 ai16 = htovs(ai16);
a6ec74c1 2888#endif
f337b084 2889 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2890 }
2891 break;
49704364 2892 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2893#if SHORTSIZE != SIZE16
fc241834 2894 while (len-- > 0) {
f337b084 2895 unsigned short aushort;
fc241834
RGS
2896 fromstr = NEXTFROM;
2897 aushort = SvUV(fromstr);
2898 DO_BO_PACK(aushort, s);
f337b084 2899 PUSH_VAR(utf8, cur, aushort);
fc241834 2900 }
49704364
WL
2901 break;
2902#else
2903 /* Fall through! */
a6ec74c1 2904#endif
49704364 2905 case 'S':
fc241834 2906 while (len-- > 0) {
f337b084 2907 U16 au16;
fc241834
RGS
2908 fromstr = NEXTFROM;
2909 au16 = (U16)SvUV(fromstr);
2910 DO_BO_PACK(au16, 16);
f337b084 2911 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2912 }
2913 break;
49704364 2914 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2915#if SHORTSIZE != SIZE16
fc241834 2916 while (len-- > 0) {
f337b084 2917 short ashort;
fc241834
RGS
2918 fromstr = NEXTFROM;
2919 ashort = SvIV(fromstr);
2920 DO_BO_PACK(ashort, s);
f337b084 2921 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2922 }
49704364
WL
2923 break;
2924#else
2925 /* Fall through! */
a6ec74c1 2926#endif
49704364
WL
2927 case 's':
2928 while (len-- > 0) {
f337b084 2929 I16 ai16;
49704364 2930 fromstr = NEXTFROM;
ef108786
MHM
2931 ai16 = (I16)SvIV(fromstr);
2932 DO_BO_PACK(ai16, 16);
f337b084 2933 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2934 }
2935 break;
2936 case 'I':
49704364 2937 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2938 while (len-- > 0) {
f337b084 2939 unsigned int auint;
a6ec74c1
JH
2940 fromstr = NEXTFROM;
2941 auint = SvUV(fromstr);
1109a392 2942 DO_BO_PACK(auint, i);
f337b084 2943 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2944 }
2945 break;
92d41999
JH
2946 case 'j':
2947 while (len-- > 0) {
f337b084 2948 IV aiv;
92d41999
JH
2949 fromstr = NEXTFROM;
2950 aiv = SvIV(fromstr);
1109a392
MHM
2951#if IVSIZE == INTSIZE
2952 DO_BO_PACK(aiv, i);
2953#elif IVSIZE == LONGSIZE
2954 DO_BO_PACK(aiv, l);
2955#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2956 DO_BO_PACK(aiv, 64);
f337b084
TH
2957#else
2958 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 2959#endif
f337b084 2960 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
2961 }
2962 break;
2963 case 'J':
2964 while (len-- > 0) {
f337b084 2965 UV auv;
92d41999
JH
2966 fromstr = NEXTFROM;
2967 auv = SvUV(fromstr);
1109a392
MHM
2968#if UVSIZE == INTSIZE
2969 DO_BO_PACK(auv, i);
2970#elif UVSIZE == LONGSIZE
2971 DO_BO_PACK(auv, l);
2972#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2973 DO_BO_PACK(auv, 64);
f337b084
TH
2974#else
2975 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 2976#endif
f337b084 2977 PUSH_VAR(utf8, cur, auv);
92d41999
JH
2978 }
2979 break;
a6ec74c1
JH
2980 case 'w':
2981 while (len-- > 0) {
f337b084 2982 NV anv;
a6ec74c1 2983 fromstr = NEXTFROM;
15e9f109 2984 anv = SvNV(fromstr);
a6ec74c1 2985
f337b084
TH
2986 if (anv < 0) {
2987 *cur = '\0';
b162af07 2988 SvCUR_set(cat, cur - start);
49704364 2989 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2990 }
a6ec74c1 2991
196b62db
NC
2992 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2993 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2994 any negative IVs will have already been got by the croak()
2995 above. IOK is untrue for fractions, so we test them
2996 against UV_MAX_P1. */
f337b084
TH
2997 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2998 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2999 char *in = buf + sizeof(buf);
196b62db 3000 UV auv = SvUV(fromstr);
a6ec74c1
JH
3001
3002 do {
eb160463 3003 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3004 auv >>= 7;
3005 } while (auv);
3006 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3007 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3008 in, (buf + sizeof(buf)) - in);
3009 } else if (SvPOKp(fromstr))
3010 goto w_string;
a6ec74c1 3011 else if (SvNOKp(fromstr)) {
0258719b 3012 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 3013 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
3014 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3015 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3016 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3017 Some C compilers are strict about integral constant
3018 expressions so we conservatively divide by a slightly
3019 smaller integer instead of multiplying by the exact
3020 floating-point value.
0258719b
NC
3021 */
3022#ifdef NV_MAX_10_EXP
f337b084 3023 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3024 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3025#else
f337b084 3026 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3027 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3028#endif
a6ec74c1
JH
3029 char *in = buf + sizeof(buf);
3030
8b6e33c7 3031 anv = Perl_floor(anv);
a6ec74c1 3032 do {
8b6e33c7 3033 const NV next = Perl_floor(anv / 128);
a6ec74c1 3034 if (in <= buf) /* this cannot happen ;-) */
49704364 3035 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3036 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3037 anv = next;
3038 } while (anv > 0);
a6ec74c1 3039 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3040 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3041 in, (buf + sizeof(buf)) - in);
3042 } else {
8b6e33c7
AL
3043 const char *from;
3044 char *result, *in;
735b914b
JH
3045 SV *norm;
3046 STRLEN len;
3047 bool done;
3048
f337b084 3049 w_string:
735b914b 3050 /* Copy string and check for compliance */
349d4f2f 3051 from = SvPV_const(fromstr, len);
735b914b 3052 if ((norm = is_an_int(from, len)) == NULL)
49704364 3053 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3054
a02a5408 3055 Newx(result, len, char);
735b914b
JH
3056 in = result + len;
3057 done = FALSE;
f337b084 3058 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3059 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3060 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3061 in, (result + len) - in);
735b914b
JH
3062 Safefree(result);
3063 SvREFCNT_dec(norm); /* free norm */
fc241834 3064 }
a6ec74c1
JH
3065 }
3066 break;
3067 case 'i':
49704364 3068 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3069 while (len-- > 0) {
f337b084 3070 int aint;
a6ec74c1
JH
3071 fromstr = NEXTFROM;
3072 aint = SvIV(fromstr);
1109a392 3073 DO_BO_PACK(aint, i);
f337b084 3074 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3075 }
3076 break;
068bd2e7 3077 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
3078 case 'N':
3079 while (len-- > 0) {
f337b084 3080 U32 au32;
a6ec74c1 3081 fromstr = NEXTFROM;
ef108786 3082 au32 = SvUV(fromstr);
a6ec74c1 3083#ifdef HAS_HTONL
ef108786 3084 au32 = PerlSock_htonl(au32);
a6ec74c1 3085#endif
f337b084 3086 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3087 }
3088 break;
068bd2e7 3089 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
3090 case 'V':
3091 while (len-- > 0) {
f337b084 3092 U32 au32;
a6ec74c1 3093 fromstr = NEXTFROM;
ef108786 3094 au32 = SvUV(fromstr);
a6ec74c1 3095#ifdef HAS_HTOVL
ef108786 3096 au32 = htovl(au32);
a6ec74c1 3097#endif
f337b084 3098 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3099 }
3100 break;
49704364 3101 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3102#if LONGSIZE != SIZE32
fc241834 3103 while (len-- > 0) {
f337b084 3104 unsigned long aulong;
fc241834
RGS
3105 fromstr = NEXTFROM;
3106 aulong = SvUV(fromstr);
3107 DO_BO_PACK(aulong, l);
f337b084 3108 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3109 }
49704364
WL
3110 break;
3111#else
3112 /* Fall though! */
a6ec74c1 3113#endif
49704364 3114 case 'L':
fc241834 3115 while (len-- > 0) {
f337b084 3116 U32 au32;
fc241834
RGS
3117 fromstr = NEXTFROM;
3118 au32 = SvUV(fromstr);
3119 DO_BO_PACK(au32, 32);
f337b084 3120 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3121 }
3122 break;
49704364 3123 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3124#if LONGSIZE != SIZE32
fc241834 3125 while (len-- > 0) {
f337b084 3126 long along;
fc241834
RGS
3127 fromstr = NEXTFROM;
3128 along = SvIV(fromstr);
3129 DO_BO_PACK(along, l);
f337b084 3130 PUSH_VAR(utf8, cur, along);
a6ec74c1 3131 }
49704364
WL
3132 break;
3133#else
3134 /* Fall though! */
a6ec74c1 3135#endif
49704364
WL
3136 case 'l':
3137 while (len-- > 0) {
f337b084 3138 I32 ai32;
49704364 3139 fromstr = NEXTFROM;
ef108786
MHM
3140 ai32 = SvIV(fromstr);
3141 DO_BO_PACK(ai32, 32);
f337b084 3142 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3143 }
3144 break;
3145#ifdef HAS_QUAD
3146 case 'Q':
3147 while (len-- > 0) {
f337b084 3148 Uquad_t auquad;
a6ec74c1 3149 fromstr = NEXTFROM;
f337b084 3150 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3151 DO_BO_PACK(auquad, 64);
f337b084 3152 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3153 }
3154 break;
3155 case 'q':
3156 while (len-- > 0) {
f337b084 3157 Quad_t aquad;
a6ec74c1
JH
3158 fromstr = NEXTFROM;
3159 aquad = (Quad_t)SvIV(fromstr);
1109a392 3160 DO_BO_PACK(aquad, 64);
f337b084 3161 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3162 }
3163 break;
f337b084 3164#endif /* HAS_QUAD */
a6ec74c1
JH
3165 case 'P':
3166 len = 1; /* assume SV is correct length */
f337b084 3167 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3168 /* Fall through! */
a6ec74c1
JH
3169 case 'p':
3170 while (len-- > 0) {
83003860 3171 const char *aptr;
f337b084 3172
a6ec74c1 3173 fromstr = NEXTFROM;
28a4f200
TH
3174 SvGETMAGIC(fromstr);
3175 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3176 else {
a6ec74c1
JH
3177 /* XXX better yet, could spirit away the string to
3178 * a safe spot and hang on to it until the result
3179 * of pack() (and all copies of the result) are
3180 * gone.
3181 */
041457d9 3182 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3183 !SvREADONLY(fromstr)))) {
3184 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3185 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3186 }
3187 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3188 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3189 else
2596d9fe 3190 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3191 }
07409e01 3192 DO_BO_PACK_PC(aptr);
f337b084 3193 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3194 }
3195 break;
fc241834 3196 case 'u': {
f7fe979e 3197 const char *aptr, *aend;
fc241834 3198 bool from_utf8;
f337b084 3199
a6ec74c1 3200 fromstr = NEXTFROM;
fc241834
RGS
3201 if (len <= 2) len = 45;
3202 else len = len / 3 * 3;
3203 if (len >= 64) {
a2a5de95
NC
3204 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3205 "Field too wide in 'u' format in pack");
fc241834
RGS
3206 len = 63;
3207 }
83003860 3208 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3209 from_utf8 = DO_UTF8(fromstr);
3210 if (from_utf8) {
3211 aend = aptr + fromlen;
3f63b0e5 3212 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3213 } else aend = NULL; /* Unused, but keep compilers happy */
3214 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3215 while (fromlen > 0) {
fc241834 3216 U8 *end;
a6ec74c1 3217 I32 todo;
fc241834 3218 U8 hunk[1+63/3*4+1];
a6ec74c1 3219
eb160463 3220 if ((I32)fromlen > len)
a6ec74c1
JH
3221 todo = len;
3222 else
3223 todo = fromlen;
fc241834
RGS
3224 if (from_utf8) {
3225 char buffer[64];
3226 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3227 'u' | TYPE_IS_PACK)) {
3228 *cur = '\0';
b162af07 3229 SvCUR_set(cat, cur - start);
5637ef5b
NC
3230 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3231 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3232 aptr, aend, buffer, (long) todo);
fc241834
RGS
3233 }
3234 end = doencodes(hunk, buffer, todo);
3235 } else {
3236 end = doencodes(hunk, aptr, todo);
3237 aptr += todo;
3238 }
3239 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3240 fromlen -= todo;
3241 }
a6ec74c1
JH
3242 break;
3243 }
f337b084
TH
3244 }
3245 *cur = '\0';
b162af07 3246 SvCUR_set(cat, cur - start);
f337b084 3247 no_change:
49704364 3248 *symptr = lookahead;
a6ec74c1 3249 }
49704364 3250 return beglist;
18529408
IZ
3251}
3252#undef NEXTFROM
3253
3254
3255PP(pp_pack)
3256{
97aff369 3257 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3258 SV *cat = TARG;
18529408 3259 STRLEN fromlen;
349d4f2f 3260 SV *pat_sv = *++MARK;
eb578fdb
KW
3261 const char *pat = SvPV_const(pat_sv, fromlen);
3262 const char *patend = pat + fromlen;
18529408
IZ
3263
3264 MARK++;
76f68e9b 3265 sv_setpvs(cat, "");
f337b084 3266 SvUTF8_off(cat);
18529408 3267
7accc089 3268 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3269
a6ec74c1
JH
3270 SvSETMAGIC(cat);
3271 SP = ORIGMARK;
3272 PUSHs(cat);
3273 RETURN;
3274}
a6ec74c1 3275
73cb7263
NC
3276/*
3277 * Local variables:
3278 * c-indentation-style: bsd
3279 * c-basic-offset: 4
14d04a33 3280 * indent-tabs-mode: nil
73cb7263
NC
3281 * End:
3282 *
14d04a33 3283 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3284 */