This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Eliminate pre-5.9.x conditional code for PERL_PACK_CAN_BYTEORDER
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
7212898e 85#if PERL_VERSION >= 9
f337b084
TH
86# define PERL_PACK_CAN_SHRIEKSIGN
87#endif
88
89#ifndef CHAR_BIT
90# define CHAR_BIT 8
7212898e 91#endif
3473cf63
RGS
92/* Maximum number of bytes to which a byte can grow due to upgrade */
93#define UTF8_EXPAND 2
7212898e 94
a6ec74c1 95/*
a6ec74c1
JH
96 * Offset for integer pack/unpack.
97 *
98 * On architectures where I16 and I32 aren't really 16 and 32 bits,
99 * which for now are all Crays, pack and unpack have to play games.
100 */
101
102/*
103 * These values are required for portability of pack() output.
104 * If they're not right on your machine, then pack() and unpack()
105 * wouldn't work right anyway; you'll need to apply the Cray hack.
106 * (I'd like to check them with #if, but you can't use sizeof() in
107 * the preprocessor.) --???
108 */
109/*
110 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
111 defines are now in config.h. --Andy Dougherty April 1998
112 */
113#define SIZE16 2
114#define SIZE32 4
115
116/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
117 --jhi Feb 1999 */
118
1109a392
MHM
119#if U16SIZE > SIZE16 || U32SIZE > SIZE32
120# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
121# define OFF16(p) ((char*)(p))
122# define OFF32(p) ((char*)(p))
a6ec74c1 123# else
1109a392 124# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
125# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
126# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
127# else
08ca2aa3 128 ++++ bad cray byte order
a6ec74c1
JH
129# endif
130# endif
a6ec74c1 131#else
08ca2aa3
TH
132# define OFF16(p) ((char *) (p))
133# define OFF32(p) ((char *) (p))
a6ec74c1
JH
134#endif
135
f337b084
TH
136/* Only to be used inside a loop (see the break) */
137#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
138 if (utf8) { \
139 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
140 } else { \
141 Copy(s, OFF16(p), SIZE16, char); \
142 (s) += SIZE16; \
143 } \
144} STMT_END
145
146/* Only to be used inside a loop (see the break) */
147#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
148 if (utf8) { \
149 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
150 } else { \
151 Copy(s, OFF32(p), SIZE32, char); \
152 (s) += SIZE32; \
153 } \
154} STMT_END
155
156#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
157#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
158
159/* Only to be used inside a loop (see the break) */
275663fa 160#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
161STMT_START { \
162 if (utf8) { \
f337b084 163 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 164 (char *) (buf), len, datumtype)) break; \
08ca2aa3 165 } else { \
275663fa
TC
166 Copy(s, (char *) (buf), len, char); \
167 s += len; \
08ca2aa3 168 } \
08ca2aa3
TH
169} STMT_END
170
275663fa
TC
171#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
173
f337b084 174#define PUSH_VAR(utf8, aptr, var) \
230e1fce 175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 176
49704364
WL
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
66c611c5 180/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 183#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
a6ec74c1
JH
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
a6ec74c1 195
7918f24d
NC
196 PERL_ARGS_ASSERT_MUL128;
197
a6ec74c1 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 199 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
f7fe979e 210 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
a6ec74c1
JH
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
219#if 'I' == 73 && 'J' == 74
220/* On an ASCII/ISO kind of system */
221#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222#else
223/*
224 Some other sort of character set - use memchr() so we don't match
225 the null byte.
226 */
227#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
228#endif
229
66c611c5 230/* type modifiers */
62f95557 231#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
232#define TYPE_IS_BIG_ENDIAN 0x200
233#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 234#define TYPE_IS_PACK 0x800
1109a392 235#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 236#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
237#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
238
7212898e 239#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 240# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 241#else
28be1210 242# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
243#endif
244
7212898e
NC
245# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
246# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
247
248# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
249
250# define DO_BO_UNPACK(var, type) \
1109a392 251 STMT_START { \
66c611c5 252 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
253 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
254 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
255 default: break; \
256 } \
257 } STMT_END
258
7212898e 259# define DO_BO_PACK(var, type) \
1109a392 260 STMT_START { \
66c611c5 261 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
262 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
263 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
264 default: break; \
265 } \
266 } STMT_END
267
07409e01 268# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 269 STMT_START { \
66c611c5 270 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 271 case TYPE_IS_BIG_ENDIAN: \
07409e01 272 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
273 break; \
274 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 275 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 default: \
278 break; \
279 } \
280 } STMT_END
281
07409e01 282# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 283 STMT_START { \
66c611c5 284 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 285 case TYPE_IS_BIG_ENDIAN: \
07409e01 286 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
287 break; \
288 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 289 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 default: \
292 break; \
293 } \
294 } STMT_END
295
7212898e 296# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
297 STMT_START { \
298 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
299 case TYPE_IS_BIG_ENDIAN: \
300 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
301 "platform", #action, #type); \
302 break; \
303 case TYPE_IS_LITTLE_ENDIAN: \
304 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
305 "platform", #action, #type); \
306 break; \
307 default: \
308 break; \
309 } \
310 } STMT_END
311
7212898e 312# if PTRSIZE == INTSIZE
07409e01
NC
313# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
314# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
315# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
316# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 317# elif PTRSIZE == LONGSIZE
fa58a56f
S
318# if LONGSIZE < IVSIZE && IVSIZE == 8
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
323# else
324# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
325# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
326# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
327# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
328# endif
2b00a750
JD
329# elif PTRSIZE == IVSIZE
330# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
331# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
332# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
333# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
334# else
335# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
336# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
337# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
338# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 339# endif
1109a392 340
7212898e 341# if defined(my_htolen) && defined(my_letohn) && \
1109a392 342 defined(my_htoben) && defined(my_betohn)
7212898e 343# define DO_BO_UNPACK_N(var, type) \
1109a392 344 STMT_START { \
66c611c5 345 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
346 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
347 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
348 default: break; \
349 } \
350 } STMT_END
351
7212898e 352# define DO_BO_PACK_N(var, type) \
1109a392 353 STMT_START { \
66c611c5 354 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
355 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
356 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
357 default: break; \
358 } \
359 } STMT_END
7212898e
NC
360# else
361# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
362# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
363# endif
364
78d46eaa 365#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 366#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
367#define PACK_SIZE_MASK 0x3F
368
298bc19c 369#include "packsizetables.c"
78d46eaa 370
08ca2aa3 371STATIC U8
f7fe979e 372uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 373{
08ca2aa3 374 STRLEN retlen;
0bcc34c2 375 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 376 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 377 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
378 warnings), but these two mean we make no progress in the string and
379 might enter an infinite loop */
380 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
381 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
382 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 383 if (val >= 0x100) {
a2a5de95
NC
384 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
385 "Character in '%c' format wrapped in unpack",
386 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
387 val &= 0xff;
388 }
389 *s += retlen;
fe2774ed 390 return (U8)val;
08ca2aa3
TH
391}
392
f337b084
TH
393#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
394 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
395 *(U8 *)(s)++)
396
397STATIC bool
f7fe979e 398uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
399{
400 UV val;
401 STRLEN retlen;
f7fe979e 402 const char *from = *s;
08ca2aa3 403 int bad = 0;
f7fe979e 404 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
405 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
406 for (;buf_len > 0; buf_len--) {
407 if (from >= end) return FALSE;
f337b084 408 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
409 if (retlen == (STRLEN) -1 || retlen == 0) {
410 from += UTF8SKIP(from);
411 bad |= 1;
412 } else from += retlen;
413 if (val >= 0x100) {
414 bad |= 2;
415 val &= 0xff;
416 }
fe2774ed 417 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
418 }
419 /* We have enough characters for the buffer. Did we have problems ? */
420 if (bad) {
421 if (bad & 1) {
422 /* Rewalk the string fragment while warning */
f7fe979e 423 const char *ptr;
9e27e96a 424 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
425 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
426 if (ptr >= end) break;
f337b084 427 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
428 }
429 if (from > end) from = end;
430 }
a2a5de95
NC
431 if ((bad & 2))
432 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 433 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
434 "Character(s) in '%c' format wrapped in %s",
435 (int) TYPE_NO_MODIFIERS(datumtype),
436 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
437 }
438 *s = from;
439 return TRUE;
440}
441
442STATIC bool
f7fe979e 443next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 444{
97aff369 445 dVAR;
08ca2aa3 446 STRLEN retlen;
0bcc34c2 447 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
448 if (val >= 0x100 || !ISUUCHAR(val) ||
449 retlen == (STRLEN) -1 || retlen == 0) {
450 *out = 0;
451 return FALSE;
452 }
453 *out = PL_uudmap[val] & 077;
f337b084 454 *s += retlen;
08ca2aa3
TH
455 return TRUE;
456}
78d46eaa 457
64844641 458STATIC char *
14333449 459S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 460 const U8 * const end = start + len;
64844641 461
7918f24d
NC
462 PERL_ARGS_ASSERT_BYTES_TO_UNI;
463
f337b084 464 while (start < end) {
48fa4626
TS
465 const UV uv = NATIVE_TO_ASCII(*start);
466 if (UNI_IS_INVARIANT(uv))
467 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
468 else {
469 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
470 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
471 }
472 start++;
473 }
64844641 474 return dest;
f337b084
TH
475}
476
230e1fce
NC
477#define PUSH_BYTES(utf8, cur, buf, len) \
478STMT_START { \
64844641
AL
479 if (utf8) \
480 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
481 else { \
482 Copy(buf, cur, len, char); \
483 (cur) += (len); \
484 } \
f337b084
TH
485} STMT_END
486
487#define GROWING(utf8, cat, start, cur, in_len) \
488STMT_START { \
489 STRLEN glen = (in_len); \
3473cf63 490 if (utf8) glen *= UTF8_EXPAND; \
f337b084 491 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 492 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
493 (cur) = (start) + SvCUR(cat); \
494 } \
495} STMT_END
496
497#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
498STMT_START { \
f7fe979e 499 const STRLEN glen = (in_len); \
f337b084 500 STRLEN gl = glen; \
3473cf63 501 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
502 if ((cur) + gl >= (start) + SvLEN(cat)) { \
503 *cur = '\0'; \
b162af07 504 SvCUR_set((cat), (cur) - (start)); \
0bd48802 505 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
506 (cur) = (start) + SvCUR(cat); \
507 } \
508 PUSH_BYTES(utf8, cur, buf, glen); \
509} STMT_END
510
511#define PUSH_BYTE(utf8, s, byte) \
512STMT_START { \
513 if (utf8) { \
f7fe979e 514 const U8 au8 = (byte); \
64844641 515 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
516 } else *(U8 *)(s)++ = (byte); \
517} STMT_END
518
519/* Only to be used inside a loop (see the break) */
520#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
521STMT_START { \
522 STRLEN retlen; \
523 if (str >= end) break; \
524 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
525 if (retlen == (STRLEN) -1 || retlen == 0) { \
526 *cur = '\0'; \
527 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
528 } \
529 str += retlen; \
530} STMT_END
531
f7fe979e
AL
532static const char *_action( const tempsym_t* symptr )
533{
10edeb5d 534 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
535}
536
206947d2 537/* Returns the sizeof() struct described by pat */
028d1f6d 538STATIC I32
f337b084 539S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 540{
f337b084 541 I32 total = 0;
206947d2 542
7918f24d
NC
543 PERL_ARGS_ASSERT_MEASURE_STRUCT;
544
49704364 545 while (next_symbol(symptr)) {
f337b084 546 I32 len;
f7fe979e 547 int size;
f337b084
TH
548
549 switch (symptr->howlen) {
fc241834 550 case e_star:
49704364 551 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 552 _action( symptr ) );
49704364 553 break;
f337b084
TH
554 default:
555 /* e_no_len and e_number */
556 len = symptr->length;
557 break;
49704364
WL
558 }
559
a7a3cfaa 560 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 561 if (!size) {
f7fe979e 562 int star;
80a13697
NC
563 /* endianness doesn't influence the size of a type */
564 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
565 default:
566 Perl_croak(aTHX_ "Invalid type '%c' in %s",
567 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 568 _action( symptr ) );
28be1210
TH
569#ifdef PERL_PACK_CAN_SHRIEKSIGN
570 case '.' | TYPE_IS_SHRIEKING:
571 case '@' | TYPE_IS_SHRIEKING:
572#endif
80a13697 573 case '@':
28be1210 574 case '.':
80a13697
NC
575 case '/':
576 case 'U': /* XXXX Is it correct? */
577 case 'w':
578 case 'u':
579 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 580 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 581 _action( symptr ) );
80a13697
NC
582 case '%':
583 size = 0;
584 break;
585 case '(':
fc241834
RGS
586 {
587 tempsym_t savsym = *symptr;
588 symptr->patptr = savsym.grpbeg;
589 symptr->patend = savsym.grpend;
590 /* XXXX Theoretically, we need to measure many times at
591 different positions, since the subexpression may contain
592 alignment commands, but be not of aligned length.
593 Need to detect this and croak(). */
594 size = measure_struct(symptr);
595 *symptr = savsym;
596 break;
597 }
80a13697
NC
598 case 'X' | TYPE_IS_SHRIEKING:
599 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
600 */
601 if (!len) /* Avoid division by 0 */
602 len = 1;
603 len = total % len; /* Assumed: the start is aligned. */
604 /* FALL THROUGH */
605 case 'X':
606 size = -1;
607 if (total < len)
f7fe979e 608 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
609 break;
610 case 'x' | TYPE_IS_SHRIEKING:
611 if (!len) /* Avoid division by 0 */
612 len = 1;
613 star = total % len; /* Assumed: the start is aligned. */
614 if (star) /* Other portable ways? */
615 len = len - star;
616 else
617 len = 0;
618 /* FALL THROUGH */
619 case 'x':
620 case 'A':
621 case 'Z':
622 case 'a':
80a13697
NC
623 size = 1;
624 break;
625 case 'B':
626 case 'b':
627 len = (len + 7)/8;
628 size = 1;
629 break;
630 case 'H':
631 case 'h':
632 len = (len + 1)/2;
633 size = 1;
634 break;
78d46eaa 635
80a13697
NC
636 case 'P':
637 len = 1;
638 size = sizeof(char*);
78d46eaa
NC
639 break;
640 }
206947d2
IZ
641 }
642 total += len * size;
643 }
644 return total;
645}
646
49704364
WL
647
648/* locate matching closing parenthesis or bracket
649 * returns char pointer to char after match, or NULL
650 */
f7fe979e 651STATIC const char *
5aaab254 652S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 653{
7918f24d
NC
654 PERL_ARGS_ASSERT_GROUP_END;
655
49704364 656 while (patptr < patend) {
f7fe979e 657 const char c = *patptr++;
49704364
WL
658
659 if (isSPACE(c))
660 continue;
661 else if (c == ender)
662 return patptr-1;
663 else if (c == '#') {
664 while (patptr < patend && *patptr != '\n')
665 patptr++;
666 continue;
667 } else if (c == '(')
668 patptr = group_end(patptr, patend, ')') + 1;
669 else if (c == '[')
670 patptr = group_end(patptr, patend, ']') + 1;
18529408 671 }
49704364
WL
672 Perl_croak(aTHX_ "No group ending character '%c' found in template",
673 ender);
674 return 0;
18529408
IZ
675}
676
49704364
WL
677
678/* Convert unsigned decimal number to binary.
679 * Expects a pointer to the first digit and address of length variable
680 * Advances char pointer to 1st non-digit char and returns number
fc241834 681 */
f7fe979e 682STATIC const char *
5aaab254 683S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
684{
685 I32 len = *patptr++ - '0';
7918f24d
NC
686
687 PERL_ARGS_ASSERT_GET_NUM;
688
49704364
WL
689 while (isDIGIT(*patptr)) {
690 if (len >= 0x7FFFFFFF/10)
691 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
692 len = (len * 10) + (*patptr++ - '0');
693 }
694 *lenptr = len;
695 return patptr;
696}
697
698/* The marvellous template parsing routine: Using state stored in *symptr,
699 * locates next template code and count
700 */
701STATIC bool
f337b084 702S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 703{
f7fe979e 704 const char* patptr = symptr->patptr;
0bcc34c2 705 const char* const patend = symptr->patend;
49704364 706
7918f24d
NC
707 PERL_ARGS_ASSERT_NEXT_SYMBOL;
708
49704364
WL
709 symptr->flags &= ~FLAG_SLASH;
710
711 while (patptr < patend) {
712 if (isSPACE(*patptr))
713 patptr++;
714 else if (*patptr == '#') {
715 patptr++;
716 while (patptr < patend && *patptr != '\n')
717 patptr++;
718 if (patptr < patend)
719 patptr++;
720 } else {
fc241834 721 /* We should have found a template code */
49704364 722 I32 code = *patptr++ & 0xFF;
66c611c5 723 U32 inherited_modifiers = 0;
49704364
WL
724
725 if (code == ','){ /* grandfather in commas but with a warning */
726 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
727 symptr->flags |= FLAG_COMMA;
728 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 729 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
730 }
731 continue;
732 }
fc241834 733
49704364 734 /* for '(', skip to ')' */
fc241834 735 if (code == '(') {
49704364
WL
736 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
737 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 738 _action( symptr ) );
49704364
WL
739 symptr->grpbeg = patptr;
740 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
741 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
742 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 743 _action( symptr ) );
49704364
WL
744 }
745
66c611c5
MHM
746 /* look for group modifiers to inherit */
747 if (TYPE_ENDIANNESS(symptr->flags)) {
748 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
749 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
750 }
751
1109a392
MHM
752 /* look for modifiers */
753 while (patptr < patend) {
b7787f18
AL
754 const char *allowed;
755 I32 modifier;
1109a392
MHM
756 switch (*patptr) {
757 case '!':
758 modifier = TYPE_IS_SHRIEKING;
7212898e 759 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392
MHM
760 break;
761 case '>':
762 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 763 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
764 break;
765 case '<':
766 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 767 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
768 break;
769 default:
b7787f18
AL
770 allowed = "";
771 modifier = 0;
1109a392
MHM
772 break;
773 }
66c611c5 774
1109a392
MHM
775 if (modifier == 0)
776 break;
66c611c5 777
1109a392
MHM
778 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
779 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 780 allowed, _action( symptr ) );
66c611c5
MHM
781
782 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 783 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 784 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
785 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
786 TYPE_ENDIANNESS_MASK)
787 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 788 *patptr, _action( symptr ) );
66c611c5 789
a2a5de95
NC
790 if ((code & modifier)) {
791 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
792 "Duplicate modifier '%c' after '%c' in %s",
793 *patptr, (int) TYPE_NO_MODIFIERS(code),
794 _action( symptr ) );
1109a392 795 }
66c611c5 796
1109a392
MHM
797 code |= modifier;
798 patptr++;
49704364
WL
799 }
800
66c611c5
MHM
801 /* inherit modifiers */
802 code |= inherited_modifiers;
803
fc241834 804 /* look for count and/or / */
49704364
WL
805 if (patptr < patend) {
806 if (isDIGIT(*patptr)) {
807 patptr = get_num( patptr, &symptr->length );
808 symptr->howlen = e_number;
809
810 } else if (*patptr == '*') {
811 patptr++;
812 symptr->howlen = e_star;
813
814 } else if (*patptr == '[') {
f7fe979e 815 const char* lenptr = ++patptr;
49704364
WL
816 symptr->howlen = e_number;
817 patptr = group_end( patptr, patend, ']' ) + 1;
818 /* what kind of [] is it? */
819 if (isDIGIT(*lenptr)) {
820 lenptr = get_num( lenptr, &symptr->length );
821 if( *lenptr != ']' )
822 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 823 _action( symptr ) );
49704364
WL
824 } else {
825 tempsym_t savsym = *symptr;
826 symptr->patend = patptr-1;
827 symptr->patptr = lenptr;
828 savsym.length = measure_struct(symptr);
829 *symptr = savsym;
830 }
831 } else {
832 symptr->howlen = e_no_len;
833 symptr->length = 1;
834 }
835
836 /* try to find / */
837 while (patptr < patend) {
838 if (isSPACE(*patptr))
839 patptr++;
840 else if (*patptr == '#') {
841 patptr++;
842 while (patptr < patend && *patptr != '\n')
843 patptr++;
844 if (patptr < patend)
845 patptr++;
846 } else {
66c611c5 847 if (*patptr == '/') {
49704364
WL
848 symptr->flags |= FLAG_SLASH;
849 patptr++;
66c611c5
MHM
850 if (patptr < patend &&
851 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 852 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 853 _action( symptr ) );
49704364
WL
854 }
855 break;
856 }
18529408 857 }
49704364
WL
858 } else {
859 /* at end - no count, no / */
860 symptr->howlen = e_no_len;
861 symptr->length = 1;
862 }
863
864 symptr->code = code;
fc241834 865 symptr->patptr = patptr;
49704364 866 return TRUE;
18529408 867 }
49704364 868 }
fc241834 869 symptr->patptr = patptr;
49704364 870 return FALSE;
18529408
IZ
871}
872
18529408 873/*
fc241834 874 There is no way to cleanly handle the case where we should process the
08ca2aa3 875 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
876 (e.g. estimates like strend-s as an upper bound for the number of
877 characters left wouldn't work). So if we foresee the need of this
878 (pattern starts with U or contains U0), we want to work on the encoded
879 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
880 themselves if they need to do a lot of unpacks like this on it
881*/
fc241834 882STATIC bool
08ca2aa3
TH
883need_utf8(const char *pat, const char *patend)
884{
885 bool first = TRUE;
7918f24d
NC
886
887 PERL_ARGS_ASSERT_NEED_UTF8;
888
08ca2aa3
TH
889 while (pat < patend) {
890 if (pat[0] == '#') {
891 pat++;
f7fe979e 892 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
893 if (!pat) return FALSE;
894 } else if (pat[0] == 'U') {
895 if (first || pat[1] == '0') return TRUE;
896 } else first = FALSE;
897 pat++;
898 }
899 return FALSE;
900}
901
902STATIC char
903first_symbol(const char *pat, const char *patend) {
7918f24d
NC
904 PERL_ARGS_ASSERT_FIRST_SYMBOL;
905
08ca2aa3
TH
906 while (pat < patend) {
907 if (pat[0] != '#') return pat[0];
908 pat++;
f7fe979e 909 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
910 if (!pat) return 0;
911 pat++;
912 }
913 return 0;
914}
915
916/*
7accc089
JH
917=for apidoc unpackstring
918
21ebfc7a
DM
919The engine implementing the unpack() Perl function.
920
921Using the template pat..patend, this function unpacks the string
922s..strend into a number of mortal SVs, which it pushes onto the perl
923argument (@_) stack (so you will need to issue a C<PUTBACK> before and
924C<SPAGAIN> after the call to this function). It returns the number of
925pushed elements.
926
927The strend and patend pointers should point to the byte following the last
928character of each string.
929
930Although this function returns its values on the perl argument stack, it
931doesn't take any parameters from that stack (and thus in particular
932there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
933example).
7accc089
JH
934
935=cut */
936
937I32
f7fe979e 938Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 939{
f7fe979e 940 tempsym_t sym;
08ca2aa3 941
7918f24d
NC
942 PERL_ARGS_ASSERT_UNPACKSTRING;
943
f337b084 944 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
945 else if (need_utf8(pat, patend)) {
946 /* We probably should try to avoid this in case a scalar context call
947 wouldn't get to the "U0" */
948 STRLEN len = strend - s;
230e1fce 949 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
950 SAVEFREEPV(s);
951 strend = s + len;
f337b084 952 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
953 }
954
f337b084
TH
955 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
956 flags |= FLAG_PARSE_UTF8;
08ca2aa3 957
f7fe979e 958 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
959
960 return unpack_rec(&sym, s, s, strend, NULL );
961}
962
4136a0f7 963STATIC I32
f7fe979e 964S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 965{
27da23d5 966 dVAR; dSP;
3297d27d 967 SV *sv = NULL;
f7fe979e 968 const I32 start_sp_offset = SP - PL_stack_base;
49704364 969 howlen_t howlen;
a6ec74c1 970 I32 checksum = 0;
92d41999 971 UV cuv = 0;
a6ec74c1 972 NV cdouble = 0.0;
f337b084 973 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 974 bool beyond = FALSE;
21c16052 975 bool explicit_length;
9e27e96a 976 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 977 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
978
979 PERL_ARGS_ASSERT_UNPACK_REC;
980
28be1210 981 symptr->strbeg = s - strbeg;
49704364 982
49704364 983 while (next_symbol(symptr)) {
a7a3cfaa 984 packprops_t props;
9e27e96a 985 I32 len;
f337b084 986 I32 datumtype = symptr->code;
206947d2 987 /* do first one only unless in list context
08ca2aa3 988 / is implemented by unpacking the count, then popping it from the
206947d2 989 stack, so must check that we're not in the middle of a / */
49704364 990 if ( unpack_only_one
206947d2 991 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 992 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 993 break;
49704364 994
f337b084 995 switch (howlen = symptr->howlen) {
fc241834
RGS
996 case e_star:
997 len = strend - strbeg; /* long enough */
49704364 998 break;
f337b084
TH
999 default:
1000 /* e_no_len and e_number */
1001 len = symptr->length;
1002 break;
49704364 1003 }
18529408 1004
21c16052 1005 explicit_length = TRUE;
a6ec74c1 1006 redo_switch:
49704364 1007 beyond = s >= strend;
a7a3cfaa
TH
1008
1009 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1010 if (props) {
1011 /* props nonzero means we can process this letter. */
9e27e96a
AL
1012 const long size = props & PACK_SIZE_MASK;
1013 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1014 if (len > howmany)
1015 len = howmany;
1016
1017 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1018 if (len && unpack_only_one) len = 1;
1019 EXTEND(SP, len);
1020 EXTEND_MORTAL(len);
78d46eaa
NC
1021 }
1022 }
a7a3cfaa 1023
1109a392 1024 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1025 default:
1109a392 1026 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1027
a6ec74c1 1028 case '%':
49704364 1029 if (howlen == e_no_len)
18529408 1030 len = 16; /* len is not specified */
a6ec74c1 1031 checksum = len;
92d41999 1032 cuv = 0;
a6ec74c1 1033 cdouble = 0;
18529408 1034 continue;
a6ec74c1 1035 break;
18529408
IZ
1036 case '(':
1037 {
49704364 1038 tempsym_t savsym = *symptr;
9e27e96a 1039 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1040 symptr->flags |= group_modifiers;
49704364 1041 symptr->patend = savsym.grpend;
28be1210 1042 symptr->previous = &savsym;
49704364 1043 symptr->level++;
18529408 1044 PUTBACK;
c6f750d1 1045 if (len && unpack_only_one) len = 1;
18529408 1046 while (len--) {
49704364 1047 symptr->patptr = savsym.grpbeg;
f337b084
TH
1048 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1049 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1050 unpack_rec(symptr, s, strbeg, strend, &s);
1051 if (s == strend && savsym.howlen == e_star)
49704364 1052 break; /* No way to continue */
18529408
IZ
1053 }
1054 SPAGAIN;
28be1210 1055 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1056 *symptr = savsym;
18529408
IZ
1057 break;
1058 }
28be1210
TH
1059#ifdef PERL_PACK_CAN_SHRIEKSIGN
1060 case '.' | TYPE_IS_SHRIEKING:
1061#endif
1062 case '.': {
9e27e96a 1063 const char *from;
28be1210
TH
1064 SV *sv;
1065#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1066 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1067#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1068 const bool u8 = utf8;
28be1210
TH
1069#endif
1070 if (howlen == e_star) from = strbeg;
1071 else if (len <= 0) from = s;
1072 else {
1073 tempsym_t *group = symptr;
1074
1075 while (--len && group) group = group->previous;
1076 from = group ? strbeg + group->strbeg : strbeg;
1077 }
1078 sv = from <= s ?
00646304
CB
1079 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1080 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1081 mXPUSHs(sv);
28be1210
TH
1082 break;
1083 }
1084#ifdef PERL_PACK_CAN_SHRIEKSIGN
1085 case '@' | TYPE_IS_SHRIEKING:
1086#endif
a6ec74c1 1087 case '@':
28be1210
TH
1088 s = strbeg + symptr->strbeg;
1089#ifdef PERL_PACK_CAN_SHRIEKSIGN
1090 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1091#else /* PERL_PACK_CAN_SHRIEKSIGN */
1092 if (utf8)
1093#endif
1094 {
08ca2aa3
TH
1095 while (len > 0) {
1096 if (s >= strend)
1097 Perl_croak(aTHX_ "'@' outside of string in unpack");
1098 s += UTF8SKIP(s);
1099 len--;
1100 }
1101 if (s > strend)
1102 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1103 } else {
28be1210 1104 if (strend-s < len)
fc241834 1105 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1106 s += len;
08ca2aa3 1107 }
a6ec74c1 1108 break;
62f95557
IZ
1109 case 'X' | TYPE_IS_SHRIEKING:
1110 if (!len) /* Avoid division by 0 */
1111 len = 1;
08ca2aa3 1112 if (utf8) {
f7fe979e 1113 const char *hop, *last;
f337b084
TH
1114 I32 l = len;
1115 hop = last = strbeg;
1116 while (hop < s) {
1117 hop += UTF8SKIP(hop);
1118 if (--l == 0) {
08ca2aa3 1119 last = hop;
f337b084
TH
1120 l = len;
1121 }
fc241834 1122 }
f337b084
TH
1123 if (last > s)
1124 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1125 s = last;
1126 break;
f337b084
TH
1127 }
1128 len = (s - strbeg) % len;
62f95557 1129 /* FALL THROUGH */
a6ec74c1 1130 case 'X':
08ca2aa3
TH
1131 if (utf8) {
1132 while (len > 0) {
1133 if (s <= strbeg)
1134 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1135 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1136 if (s <= strbeg)
1137 Perl_croak(aTHX_ "'X' outside of string in unpack");
1138 }
1139 len--;
1140 }
1141 } else {
fc241834
RGS
1142 if (len > s - strbeg)
1143 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1144 s -= len;
08ca2aa3 1145 }
a6ec74c1 1146 break;
9e27e96a
AL
1147 case 'x' | TYPE_IS_SHRIEKING: {
1148 I32 ai32;
62f95557
IZ
1149 if (!len) /* Avoid division by 0 */
1150 len = 1;
230e1fce
NC
1151 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1152 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1153 if (ai32 == 0) break;
1154 len -= ai32;
9e27e96a 1155 }
62f95557 1156 /* FALL THROUGH */
a6ec74c1 1157 case 'x':
08ca2aa3
TH
1158 if (utf8) {
1159 while (len>0) {
1160 if (s >= strend)
1161 Perl_croak(aTHX_ "'x' outside of string in unpack");
1162 s += UTF8SKIP(s);
1163 len--;
1164 }
1165 } else {
fc241834
RGS
1166 if (len > strend - s)
1167 Perl_croak(aTHX_ "'x' outside of string in unpack");
1168 s += len;
f337b084 1169 }
a6ec74c1
JH
1170 break;
1171 case '/':
49704364
WL
1172 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1173 break;
a6ec74c1
JH
1174 case 'A':
1175 case 'Z':
1176 case 'a':
08ca2aa3
TH
1177 if (checksum) {
1178 /* Preliminary length estimate is assumed done in 'W' */
1179 if (len > strend - s) len = strend - s;
1180 goto W_checksum;
1181 }
1182 if (utf8) {
1183 I32 l;
f7fe979e 1184 const char *hop;
08ca2aa3
TH
1185 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1186 if (hop >= strend) {
1187 if (hop > strend)
1188 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1189 break;
fc241834 1190 }
a6ec74c1 1191 }
08ca2aa3
TH
1192 if (hop > strend)
1193 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1194 len = hop - s;
1195 } else if (len > strend - s)
1196 len = strend - s;
1197
1198 if (datumtype == 'Z') {
1199 /* 'Z' strips stuff after first null */
f7fe979e 1200 const char *ptr, *end;
f337b084
TH
1201 end = s + len;
1202 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1203 sv = newSVpvn(s, ptr-s);
1204 if (howlen == e_star) /* exact for 'Z*' */
1205 len = ptr-s + (ptr != strend ? 1 : 0);
1206 } else if (datumtype == 'A') {
1207 /* 'A' strips both nulls and spaces */
f7fe979e 1208 const char *ptr;
18bdf90a
TH
1209 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1210 for (ptr = s+len-1; ptr >= s; ptr--)
1211 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1212 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1213 if (ptr >= s) ptr += UTF8SKIP(ptr);
1214 else ptr++;
28be1210 1215 if (ptr > s+len)
18bdf90a
TH
1216 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1217 } else {
1218 for (ptr = s+len-1; ptr >= s; ptr--)
1219 if (*ptr != 0 && !isSPACE(*ptr)) break;
1220 ptr++;
1221 }
08ca2aa3
TH
1222 sv = newSVpvn(s, ptr-s);
1223 } else sv = newSVpvn(s, len);
1224
1225 if (utf8) {
1226 SvUTF8_on(sv);
1227 /* Undo any upgrade done due to need_utf8() */
f337b084 1228 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1229 sv_utf8_downgrade(sv, 0);
a6ec74c1 1230 }
6e449a3a 1231 mXPUSHs(sv);
08ca2aa3 1232 s += len;
a6ec74c1
JH
1233 break;
1234 case 'B':
08ca2aa3
TH
1235 case 'b': {
1236 char *str;
49704364 1237 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1238 len = (strend - s) * 8;
1239 if (checksum) {
f337b084 1240 if (utf8)
08ca2aa3 1241 while (len >= 8 && s < strend) {
f337b084 1242 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1243 len -= 8;
1244 }
f337b084 1245 else
fc241834 1246 while (len >= 8) {
08ca2aa3 1247 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1248 len -= 8;
1249 }
08ca2aa3
TH
1250 if (len && s < strend) {
1251 U8 bits;
f337b084
TH
1252 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1253 if (datumtype == 'b')
a6ec74c1 1254 while (len-- > 0) {
92d41999 1255 if (bits & 1) cuv++;
a6ec74c1
JH
1256 bits >>= 1;
1257 }
f337b084 1258 else
a6ec74c1 1259 while (len-- > 0) {
08ca2aa3 1260 if (bits & 0x80) cuv++;
a6ec74c1
JH
1261 bits <<= 1;
1262 }
fc241834 1263 }
a6ec74c1
JH
1264 break;
1265 }
08ca2aa3 1266
561b68a9 1267 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1268 SvPOK_on(sv);
1269 str = SvPVX(sv);
1270 if (datumtype == 'b') {
f337b084 1271 U8 bits = 0;
f7fe979e 1272 const I32 ai32 = len;
08ca2aa3
TH
1273 for (len = 0; len < ai32; len++) {
1274 if (len & 7) bits >>= 1;
1275 else if (utf8) {
1276 if (s >= strend) break;
f337b084 1277 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1278 } else bits = *(U8 *) s++;
1279 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1280 }
08ca2aa3 1281 } else {
f337b084 1282 U8 bits = 0;
f7fe979e 1283 const I32 ai32 = len;
08ca2aa3
TH
1284 for (len = 0; len < ai32; len++) {
1285 if (len & 7) bits <<= 1;
1286 else if (utf8) {
1287 if (s >= strend) break;
f337b084 1288 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1289 } else bits = *(U8 *) s++;
1290 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1291 }
1292 }
1293 *str = '\0';
aa07b2f6 1294 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1295 XPUSHs(sv);
a6ec74c1 1296 break;
08ca2aa3 1297 }
a6ec74c1 1298 case 'H':
08ca2aa3 1299 case 'h': {
3297d27d 1300 char *str = NULL;
fc241834 1301 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1302 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1303 len = (strend - s) * 2;
858fe5e1
TC
1304 if (!checksum) {
1305 sv = sv_2mortal(newSV(len ? len : 1));
1306 SvPOK_on(sv);
1307 str = SvPVX(sv);
1308 }
a6ec74c1 1309 if (datumtype == 'h') {
f337b084 1310 U8 bits = 0;
9e27e96a 1311 I32 ai32 = len;
fc241834
RGS
1312 for (len = 0; len < ai32; len++) {
1313 if (len & 1) bits >>= 4;
1314 else if (utf8) {
1315 if (s >= strend) break;
f337b084 1316 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1317 } else bits = * (U8 *) s++;
858fe5e1
TC
1318 if (!checksum)
1319 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1320 }
08ca2aa3 1321 } else {
f337b084 1322 U8 bits = 0;
f7fe979e 1323 const I32 ai32 = len;
08ca2aa3
TH
1324 for (len = 0; len < ai32; len++) {
1325 if (len & 1) bits <<= 4;
1326 else if (utf8) {
1327 if (s >= strend) break;
f337b084 1328 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1329 } else bits = *(U8 *) s++;
858fe5e1
TC
1330 if (!checksum)
1331 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1332 }
1333 }
858fe5e1
TC
1334 if (!checksum) {
1335 *str = '\0';
1336 SvCUR_set(sv, str - SvPVX_const(sv));
1337 XPUSHs(sv);
1338 }
a6ec74c1 1339 break;
08ca2aa3 1340 }
1651fc44
ML
1341 case 'C':
1342 if (len == 0) {
1343 if (explicit_length)
1344 /* Switch to "character" mode */
1345 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1346 break;
1347 }
1348 /* FALL THROUGH */
a6ec74c1 1349 case 'c':
1651fc44
ML
1350 while (len-- > 0 && s < strend) {
1351 int aint;
1352 if (utf8)
1353 {
1354 STRLEN retlen;
1355 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1356 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1357 if (retlen == (STRLEN) -1 || retlen == 0)
1358 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1359 s += retlen;
1360 }
1361 else
1362 aint = *(U8 *)(s)++;
1363 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1364 aint -= 256;
08ca2aa3 1365 if (!checksum)
6e449a3a 1366 mPUSHi(aint);
73cb7263
NC
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)aint;
1369 else
1370 cuv += aint;
a6ec74c1
JH
1371 }
1372 break;
08ca2aa3
TH
1373 case 'W':
1374 W_checksum:
1651fc44 1375 if (utf8) {
08ca2aa3 1376 while (len-- > 0 && s < strend) {
08ca2aa3 1377 STRLEN retlen;
f7fe979e 1378 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1379 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1380 if (retlen == (STRLEN) -1 || retlen == 0)
1381 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1382 s += retlen;
1383 if (!checksum)
6e449a3a 1384 mPUSHu(val);
08ca2aa3
TH
1385 else if (checksum > bits_in_uv)
1386 cdouble += (NV) val;
d6d3e8bd 1387 else
08ca2aa3 1388 cuv += val;
fc241834 1389 }
08ca2aa3 1390 } else if (!checksum)
a6ec74c1 1391 while (len-- > 0) {
f7fe979e 1392 const U8 ch = *(U8 *) s++;
6e449a3a 1393 mPUSHu(ch);
a6ec74c1 1394 }
08ca2aa3
TH
1395 else if (checksum > bits_in_uv)
1396 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1397 else
1398 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1399 break;
1400 case 'U':
35bcd338 1401 if (len == 0) {
c5333953 1402 if (explicit_length && howlen != e_star) {
08ca2aa3 1403 /* Switch to "bytes in UTF-8" mode */
f337b084 1404 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1405 else
1406 /* Should be impossible due to the need_utf8() test */
1407 Perl_croak(aTHX_ "U0 mode on a byte string");
1408 }
35bcd338
JH
1409 break;
1410 }
08ca2aa3 1411 if (len > strend - s) len = strend - s;
fc241834 1412 if (!checksum) {
08ca2aa3
TH
1413 if (len && unpack_only_one) len = 1;
1414 EXTEND(SP, len);
1415 EXTEND_MORTAL(len);
fc241834 1416 }
08ca2aa3
TH
1417 while (len-- > 0 && s < strend) {
1418 STRLEN retlen;
1419 UV auv;
1420 if (utf8) {
1421 U8 result[UTF8_MAXLEN];
f7fe979e 1422 const char *ptr = s;
08ca2aa3 1423 STRLEN len;
08ca2aa3
TH
1424 /* Bug: warns about bad utf8 even if we are short on bytes
1425 and will break out of the loop */
230e1fce
NC
1426 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1427 'U'))
08ca2aa3
TH
1428 break;
1429 len = UTF8SKIP(result);
fc241834 1430 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1431 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1432 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1433 s = ptr;
1434 } else {
cde84f2a 1435 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1436 if (retlen == (STRLEN) -1 || retlen == 0)
1437 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1438 s += retlen;
1439 }
1440 if (!checksum)
6e449a3a 1441 mPUSHu(auv);
73cb7263 1442 else if (checksum > bits_in_uv)
08ca2aa3 1443 cdouble += (NV) auv;
73cb7263 1444 else
08ca2aa3 1445 cuv += auv;
a6ec74c1
JH
1446 }
1447 break;
49704364
WL
1448 case 's' | TYPE_IS_SHRIEKING:
1449#if SHORTSIZE != SIZE16
73cb7263 1450 while (len-- > 0) {
08ca2aa3 1451 short ashort;
f337b084
TH
1452 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1453 DO_BO_UNPACK(ashort, s);
08ca2aa3 1454 if (!checksum)
6e449a3a 1455 mPUSHi(ashort);
73cb7263
NC
1456 else if (checksum > bits_in_uv)
1457 cdouble += (NV)ashort;
1458 else
1459 cuv += ashort;
49704364
WL
1460 }
1461 break;
1462#else
1463 /* Fallthrough! */
a6ec74c1 1464#endif
49704364 1465 case 's':
73cb7263 1466 while (len-- > 0) {
08ca2aa3
TH
1467 I16 ai16;
1468
1469#if U16SIZE > SIZE16
1470 ai16 = 0;
1471#endif
f337b084 1472 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1473 DO_BO_UNPACK(ai16, 16);
1109a392 1474#if U16SIZE > SIZE16
73cb7263
NC
1475 if (ai16 > 32767)
1476 ai16 -= 65536;
a6ec74c1 1477#endif
08ca2aa3 1478 if (!checksum)
6e449a3a 1479 mPUSHi(ai16);
73cb7263
NC
1480 else if (checksum > bits_in_uv)
1481 cdouble += (NV)ai16;
1482 else
1483 cuv += ai16;
a6ec74c1
JH
1484 }
1485 break;
49704364
WL
1486 case 'S' | TYPE_IS_SHRIEKING:
1487#if SHORTSIZE != SIZE16
73cb7263 1488 while (len-- > 0) {
08ca2aa3 1489 unsigned short aushort;
f337b084
TH
1490 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1491 DO_BO_UNPACK(aushort, s);
08ca2aa3 1492 if (!checksum)
6e449a3a 1493 mPUSHu(aushort);
73cb7263
NC
1494 else if (checksum > bits_in_uv)
1495 cdouble += (NV)aushort;
1496 else
1497 cuv += aushort;
49704364
WL
1498 }
1499 break;
1500#else
486ec47a 1501 /* Fallthrough! */
49704364 1502#endif
a6ec74c1
JH
1503 case 'v':
1504 case 'n':
1505 case 'S':
73cb7263 1506 while (len-- > 0) {
08ca2aa3
TH
1507 U16 au16;
1508#if U16SIZE > SIZE16
1509 au16 = 0;
1510#endif
f337b084 1511 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1512 DO_BO_UNPACK(au16, 16);
a6ec74c1 1513#ifdef HAS_NTOHS
73cb7263
NC
1514 if (datumtype == 'n')
1515 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1516#endif
1517#ifdef HAS_VTOHS
73cb7263
NC
1518 if (datumtype == 'v')
1519 au16 = vtohs(au16);
a6ec74c1 1520#endif
08ca2aa3 1521 if (!checksum)
6e449a3a 1522 mPUSHu(au16);
73cb7263 1523 else if (checksum > bits_in_uv)
f337b084 1524 cdouble += (NV) au16;
73cb7263
NC
1525 else
1526 cuv += au16;
a6ec74c1
JH
1527 }
1528 break;
7212898e 1529#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1530 case 'v' | TYPE_IS_SHRIEKING:
1531 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1532 while (len-- > 0) {
08ca2aa3
TH
1533 I16 ai16;
1534# if U16SIZE > SIZE16
1535 ai16 = 0;
1536# endif
f337b084 1537 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1538# ifdef HAS_NTOHS
73cb7263 1539 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1540 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1541# endif /* HAS_NTOHS */
1542# ifdef HAS_VTOHS
73cb7263 1543 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1544 ai16 = (I16) vtohs((U16) ai16);
1545# endif /* HAS_VTOHS */
1546 if (!checksum)
6e449a3a 1547 mPUSHi(ai16);
73cb7263 1548 else if (checksum > bits_in_uv)
08ca2aa3 1549 cdouble += (NV) ai16;
73cb7263
NC
1550 else
1551 cuv += ai16;
068bd2e7
MHM
1552 }
1553 break;
08ca2aa3 1554#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1555 case 'i':
49704364 1556 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1557 while (len-- > 0) {
08ca2aa3 1558 int aint;
f337b084
TH
1559 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1560 DO_BO_UNPACK(aint, i);
08ca2aa3 1561 if (!checksum)
6e449a3a 1562 mPUSHi(aint);
73cb7263
NC
1563 else if (checksum > bits_in_uv)
1564 cdouble += (NV)aint;
1565 else
1566 cuv += aint;
a6ec74c1
JH
1567 }
1568 break;
1569 case 'I':
49704364 1570 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1571 while (len-- > 0) {
08ca2aa3 1572 unsigned int auint;
f337b084
TH
1573 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1574 DO_BO_UNPACK(auint, i);
08ca2aa3 1575 if (!checksum)
6e449a3a 1576 mPUSHu(auint);
73cb7263
NC
1577 else if (checksum > bits_in_uv)
1578 cdouble += (NV)auint;
1579 else
1580 cuv += auint;
a6ec74c1
JH
1581 }
1582 break;
92d41999 1583 case 'j':
73cb7263 1584 while (len-- > 0) {
08ca2aa3 1585 IV aiv;
f337b084 1586 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1587#if IVSIZE == INTSIZE
f337b084 1588 DO_BO_UNPACK(aiv, i);
1109a392 1589#elif IVSIZE == LONGSIZE
f337b084 1590 DO_BO_UNPACK(aiv, l);
1109a392 1591#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1592 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1593#else
1594 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1595#endif
08ca2aa3 1596 if (!checksum)
6e449a3a 1597 mPUSHi(aiv);
73cb7263
NC
1598 else if (checksum > bits_in_uv)
1599 cdouble += (NV)aiv;
1600 else
1601 cuv += aiv;
92d41999
JH
1602 }
1603 break;
1604 case 'J':
73cb7263 1605 while (len-- > 0) {
08ca2aa3 1606 UV auv;
f337b084 1607 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1608#if IVSIZE == INTSIZE
f337b084 1609 DO_BO_UNPACK(auv, i);
08ca2aa3 1610#elif IVSIZE == LONGSIZE
f337b084 1611 DO_BO_UNPACK(auv, l);
08ca2aa3 1612#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1613 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1614#else
1615 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1616#endif
08ca2aa3 1617 if (!checksum)
6e449a3a 1618 mPUSHu(auv);
73cb7263
NC
1619 else if (checksum > bits_in_uv)
1620 cdouble += (NV)auv;
1621 else
1622 cuv += auv;
92d41999
JH
1623 }
1624 break;
49704364
WL
1625 case 'l' | TYPE_IS_SHRIEKING:
1626#if LONGSIZE != SIZE32
73cb7263 1627 while (len-- > 0) {
08ca2aa3 1628 long along;
f337b084
TH
1629 SHIFT_VAR(utf8, s, strend, along, datumtype);
1630 DO_BO_UNPACK(along, l);
08ca2aa3 1631 if (!checksum)
6e449a3a 1632 mPUSHi(along);
73cb7263
NC
1633 else if (checksum > bits_in_uv)
1634 cdouble += (NV)along;
1635 else
1636 cuv += along;
49704364
WL
1637 }
1638 break;
1639#else
1640 /* Fallthrough! */
a6ec74c1 1641#endif
49704364 1642 case 'l':
73cb7263 1643 while (len-- > 0) {
08ca2aa3
TH
1644 I32 ai32;
1645#if U32SIZE > SIZE32
1646 ai32 = 0;
1647#endif
f337b084 1648 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1649 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1650#if U32SIZE > SIZE32
08ca2aa3 1651 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1652#endif
08ca2aa3 1653 if (!checksum)
6e449a3a 1654 mPUSHi(ai32);
73cb7263
NC
1655 else if (checksum > bits_in_uv)
1656 cdouble += (NV)ai32;
1657 else
1658 cuv += ai32;
a6ec74c1
JH
1659 }
1660 break;
49704364
WL
1661 case 'L' | TYPE_IS_SHRIEKING:
1662#if LONGSIZE != SIZE32
73cb7263 1663 while (len-- > 0) {
08ca2aa3 1664 unsigned long aulong;
f337b084
TH
1665 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1666 DO_BO_UNPACK(aulong, l);
08ca2aa3 1667 if (!checksum)
6e449a3a 1668 mPUSHu(aulong);
73cb7263
NC
1669 else if (checksum > bits_in_uv)
1670 cdouble += (NV)aulong;
1671 else
1672 cuv += aulong;
49704364
WL
1673 }
1674 break;
1675#else
1676 /* Fall through! */
1677#endif
a6ec74c1
JH
1678 case 'V':
1679 case 'N':
1680 case 'L':
73cb7263 1681 while (len-- > 0) {
08ca2aa3
TH
1682 U32 au32;
1683#if U32SIZE > SIZE32
1684 au32 = 0;
1685#endif
f337b084 1686 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1687 DO_BO_UNPACK(au32, 32);
a6ec74c1 1688#ifdef HAS_NTOHL
73cb7263
NC
1689 if (datumtype == 'N')
1690 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1691#endif
1692#ifdef HAS_VTOHL
73cb7263
NC
1693 if (datumtype == 'V')
1694 au32 = vtohl(au32);
a6ec74c1 1695#endif
08ca2aa3 1696 if (!checksum)
6e449a3a 1697 mPUSHu(au32);
fc241834
RGS
1698 else if (checksum > bits_in_uv)
1699 cdouble += (NV)au32;
1700 else
1701 cuv += au32;
a6ec74c1
JH
1702 }
1703 break;
7212898e 1704#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1705 case 'V' | TYPE_IS_SHRIEKING:
1706 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1707 while (len-- > 0) {
08ca2aa3
TH
1708 I32 ai32;
1709# if U32SIZE > SIZE32
1710 ai32 = 0;
1711# endif
f337b084 1712 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1713# ifdef HAS_NTOHL
73cb7263
NC
1714 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1715 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1716# endif
1717# ifdef HAS_VTOHL
73cb7263
NC
1718 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1719 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1720# endif
1721 if (!checksum)
6e449a3a 1722 mPUSHi(ai32);
73cb7263
NC
1723 else if (checksum > bits_in_uv)
1724 cdouble += (NV)ai32;
1725 else
1726 cuv += ai32;
068bd2e7
MHM
1727 }
1728 break;
08ca2aa3 1729#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1730 case 'p':
a6ec74c1 1731 while (len-- > 0) {
f7fe979e 1732 const char *aptr;
f337b084 1733 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1734 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1735 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1736 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1737 }
1738 break;
1739 case 'w':
a6ec74c1
JH
1740 {
1741 UV auv = 0;
1742 U32 bytes = 0;
fc241834 1743
08ca2aa3
TH
1744 while (len > 0 && s < strend) {
1745 U8 ch;
f337b084 1746 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1747 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1748 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1749 if (ch < 0x80) {
a6ec74c1 1750 bytes = 0;
6e449a3a 1751 mPUSHu(auv);
a6ec74c1
JH
1752 len--;
1753 auv = 0;
08ca2aa3 1754 continue;
a6ec74c1 1755 }
08ca2aa3 1756 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1757 const char *t;
a6ec74c1 1758
f5992bc4 1759 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1760 while (s < strend) {
f337b084 1761 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1762 sv = mul128(sv, (U8)(ch & 0x7f));
1763 if (!(ch & 0x80)) {
a6ec74c1
JH
1764 bytes = 0;
1765 break;
1766 }
1767 }
10516c54 1768 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1769 while (*t == '0')
1770 t++;
1771 sv_chop(sv, t);
6e449a3a 1772 mPUSHs(sv);
a6ec74c1
JH
1773 len--;
1774 auv = 0;
1775 }
1776 }
1777 if ((s >= strend) && bytes)
49704364 1778 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1779 }
1780 break;
1781 case 'P':
49704364
WL
1782 if (symptr->howlen == e_star)
1783 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1784 EXTEND(SP, 1);
2d3e0934 1785 if (s + sizeof(char*) <= strend) {
08ca2aa3 1786 char *aptr;
f337b084 1787 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1788 DO_BO_UNPACK_PC(aptr);
fc241834 1789 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1790 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1791 }
a6ec74c1
JH
1792 break;
1793#ifdef HAS_QUAD
1794 case 'q':
73cb7263 1795 while (len-- > 0) {
08ca2aa3 1796 Quad_t aquad;
f337b084
TH
1797 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1798 DO_BO_UNPACK(aquad, 64);
08ca2aa3 1799 if (!checksum)
6e449a3a
MHM
1800 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1801 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1802 else if (checksum > bits_in_uv)
1803 cdouble += (NV)aquad;
1804 else
1805 cuv += aquad;
1806 }
a6ec74c1
JH
1807 break;
1808 case 'Q':
73cb7263 1809 while (len-- > 0) {
08ca2aa3 1810 Uquad_t auquad;
f337b084
TH
1811 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1812 DO_BO_UNPACK(auquad, 64);
08ca2aa3 1813 if (!checksum)
6e449a3a
MHM
1814 mPUSHs(auquad <= UV_MAX ?
1815 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1816 else if (checksum > bits_in_uv)
1817 cdouble += (NV)auquad;
1818 else
1819 cuv += auquad;
a6ec74c1
JH
1820 }
1821 break;
08ca2aa3 1822#endif /* HAS_QUAD */
a6ec74c1
JH
1823 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1824 case 'f':
73cb7263 1825 while (len-- > 0) {
08ca2aa3 1826 float afloat;
f337b084 1827 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1828 DO_BO_UNPACK_N(afloat, float);
1829 if (!checksum)
6e449a3a 1830 mPUSHn(afloat);
08ca2aa3 1831 else
73cb7263 1832 cdouble += afloat;
fc241834 1833 }
a6ec74c1
JH
1834 break;
1835 case 'd':
73cb7263 1836 while (len-- > 0) {
08ca2aa3 1837 double adouble;
f337b084 1838 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
1839 DO_BO_UNPACK_N(adouble, double);
1840 if (!checksum)
6e449a3a 1841 mPUSHn(adouble);
08ca2aa3 1842 else
73cb7263 1843 cdouble += adouble;
fc241834 1844 }
a6ec74c1 1845 break;
92d41999 1846 case 'F':
73cb7263 1847 while (len-- > 0) {
275663fa
TC
1848 NV_bytes anv;
1849 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1850 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 1851 if (!checksum)
275663fa 1852 mPUSHn(anv.nv);
08ca2aa3 1853 else
275663fa 1854 cdouble += anv.nv;
fc241834 1855 }
92d41999
JH
1856 break;
1857#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1858 case 'D':
73cb7263 1859 while (len-- > 0) {
275663fa
TC
1860 ld_bytes aldouble;
1861 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1862 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 1863 if (!checksum)
275663fa 1864 mPUSHn(aldouble.ld);
08ca2aa3 1865 else
275663fa 1866 cdouble += aldouble.ld;
92d41999
JH
1867 }
1868 break;
1869#endif
a6ec74c1 1870 case 'u':
858fe5e1 1871 if (!checksum) {
f7fe979e 1872 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1873 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1874 if (l) SvPOK_on(sv);
1875 }
1876 if (utf8) {
1877 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1878 I32 a, b, c, d;
db187877 1879 char hunk[3];
08ca2aa3 1880
08ca2aa3
TH
1881 while (len > 0) {
1882 next_uni_uu(aTHX_ &s, strend, &a);
1883 next_uni_uu(aTHX_ &s, strend, &b);
1884 next_uni_uu(aTHX_ &s, strend, &c);
1885 next_uni_uu(aTHX_ &s, strend, &d);
1886 hunk[0] = (char)((a << 2) | (b >> 4));
1887 hunk[1] = (char)((b << 4) | (c >> 2));
1888 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1889 if (!checksum)
1890 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1891 len -= 3;
1892 }
1893 if (s < strend) {
f7fe979e
AL
1894 if (*s == '\n') {
1895 s++;
1896 }
08ca2aa3
TH
1897 else {
1898 /* possible checksum byte */
f7fe979e
AL
1899 const char *skip = s+UTF8SKIP(s);
1900 if (skip < strend && *skip == '\n')
1901 s = skip+1;
08ca2aa3
TH
1902 }
1903 }
1904 }
1905 } else {
fc241834
RGS
1906 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1907 I32 a, b, c, d;
db187877 1908 char hunk[3];
a6ec74c1 1909
fc241834
RGS
1910 len = PL_uudmap[*(U8*)s++] & 077;
1911 while (len > 0) {
1912 if (s < strend && ISUUCHAR(*s))
1913 a = PL_uudmap[*(U8*)s++] & 077;
1914 else
1915 a = 0;
1916 if (s < strend && ISUUCHAR(*s))
1917 b = PL_uudmap[*(U8*)s++] & 077;
1918 else
1919 b = 0;
1920 if (s < strend && ISUUCHAR(*s))
1921 c = PL_uudmap[*(U8*)s++] & 077;
1922 else
1923 c = 0;
1924 if (s < strend && ISUUCHAR(*s))
1925 d = PL_uudmap[*(U8*)s++] & 077;
1926 else
1927 d = 0;
1928 hunk[0] = (char)((a << 2) | (b >> 4));
1929 hunk[1] = (char)((b << 4) | (c >> 2));
1930 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1931 if (!checksum)
1932 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1933 len -= 3;
1934 }
1935 if (*s == '\n')
1936 s++;
1937 else /* possible checksum byte */
1938 if (s + 1 < strend && s[1] == '\n')
1939 s += 2;
a6ec74c1 1940 }
08ca2aa3 1941 }
858fe5e1
TC
1942 if (!checksum)
1943 XPUSHs(sv);
a6ec74c1
JH
1944 break;
1945 }
49704364 1946
a6ec74c1 1947 if (checksum) {
1109a392 1948 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1949 (checksum > bits_in_uv &&
08ca2aa3
TH
1950 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1951 NV trouble, anv;
a6ec74c1 1952
08ca2aa3 1953 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1954 while (checksum >= 16) {
1955 checksum -= 16;
08ca2aa3 1956 anv *= 65536.0;
a6ec74c1 1957 }
a6ec74c1 1958 while (cdouble < 0.0)
08ca2aa3
TH
1959 cdouble += anv;
1960 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1961 sv = newSVnv(cdouble);
a6ec74c1
JH
1962 }
1963 else {
fa8ec7c1
NC
1964 if (checksum < bits_in_uv) {
1965 UV mask = ((UV)1 << checksum) - 1;
92d41999 1966 cuv &= mask;
a6ec74c1 1967 }
c4c5f44a 1968 sv = newSVuv(cuv);
a6ec74c1 1969 }
6e449a3a 1970 mXPUSHs(sv);
a6ec74c1
JH
1971 checksum = 0;
1972 }
fc241834 1973
49704364
WL
1974 if (symptr->flags & FLAG_SLASH){
1975 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1976 break;
49704364
WL
1977 if( next_symbol(symptr) ){
1978 if( symptr->howlen == e_number )
1979 Perl_croak(aTHX_ "Count after length/code in unpack" );
1980 if( beyond ){
1981 /* ...end of char buffer then no decent length available */
1982 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1983 } else {
1984 /* take top of stack (hope it's numeric) */
1985 len = POPi;
1986 if( len < 0 )
1987 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1988 }
1989 } else {
1990 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1991 }
1992 datumtype = symptr->code;
21c16052 1993 explicit_length = FALSE;
49704364
WL
1994 goto redo_switch;
1995 }
a6ec74c1 1996 }
49704364 1997
18529408
IZ
1998 if (new_s)
1999 *new_s = s;
2000 PUTBACK;
2001 return SP - PL_stack_base - start_sp_offset;
2002}
2003
2004PP(pp_unpack)
2005{
97aff369 2006 dVAR;
18529408 2007 dSP;
bab9c0ac 2008 dPOPPOPssrl;
18529408
IZ
2009 I32 gimme = GIMME_V;
2010 STRLEN llen;
2011 STRLEN rlen;
5c144d81
NC
2012 const char *pat = SvPV_const(left, llen);
2013 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2014 const char *strend = s + rlen;
2015 const char *patend = pat + llen;
08ca2aa3 2016 I32 cnt;
18529408
IZ
2017
2018 PUTBACK;
7accc089 2019 cnt = unpackstring(pat, patend, s, strend,
49704364 2020 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2021 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2022
18529408
IZ
2023 SPAGAIN;
2024 if ( !cnt && gimme == G_SCALAR )
2025 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2026 RETURN;
2027}
2028
f337b084 2029STATIC U8 *
f7fe979e 2030doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2031{
f337b084 2032 *h++ = PL_uuemap[len];
a6ec74c1 2033 while (len > 2) {
f337b084
TH
2034 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2035 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2036 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2037 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2038 s += 3;
2039 len -= 3;
2040 }
2041 if (len > 0) {
f7fe979e 2042 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2043 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2044 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2045 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2046 *h++ = PL_uuemap[0];
a6ec74c1 2047 }
f337b084
TH
2048 *h++ = '\n';
2049 return h;
a6ec74c1
JH
2050}
2051
2052STATIC SV *
f7fe979e 2053S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2054{
8b6e33c7
AL
2055 SV *result = newSVpvn(s, l);
2056 char *const result_c = SvPV_nolen(result); /* convenience */
2057 char *out = result_c;
2058 bool skip = 1;
2059 bool ignore = 0;
a6ec74c1 2060
7918f24d
NC
2061 PERL_ARGS_ASSERT_IS_AN_INT;
2062
a6ec74c1
JH
2063 while (*s) {
2064 switch (*s) {
2065 case ' ':
2066 break;
2067 case '+':
2068 if (!skip) {
2069 SvREFCNT_dec(result);
2070 return (NULL);
2071 }
2072 break;
2073 case '0':
2074 case '1':
2075 case '2':
2076 case '3':
2077 case '4':
2078 case '5':
2079 case '6':
2080 case '7':
2081 case '8':
2082 case '9':
2083 skip = 0;
2084 if (!ignore) {
2085 *(out++) = *s;
2086 }
2087 break;
2088 case '.':
2089 ignore = 1;
2090 break;
2091 default:
2092 SvREFCNT_dec(result);
2093 return (NULL);
2094 }
2095 s++;
2096 }
2097 *(out++) = '\0';
2098 SvCUR_set(result, out - result_c);
2099 return (result);
2100}
2101
2102/* pnum must be '\0' terminated */
2103STATIC int
2104S_div128(pTHX_ SV *pnum, bool *done)
2105{
8b6e33c7
AL
2106 STRLEN len;
2107 char * const s = SvPV(pnum, len);
2108 char *t = s;
2109 int m = 0;
2110
7918f24d
NC
2111 PERL_ARGS_ASSERT_DIV128;
2112
8b6e33c7
AL
2113 *done = 1;
2114 while (*t) {
2115 const int i = m * 10 + (*t - '0');
2116 const int r = (i >> 7); /* r < 10 */
2117 m = i & 0x7F;
2118 if (r) {
2119 *done = 0;
2120 }
2121 *(t++) = '0' + r;
a6ec74c1 2122 }
8b6e33c7
AL
2123 *(t++) = '\0';
2124 SvCUR_set(pnum, (STRLEN) (t - s));
2125 return (m);
a6ec74c1
JH
2126}
2127
18529408 2128/*
7accc089
JH
2129=for apidoc packlist
2130
2131The engine implementing pack() Perl function.
2132
bfce84ec
AL
2133=cut
2134*/
7accc089
JH
2135
2136void
5aaab254 2137Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2138{
97aff369 2139 dVAR;
aadb217d
JH
2140 tempsym_t sym;
2141
7918f24d
NC
2142 PERL_ARGS_ASSERT_PACKLIST;
2143
f7fe979e 2144 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2145
f337b084
TH
2146 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2147 Also make sure any UTF8 flag is loaded */
56eb0262 2148 SvPV_force_nolen(cat);
bfce84ec
AL
2149 if (DO_UTF8(cat))
2150 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2151
49704364
WL
2152 (void)pack_rec( cat, &sym, beglist, endlist );
2153}
2154
f337b084
TH
2155/* like sv_utf8_upgrade, but also repoint the group start markers */
2156STATIC void
2157marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2158 STRLEN len;
2159 tempsym_t *group;
f7fe979e
AL
2160 const char *from_ptr, *from_start, *from_end, **marks, **m;
2161 char *to_start, *to_ptr;
f337b084
TH
2162
2163 if (SvUTF8(sv)) return;
2164
aa07b2f6 2165 from_start = SvPVX_const(sv);
f337b084
TH
2166 from_end = from_start + SvCUR(sv);
2167 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2168 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2169 if (from_ptr == from_end) {
2170 /* Simple case: no character needs to be changed */
2171 SvUTF8_on(sv);
2172 return;
2173 }
2174
3473cf63 2175 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2176 Newx(to_start, len, char);
f337b084
TH
2177 Copy(from_start, to_start, from_ptr-from_start, char);
2178 to_ptr = to_start + (from_ptr-from_start);
2179
a02a5408 2180 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2181 for (group=sym_ptr; group; group = group->previous)
2182 marks[group->level] = from_start + group->strbeg;
2183 marks[sym_ptr->level+1] = from_end+1;
2184 for (m = marks; *m < from_ptr; m++)
2185 *m = to_start + (*m-from_start);
2186
2187 for (;from_ptr < from_end; from_ptr++) {
2188 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2189 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2190 }
2191 *to_ptr = 0;
2192
2193 while (*m == from_ptr) *m++ = to_ptr;
2194 if (m != marks + sym_ptr->level+1) {
2195 Safefree(marks);
2196 Safefree(to_start);
5637ef5b
NC
2197 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2198 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2199 }
2200 for (group=sym_ptr; group; group = group->previous)
2201 group->strbeg = marks[group->level] - to_start;
2202 Safefree(marks);
2203
2204 if (SvOOK(sv)) {
2205 if (SvIVX(sv)) {
b162af07 2206 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2207 from_start -= SvIVX(sv);
2208 SvIV_set(sv, 0);
2209 }
2210 SvFLAGS(sv) &= ~SVf_OOK;
2211 }
2212 if (SvLEN(sv) != 0)
2213 Safefree(from_start);
f880fe2f 2214 SvPV_set(sv, to_start);
b162af07
SP
2215 SvCUR_set(sv, to_ptr - to_start);
2216 SvLEN_set(sv, len);
f337b084
TH
2217 SvUTF8_on(sv);
2218}
2219
2220/* Exponential string grower. Makes string extension effectively O(n)
2221 needed says how many extra bytes we need (not counting the final '\0')
2222 Only grows the string if there is an actual lack of space
2223*/
2224STATIC char *
0bd48802 2225S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2226 const STRLEN cur = SvCUR(sv);
2227 const STRLEN len = SvLEN(sv);
f337b084 2228 STRLEN extend;
7918f24d
NC
2229
2230 PERL_ARGS_ASSERT_SV_EXP_GROW;
2231
f337b084
TH
2232 if (len - cur > needed) return SvPVX(sv);
2233 extend = needed > len ? needed : len;
2234 return SvGROW(sv, len+extend+1);
2235}
49704364
WL
2236
2237STATIC
2238SV **
f337b084 2239S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2240{
97aff369 2241 dVAR;
49704364 2242 tempsym_t lookahead;
f337b084
TH
2243 I32 items = endlist - beglist;
2244 bool found = next_symbol(symptr);
2245 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2246 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2247
7918f24d
NC
2248 PERL_ARGS_ASSERT_PACK_REC;
2249
f337b084
TH
2250 if (symptr->level == 0 && found && symptr->code == 'U') {
2251 marked_upgrade(aTHX_ cat, symptr);
2252 symptr->flags |= FLAG_DO_UTF8;
2253 utf8 = 0;
49704364 2254 }
f337b084 2255 symptr->strbeg = SvCUR(cat);
49704364
WL
2256
2257 while (found) {
f337b084
TH
2258 SV *fromstr;
2259 STRLEN fromlen;
2260 I32 len;
a0714e2c 2261 SV *lengthcode = NULL;
49704364 2262 I32 datumtype = symptr->code;
f337b084
TH
2263 howlen_t howlen = symptr->howlen;
2264 char *start = SvPVX(cat);
2265 char *cur = start + SvCUR(cat);
49704364 2266
f337b084
TH
2267#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2268
2269 switch (howlen) {
fc241834 2270 case e_star:
f337b084
TH
2271 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2272 0 : items;
2273 break;
2274 default:
2275 /* e_no_len and e_number */
2276 len = symptr->length;
49704364
WL
2277 break;
2278 }
2279
f337b084 2280 if (len) {
a7a3cfaa 2281 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2282
a7a3cfaa
TH
2283 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2284 /* We can process this letter. */
2285 STRLEN size = props & PACK_SIZE_MASK;
2286 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2287 }
f337b084
TH
2288 }
2289
49704364
WL
2290 /* Look ahead for next symbol. Do we have code/code? */
2291 lookahead = *symptr;
2292 found = next_symbol(&lookahead);
246f24af
TH
2293 if (symptr->flags & FLAG_SLASH) {
2294 IV count;
f337b084 2295 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2296 if (strchr("aAZ", lookahead.code)) {
2297 if (lookahead.howlen == e_number) count = lookahead.length;
2298 else {
ce399ba6 2299 if (items > 0) {
48a5da33 2300 count = sv_len_utf8(*beglist);
ce399ba6 2301 }
246f24af
TH
2302 else count = 0;
2303 if (lookahead.code == 'Z') count++;
2304 }
2305 } else {
2306 if (lookahead.howlen == e_number && lookahead.length < items)
2307 count = lookahead.length;
2308 else count = items;
2309 }
2310 lookahead.howlen = e_number;
2311 lookahead.length = count;
2312 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2313 }
49704364 2314
fc241834
RGS
2315 /* Code inside the switch must take care to properly update
2316 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2317 doesn't simply leave using break */
1109a392 2318 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2319 default:
f337b084
TH
2320 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2321 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2322 case '%':
49704364 2323 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2324 {
2325 char *from;
2326#ifdef PERL_PACK_CAN_SHRIEKSIGN
2327 case '.' | TYPE_IS_SHRIEKING:
2328#endif
2329 case '.':
2330 if (howlen == e_star) from = start;
2331 else if (len == 0) from = cur;
2332 else {
2333 tempsym_t *group = symptr;
2334
2335 while (--len && group) group = group->previous;
2336 from = group ? start + group->strbeg : start;
2337 }
2338 fromstr = NEXTFROM;
2339 len = SvIV(fromstr);
2340 goto resize;
2341#ifdef PERL_PACK_CAN_SHRIEKSIGN
2342 case '@' | TYPE_IS_SHRIEKING:
2343#endif
a6ec74c1 2344 case '@':
28be1210
TH
2345 from = start + symptr->strbeg;
2346 resize:
2347#ifdef PERL_PACK_CAN_SHRIEKSIGN
2348 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2349#else /* PERL_PACK_CAN_SHRIEKSIGN */
2350 if (utf8)
2351#endif
2352 if (len >= 0) {
2353 while (len && from < cur) {
2354 from += UTF8SKIP(from);
2355 len--;
2356 }
2357 if (from > cur)
2358 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2359 if (len) {
2360 /* Here we know from == cur */
2361 grow:
2362 GROWING(0, cat, start, cur, len);
2363 Zero(cur, len, char);
2364 cur += len;
2365 } else if (from < cur) {
2366 len = cur - from;
2367 goto shrink;
2368 } else goto no_change;
2369 } else {
2370 cur = from;
2371 len = -len;
2372 goto utf8_shrink;
f337b084 2373 }
28be1210
TH
2374 else {
2375 len -= cur - from;
f337b084 2376 if (len > 0) goto grow;
28be1210 2377 if (len == 0) goto no_change;
fc241834 2378 len = -len;
28be1210 2379 goto shrink;
f337b084 2380 }
a6ec74c1 2381 break;
28be1210 2382 }
fc241834 2383 case '(': {
49704364 2384 tempsym_t savsym = *symptr;
66c611c5
MHM
2385 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2386 symptr->flags |= group_modifiers;
49704364
WL
2387 symptr->patend = savsym.grpend;
2388 symptr->level++;
f337b084 2389 symptr->previous = &lookahead;
18529408 2390 while (len--) {
f337b084
TH
2391 U32 was_utf8;
2392 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2393 else symptr->flags &= ~FLAG_PARSE_UTF8;
2394 was_utf8 = SvUTF8(cat);
49704364 2395 symptr->patptr = savsym.grpbeg;
f337b084
TH
2396 beglist = pack_rec(cat, symptr, beglist, endlist);
2397 if (SvUTF8(cat) != was_utf8)
2398 /* This had better be an upgrade while in utf8==0 mode */
2399 utf8 = 1;
2400
49704364 2401 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2402 break; /* No way to continue */
2403 }
ee790063 2404 items = endlist - beglist;
f337b084
TH
2405 lookahead.flags = symptr->flags & ~group_modifiers;
2406 goto no_change;
18529408 2407 }
62f95557
IZ
2408 case 'X' | TYPE_IS_SHRIEKING:
2409 if (!len) /* Avoid division by 0 */
2410 len = 1;
f337b084
TH
2411 if (utf8) {
2412 char *hop, *last;
2413 I32 l = len;
2414 hop = last = start;
2415 while (hop < cur) {
2416 hop += UTF8SKIP(hop);
2417 if (--l == 0) {
2418 last = hop;
2419 l = len;
2420 }
2421 }
2422 if (last > cur)
2423 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2424 cur = last;
2425 break;
2426 }
2427 len = (cur-start) % len;
62f95557 2428 /* FALL THROUGH */
a6ec74c1 2429 case 'X':
f337b084
TH
2430 if (utf8) {
2431 if (len < 1) goto no_change;
28be1210 2432 utf8_shrink:
f337b084
TH
2433 while (len > 0) {
2434 if (cur <= start)
28be1210
TH
2435 Perl_croak(aTHX_ "'%c' outside of string in pack",
2436 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2437 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2438 if (cur <= start)
28be1210
TH
2439 Perl_croak(aTHX_ "'%c' outside of string in pack",
2440 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2441 }
2442 len--;
2443 }
2444 } else {
fc241834 2445 shrink:
f337b084 2446 if (cur - start < len)
28be1210
TH
2447 Perl_croak(aTHX_ "'%c' outside of string in pack",
2448 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2449 cur -= len;
2450 }
2451 if (cur < start+symptr->strbeg) {
2452 /* Make sure group starts don't point into the void */
2453 tempsym_t *group;
9e27e96a 2454 const STRLEN length = cur-start;
f337b084
TH
2455 for (group = symptr;
2456 group && length < group->strbeg;
2457 group = group->previous) group->strbeg = length;
2458 lookahead.strbeg = length;
2459 }
a6ec74c1 2460 break;
fc241834
RGS
2461 case 'x' | TYPE_IS_SHRIEKING: {
2462 I32 ai32;
62f95557
IZ
2463 if (!len) /* Avoid division by 0 */
2464 len = 1;
230e1fce 2465 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2466 else ai32 = (cur - start) % len;
2467 if (ai32 == 0) goto no_change;
2468 len -= ai32;
2469 }
2470 /* FALL THROUGH */
a6ec74c1 2471 case 'x':
f337b084 2472 goto grow;
a6ec74c1
JH
2473 case 'A':
2474 case 'Z':
f337b084 2475 case 'a': {
f7fe979e 2476 const char *aptr;
f337b084 2477
a6ec74c1 2478 fromstr = NEXTFROM;
e62f0680 2479 aptr = SvPV_const(fromstr, fromlen);
f337b084 2480 if (DO_UTF8(fromstr)) {
f7fe979e 2481 const char *end, *s;
f337b084
TH
2482
2483 if (!utf8 && !SvUTF8(cat)) {
2484 marked_upgrade(aTHX_ cat, symptr);
2485 lookahead.flags |= FLAG_DO_UTF8;
2486 lookahead.strbeg = symptr->strbeg;
2487 utf8 = 1;
2488 start = SvPVX(cat);
2489 cur = start + SvCUR(cat);
2490 }
fc241834 2491 if (howlen == e_star) {
f337b084
TH
2492 if (utf8) goto string_copy;
2493 len = fromlen+1;
2494 }
2495 s = aptr;
2496 end = aptr + fromlen;
2497 fromlen = datumtype == 'Z' ? len-1 : len;
2498 while ((I32) fromlen > 0 && s < end) {
2499 s += UTF8SKIP(s);
2500 fromlen--;
2501 }
2502 if (s > end)
2503 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2504 if (utf8) {
fc241834 2505 len = fromlen;
f337b084
TH
2506 if (datumtype == 'Z') len++;
2507 fromlen = s-aptr;
2508 len += fromlen;
fc241834 2509
f337b084 2510 goto string_copy;
fc241834 2511 }
f337b084
TH
2512 fromlen = len - fromlen;
2513 if (datumtype == 'Z') fromlen--;
2514 if (howlen == e_star) {
2515 len = fromlen;
2516 if (datumtype == 'Z') len++;
fc241834 2517 }
f337b084 2518 GROWING(0, cat, start, cur, len);
fc241834 2519 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2520 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2521 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2522 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2523 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2524 cur += fromlen;
a6ec74c1 2525 len -= fromlen;
f337b084
TH
2526 } else if (utf8) {
2527 if (howlen == e_star) {
2528 len = fromlen;
2529 if (datumtype == 'Z') len++;
a6ec74c1 2530 }
f337b084
TH
2531 if (len <= (I32) fromlen) {
2532 fromlen = len;
2533 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2534 }
fc241834 2535 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2536 upgrade, so:
2537 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2538 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2539 len -= fromlen;
2540 while (fromlen > 0) {
230e1fce 2541 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2542 aptr++;
2543 fromlen--;
fc241834 2544 }
f337b084
TH
2545 } else {
2546 string_copy:
2547 if (howlen == e_star) {
2548 len = fromlen;
2549 if (datumtype == 'Z') len++;
2550 }
2551 if (len <= (I32) fromlen) {
2552 fromlen = len;
2553 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2554 }
f337b084
TH
2555 GROWING(0, cat, start, cur, len);
2556 Copy(aptr, cur, fromlen, char);
2557 cur += fromlen;
2558 len -= fromlen;
a6ec74c1 2559 }
f337b084
TH
2560 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2561 cur += len;
3c4fb04a 2562 SvTAINT(cat);
a6ec74c1 2563 break;
f337b084 2564 }
a6ec74c1 2565 case 'B':
f337b084 2566 case 'b': {
b83604b4 2567 const char *str, *end;
f337b084
TH
2568 I32 l, field_len;
2569 U8 bits;
2570 bool utf8_source;
2571 U32 utf8_flags;
a6ec74c1 2572
fc241834 2573 fromstr = NEXTFROM;
b83604b4 2574 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2575 end = str + fromlen;
2576 if (DO_UTF8(fromstr)) {
2577 utf8_source = TRUE;
041457d9 2578 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2579 } else {
2580 utf8_source = FALSE;
2581 utf8_flags = 0; /* Unused, but keep compilers happy */
2582 }
2583 if (howlen == e_star) len = fromlen;
2584 field_len = (len+7)/8;
2585 GROWING(utf8, cat, start, cur, field_len);
2586 if (len > (I32)fromlen) len = fromlen;
2587 bits = 0;
2588 l = 0;
2589 if (datumtype == 'B')
2590 while (l++ < len) {
2591 if (utf8_source) {
95b63a38 2592 UV val = 0;
f337b084
TH
2593 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2594 bits |= val & 1;
2595 } else bits |= *str++ & 1;
2596 if (l & 7) bits <<= 1;
fc241834 2597 else {
f337b084
TH
2598 PUSH_BYTE(utf8, cur, bits);
2599 bits = 0;
a6ec74c1
JH
2600 }
2601 }
f337b084
TH
2602 else
2603 /* datumtype == 'b' */
2604 while (l++ < len) {
2605 if (utf8_source) {
95b63a38 2606 UV val = 0;
f337b084
TH
2607 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2608 if (val & 1) bits |= 0x80;
2609 } else if (*str++ & 1)
2610 bits |= 0x80;
2611 if (l & 7) bits >>= 1;
fc241834 2612 else {
f337b084
TH
2613 PUSH_BYTE(utf8, cur, bits);
2614 bits = 0;
a6ec74c1
JH
2615 }
2616 }
f337b084
TH
2617 l--;
2618 if (l & 7) {
fc241834 2619 if (datumtype == 'B')
f337b084 2620 bits <<= 7 - (l & 7);
fc241834 2621 else
f337b084
TH
2622 bits >>= 7 - (l & 7);
2623 PUSH_BYTE(utf8, cur, bits);
2624 l += 7;
a6ec74c1 2625 }
f337b084
TH
2626 /* Determine how many chars are left in the requested field */
2627 l /= 8;
2628 if (howlen == e_star) field_len = 0;
2629 else field_len -= l;
2630 Zero(cur, field_len, char);
2631 cur += field_len;
a6ec74c1 2632 break;
f337b084 2633 }
a6ec74c1 2634 case 'H':
f337b084 2635 case 'h': {
10516c54 2636 const char *str, *end;
f337b084
TH
2637 I32 l, field_len;
2638 U8 bits;
2639 bool utf8_source;
2640 U32 utf8_flags;
a6ec74c1 2641
fc241834 2642 fromstr = NEXTFROM;
10516c54 2643 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2644 end = str + fromlen;
2645 if (DO_UTF8(fromstr)) {
2646 utf8_source = TRUE;
041457d9 2647 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2648 } else {
2649 utf8_source = FALSE;
2650 utf8_flags = 0; /* Unused, but keep compilers happy */
2651 }
2652 if (howlen == e_star) len = fromlen;
2653 field_len = (len+1)/2;
2654 GROWING(utf8, cat, start, cur, field_len);
2655 if (!utf8 && len > (I32)fromlen) len = fromlen;
2656 bits = 0;
2657 l = 0;
2658 if (datumtype == 'H')
2659 while (l++ < len) {
2660 if (utf8_source) {
95b63a38 2661 UV val = 0;
f337b084
TH
2662 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2663 if (val < 256 && isALPHA(val))
2664 bits |= (val + 9) & 0xf;
a6ec74c1 2665 else
f337b084
TH
2666 bits |= val & 0xf;
2667 } else if (isALPHA(*str))
2668 bits |= (*str++ + 9) & 0xf;
2669 else
2670 bits |= *str++ & 0xf;
2671 if (l & 1) bits <<= 4;
fc241834 2672 else {
f337b084
TH
2673 PUSH_BYTE(utf8, cur, bits);
2674 bits = 0;
a6ec74c1
JH
2675 }
2676 }
f337b084
TH
2677 else
2678 while (l++ < len) {
2679 if (utf8_source) {
95b63a38 2680 UV val = 0;
f337b084
TH
2681 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2682 if (val < 256 && isALPHA(val))
2683 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2684 else
f337b084
TH
2685 bits |= (val & 0xf) << 4;
2686 } else if (isALPHA(*str))
2687 bits |= ((*str++ + 9) & 0xf) << 4;
2688 else
2689 bits |= (*str++ & 0xf) << 4;
2690 if (l & 1) bits >>= 4;
fc241834 2691 else {
f337b084
TH
2692 PUSH_BYTE(utf8, cur, bits);
2693 bits = 0;
a6ec74c1 2694 }
fc241834 2695 }
f337b084
TH
2696 l--;
2697 if (l & 1) {
2698 PUSH_BYTE(utf8, cur, bits);
2699 l++;
2700 }
2701 /* Determine how many chars are left in the requested field */
2702 l /= 2;
2703 if (howlen == e_star) field_len = 0;
2704 else field_len -= l;
2705 Zero(cur, field_len, char);
2706 cur += field_len;
2707 break;
fc241834
RGS
2708 }
2709 case 'c':
f337b084
TH
2710 while (len-- > 0) {
2711 IV aiv;
2712 fromstr = NEXTFROM;
2713 aiv = SvIV(fromstr);
a2a5de95
NC
2714 if ((-128 > aiv || aiv > 127))
2715 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2716 "Character in 'c' format wrapped in pack");
585ec06d 2717 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2718 }
2719 break;
2720 case 'C':
f337b084
TH
2721 if (len == 0) {
2722 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2723 break;
2724 }
a6ec74c1 2725 while (len-- > 0) {
f337b084 2726 IV aiv;
a6ec74c1 2727 fromstr = NEXTFROM;
f337b084 2728 aiv = SvIV(fromstr);
a2a5de95
NC
2729 if ((0 > aiv || aiv > 0xff))
2730 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2731 "Character in 'C' format wrapped in pack");
1651fc44 2732 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2733 }
fc241834
RGS
2734 break;
2735 case 'W': {
2736 char *end;
670f1322 2737 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2738
2739 end = start+SvLEN(cat)-1;
2740 if (utf8) end -= UTF8_MAXLEN-1;
2741 while (len-- > 0) {
2742 UV auv;
2743 fromstr = NEXTFROM;
2744 auv = SvUV(fromstr);
2745 if (in_bytes) auv = auv % 0x100;
2746 if (utf8) {
2747 W_utf8:
2748 if (cur > end) {
2749 *cur = '\0';
b162af07 2750 SvCUR_set(cat, cur - start);
fc241834
RGS
2751
2752 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2753 end = start+SvLEN(cat)-UTF8_MAXLEN;
2754 }
230e1fce
NC
2755 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2756 NATIVE_TO_UNI(auv),
041457d9 2757 warn_utf8 ?
230e1fce 2758 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2759 } else {
2760 if (auv >= 0x100) {
2761 if (!SvUTF8(cat)) {
2762 *cur = '\0';
b162af07 2763 SvCUR_set(cat, cur - start);
fc241834
RGS
2764 marked_upgrade(aTHX_ cat, symptr);
2765 lookahead.flags |= FLAG_DO_UTF8;
2766 lookahead.strbeg = symptr->strbeg;
2767 utf8 = 1;
2768 start = SvPVX(cat);
2769 cur = start + SvCUR(cat);
2770 end = start+SvLEN(cat)-UTF8_MAXLEN;
2771 goto W_utf8;
2772 }
a2a5de95
NC
2773 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2774 "Character in 'W' format wrapped in pack");
fc241834
RGS
2775 auv &= 0xff;
2776 }
2777 if (cur >= end) {
2778 *cur = '\0';
b162af07 2779 SvCUR_set(cat, cur - start);
fc241834
RGS
2780 GROWING(0, cat, start, cur, len+1);
2781 end = start+SvLEN(cat)-1;
2782 }
fe2774ed 2783 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2784 }
2785 }
2786 break;
fc241834
RGS
2787 }
2788 case 'U': {
2789 char *end;
2790
2791 if (len == 0) {
2792 if (!(symptr->flags & FLAG_DO_UTF8)) {
2793 marked_upgrade(aTHX_ cat, symptr);
2794 lookahead.flags |= FLAG_DO_UTF8;
2795 lookahead.strbeg = symptr->strbeg;
2796 }
2797 utf8 = 0;
2798 goto no_change;
2799 }
2800
2801 end = start+SvLEN(cat);
2802 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2803 while (len-- > 0) {
fc241834 2804 UV auv;
a6ec74c1 2805 fromstr = NEXTFROM;
fc241834
RGS
2806 auv = SvUV(fromstr);
2807 if (utf8) {
230e1fce 2808 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2809 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2810 warn_utf8 ?
fc241834
RGS
2811 0 : UNICODE_ALLOW_ANY);
2812 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2813 *cur = '\0';
b162af07 2814 SvCUR_set(cat, cur - start);
fc241834
RGS
2815 GROWING(0, cat, start, cur,
2816 len+(endb-buffer)*UTF8_EXPAND);
2817 end = start+SvLEN(cat);
2818 }
64844641 2819 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2820 } else {
2821 if (cur >= end) {
2822 *cur = '\0';
b162af07 2823 SvCUR_set(cat, cur - start);
fc241834
RGS
2824 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2825 end = start+SvLEN(cat)-UTF8_MAXLEN;
2826 }
230e1fce 2827 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2828 warn_utf8 ?
230e1fce 2829 0 : UNICODE_ALLOW_ANY);
fc241834 2830 }
a6ec74c1 2831 }
a6ec74c1 2832 break;
fc241834 2833 }
a6ec74c1
JH
2834 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2835 case 'f':
a6ec74c1 2836 while (len-- > 0) {
f337b084
TH
2837 float afloat;
2838 NV anv;
a6ec74c1 2839 fromstr = NEXTFROM;
f337b084 2840 anv = SvNV(fromstr);
85bba25f 2841# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2842 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2843 * on Alpha; fake it if we don't have them.
2844 */
f337b084 2845 if (anv > FLT_MAX)
fc241834 2846 afloat = FLT_MAX;
f337b084 2847 else if (anv < -FLT_MAX)
fc241834 2848 afloat = -FLT_MAX;
f337b084 2849 else afloat = (float)anv;
baf3cf9c 2850# else
f337b084 2851 afloat = (float)anv;
baf3cf9c 2852# endif
1109a392 2853 DO_BO_PACK_N(afloat, float);
f337b084 2854 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2855 }
2856 break;
2857 case 'd':
a6ec74c1 2858 while (len-- > 0) {
f337b084
TH
2859 double adouble;
2860 NV anv;
a6ec74c1 2861 fromstr = NEXTFROM;
f337b084 2862 anv = SvNV(fromstr);
85bba25f 2863# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2864 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2865 * on Alpha; fake it if we don't have them.
2866 */
f337b084 2867 if (anv > DBL_MAX)
fc241834 2868 adouble = DBL_MAX;
f337b084 2869 else if (anv < -DBL_MAX)
fc241834 2870 adouble = -DBL_MAX;
f337b084 2871 else adouble = (double)anv;
baf3cf9c 2872# else
f337b084 2873 adouble = (double)anv;
baf3cf9c 2874# endif
1109a392 2875 DO_BO_PACK_N(adouble, double);
f337b084 2876 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2877 }
2878 break;
fc241834 2879 case 'F': {
275663fa 2880 NV_bytes anv;
1109a392 2881 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2882 while (len-- > 0) {
2883 fromstr = NEXTFROM;
cd07c537
DM
2884#ifdef __GNUC__
2885 /* to work round a gcc/x86 bug; don't use SvNV */
2886 anv.nv = sv_2nv(fromstr);
2887#else
275663fa 2888 anv.nv = SvNV(fromstr);
cd07c537 2889#endif
1109a392 2890 DO_BO_PACK_N(anv, NV);
275663fa 2891 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2892 }
2893 break;
fc241834 2894 }
92d41999 2895#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2896 case 'D': {
275663fa 2897 ld_bytes aldouble;
1109a392
MHM
2898 /* long doubles can have unused bits, which may be nonzero */
2899 Zero(&aldouble, 1, long double);
92d41999
JH
2900 while (len-- > 0) {
2901 fromstr = NEXTFROM;
cd07c537
DM
2902# ifdef __GNUC__
2903 /* to work round a gcc/x86 bug; don't use SvNV */
2904 aldouble.ld = (long double)sv_2nv(fromstr);
2905# else
275663fa 2906 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2907# endif
1109a392 2908 DO_BO_PACK_N(aldouble, long double);
275663fa 2909 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2910 }
2911 break;
fc241834 2912 }
92d41999 2913#endif
7212898e 2914#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2915 case 'n' | TYPE_IS_SHRIEKING:
7212898e 2916#endif
a6ec74c1
JH
2917 case 'n':
2918 while (len-- > 0) {
f337b084 2919 I16 ai16;
a6ec74c1 2920 fromstr = NEXTFROM;
ef108786 2921 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2922#ifdef HAS_HTONS
ef108786 2923 ai16 = PerlSock_htons(ai16);
a6ec74c1 2924#endif
f337b084 2925 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2926 }
2927 break;
7212898e 2928#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2929 case 'v' | TYPE_IS_SHRIEKING:
7212898e 2930#endif
a6ec74c1
JH
2931 case 'v':
2932 while (len-- > 0) {
f337b084 2933 I16 ai16;
a6ec74c1 2934 fromstr = NEXTFROM;
ef108786 2935 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2936#ifdef HAS_HTOVS
ef108786 2937 ai16 = htovs(ai16);
a6ec74c1 2938#endif
f337b084 2939 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2940 }
2941 break;
49704364 2942 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2943#if SHORTSIZE != SIZE16
fc241834 2944 while (len-- > 0) {
f337b084 2945 unsigned short aushort;
fc241834
RGS
2946 fromstr = NEXTFROM;
2947 aushort = SvUV(fromstr);
2948 DO_BO_PACK(aushort, s);
f337b084 2949 PUSH_VAR(utf8, cur, aushort);
fc241834 2950 }
49704364
WL
2951 break;
2952#else
2953 /* Fall through! */
a6ec74c1 2954#endif
49704364 2955 case 'S':
fc241834 2956 while (len-- > 0) {
f337b084 2957 U16 au16;
fc241834
RGS
2958 fromstr = NEXTFROM;
2959 au16 = (U16)SvUV(fromstr);
2960 DO_BO_PACK(au16, 16);
f337b084 2961 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2962 }
2963 break;
49704364 2964 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2965#if SHORTSIZE != SIZE16
fc241834 2966 while (len-- > 0) {
f337b084 2967 short ashort;
fc241834
RGS
2968 fromstr = NEXTFROM;
2969 ashort = SvIV(fromstr);
2970 DO_BO_PACK(ashort, s);
f337b084 2971 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2972 }
49704364
WL
2973 break;
2974#else
2975 /* Fall through! */
a6ec74c1 2976#endif
49704364
WL
2977 case 's':
2978 while (len-- > 0) {
f337b084 2979 I16 ai16;
49704364 2980 fromstr = NEXTFROM;
ef108786
MHM
2981 ai16 = (I16)SvIV(fromstr);
2982 DO_BO_PACK(ai16, 16);
f337b084 2983 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2984 }
2985 break;
2986 case 'I':
49704364 2987 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2988 while (len-- > 0) {
f337b084 2989 unsigned int auint;
a6ec74c1
JH
2990 fromstr = NEXTFROM;
2991 auint = SvUV(fromstr);
1109a392 2992 DO_BO_PACK(auint, i);
f337b084 2993 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2994 }
2995 break;
92d41999
JH
2996 case 'j':
2997 while (len-- > 0) {
f337b084 2998 IV aiv;
92d41999
JH
2999 fromstr = NEXTFROM;
3000 aiv = SvIV(fromstr);
1109a392
MHM
3001#if IVSIZE == INTSIZE
3002 DO_BO_PACK(aiv, i);
3003#elif IVSIZE == LONGSIZE
3004 DO_BO_PACK(aiv, l);
3005#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3006 DO_BO_PACK(aiv, 64);
f337b084
TH
3007#else
3008 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3009#endif
f337b084 3010 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3011 }
3012 break;
3013 case 'J':
3014 while (len-- > 0) {
f337b084 3015 UV auv;
92d41999
JH
3016 fromstr = NEXTFROM;
3017 auv = SvUV(fromstr);
1109a392
MHM
3018#if UVSIZE == INTSIZE
3019 DO_BO_PACK(auv, i);
3020#elif UVSIZE == LONGSIZE
3021 DO_BO_PACK(auv, l);
3022#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3023 DO_BO_PACK(auv, 64);
f337b084
TH
3024#else
3025 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3026#endif
f337b084 3027 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3028 }
3029 break;
a6ec74c1
JH
3030 case 'w':
3031 while (len-- > 0) {
f337b084 3032 NV anv;
a6ec74c1 3033 fromstr = NEXTFROM;
15e9f109 3034 anv = SvNV(fromstr);
a6ec74c1 3035
f337b084
TH
3036 if (anv < 0) {
3037 *cur = '\0';
b162af07 3038 SvCUR_set(cat, cur - start);
49704364 3039 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3040 }
a6ec74c1 3041
196b62db
NC
3042 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3043 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3044 any negative IVs will have already been got by the croak()
3045 above. IOK is untrue for fractions, so we test them
3046 against UV_MAX_P1. */
f337b084
TH
3047 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3048 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3049 char *in = buf + sizeof(buf);
196b62db 3050 UV auv = SvUV(fromstr);
a6ec74c1
JH
3051
3052 do {
eb160463 3053 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3054 auv >>= 7;
3055 } while (auv);
3056 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3057 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3058 in, (buf + sizeof(buf)) - in);
3059 } else if (SvPOKp(fromstr))
3060 goto w_string;
a6ec74c1 3061 else if (SvNOKp(fromstr)) {
0258719b 3062 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 3063 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
3064 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3065 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3066 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3067 Some C compilers are strict about integral constant
3068 expressions so we conservatively divide by a slightly
3069 smaller integer instead of multiplying by the exact
3070 floating-point value.
0258719b
NC
3071 */
3072#ifdef NV_MAX_10_EXP
f337b084 3073 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3074 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3075#else
f337b084 3076 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3077 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3078#endif
a6ec74c1
JH
3079 char *in = buf + sizeof(buf);
3080
8b6e33c7 3081 anv = Perl_floor(anv);
a6ec74c1 3082 do {
8b6e33c7 3083 const NV next = Perl_floor(anv / 128);
a6ec74c1 3084 if (in <= buf) /* this cannot happen ;-) */
49704364 3085 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3086 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3087 anv = next;
3088 } while (anv > 0);
a6ec74c1 3089 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3090 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3091 in, (buf + sizeof(buf)) - in);
3092 } else {
8b6e33c7
AL
3093 const char *from;
3094 char *result, *in;
735b914b
JH
3095 SV *norm;
3096 STRLEN len;
3097 bool done;
3098
f337b084 3099 w_string:
735b914b 3100 /* Copy string and check for compliance */
349d4f2f 3101 from = SvPV_const(fromstr, len);
735b914b 3102 if ((norm = is_an_int(from, len)) == NULL)
49704364 3103 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3104
a02a5408 3105 Newx(result, len, char);
735b914b
JH
3106 in = result + len;
3107 done = FALSE;
f337b084 3108 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3109 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3110 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3111 in, (result + len) - in);
735b914b
JH
3112 Safefree(result);
3113 SvREFCNT_dec(norm); /* free norm */
fc241834 3114 }
a6ec74c1
JH
3115 }
3116 break;
3117 case 'i':
49704364 3118 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3119 while (len-- > 0) {
f337b084 3120 int aint;
a6ec74c1
JH
3121 fromstr = NEXTFROM;
3122 aint = SvIV(fromstr);
1109a392 3123 DO_BO_PACK(aint, i);
f337b084 3124 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3125 }
3126 break;
7212898e 3127#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3128 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3129#endif
a6ec74c1
JH
3130 case 'N':
3131 while (len-- > 0) {
f337b084 3132 U32 au32;
a6ec74c1 3133 fromstr = NEXTFROM;
ef108786 3134 au32 = SvUV(fromstr);
a6ec74c1 3135#ifdef HAS_HTONL
ef108786 3136 au32 = PerlSock_htonl(au32);
a6ec74c1 3137#endif
f337b084 3138 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3139 }
3140 break;
7212898e 3141#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3142 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3143#endif
a6ec74c1
JH
3144 case 'V':
3145 while (len-- > 0) {
f337b084 3146 U32 au32;
a6ec74c1 3147 fromstr = NEXTFROM;
ef108786 3148 au32 = SvUV(fromstr);
a6ec74c1 3149#ifdef HAS_HTOVL
ef108786 3150 au32 = htovl(au32);
a6ec74c1 3151#endif
f337b084 3152 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3153 }
3154 break;
49704364 3155 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3156#if LONGSIZE != SIZE32
fc241834 3157 while (len-- > 0) {
f337b084 3158 unsigned long aulong;
fc241834
RGS
3159 fromstr = NEXTFROM;
3160 aulong = SvUV(fromstr);
3161 DO_BO_PACK(aulong, l);
f337b084 3162 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3163 }
49704364
WL
3164 break;
3165#else
3166 /* Fall though! */
a6ec74c1 3167#endif
49704364 3168 case 'L':
fc241834 3169 while (len-- > 0) {
f337b084 3170 U32 au32;
fc241834
RGS
3171 fromstr = NEXTFROM;
3172 au32 = SvUV(fromstr);
3173 DO_BO_PACK(au32, 32);
f337b084 3174 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3175 }
3176 break;
49704364 3177 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3178#if LONGSIZE != SIZE32
fc241834 3179 while (len-- > 0) {
f337b084 3180 long along;
fc241834
RGS
3181 fromstr = NEXTFROM;
3182 along = SvIV(fromstr);
3183 DO_BO_PACK(along, l);
f337b084 3184 PUSH_VAR(utf8, cur, along);
a6ec74c1 3185 }
49704364
WL
3186 break;
3187#else
3188 /* Fall though! */
a6ec74c1 3189#endif
49704364
WL
3190 case 'l':
3191 while (len-- > 0) {
f337b084 3192 I32 ai32;
49704364 3193 fromstr = NEXTFROM;
ef108786
MHM
3194 ai32 = SvIV(fromstr);
3195 DO_BO_PACK(ai32, 32);
f337b084 3196 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3197 }
3198 break;
3199#ifdef HAS_QUAD
3200 case 'Q':
3201 while (len-- > 0) {
f337b084 3202 Uquad_t auquad;
a6ec74c1 3203 fromstr = NEXTFROM;
f337b084 3204 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3205 DO_BO_PACK(auquad, 64);
f337b084 3206 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3207 }
3208 break;
3209 case 'q':
3210 while (len-- > 0) {
f337b084 3211 Quad_t aquad;
a6ec74c1
JH
3212 fromstr = NEXTFROM;
3213 aquad = (Quad_t)SvIV(fromstr);
1109a392 3214 DO_BO_PACK(aquad, 64);
f337b084 3215 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3216 }
3217 break;
f337b084 3218#endif /* HAS_QUAD */
a6ec74c1
JH
3219 case 'P':
3220 len = 1; /* assume SV is correct length */
f337b084 3221 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3222 /* Fall through! */
a6ec74c1
JH
3223 case 'p':
3224 while (len-- > 0) {
83003860 3225 const char *aptr;
f337b084 3226
a6ec74c1 3227 fromstr = NEXTFROM;
28a4f200
TH
3228 SvGETMAGIC(fromstr);
3229 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3230 else {
a6ec74c1
JH
3231 /* XXX better yet, could spirit away the string to
3232 * a safe spot and hang on to it until the result
3233 * of pack() (and all copies of the result) are
3234 * gone.
3235 */
041457d9 3236 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3237 !SvREADONLY(fromstr)))) {
3238 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3239 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3240 }
3241 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3242 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3243 else
2596d9fe 3244 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3245 }
07409e01 3246 DO_BO_PACK_PC(aptr);
f337b084 3247 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3248 }
3249 break;
fc241834 3250 case 'u': {
f7fe979e 3251 const char *aptr, *aend;
fc241834 3252 bool from_utf8;
f337b084 3253
a6ec74c1 3254 fromstr = NEXTFROM;
fc241834
RGS
3255 if (len <= 2) len = 45;
3256 else len = len / 3 * 3;
3257 if (len >= 64) {
a2a5de95
NC
3258 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3259 "Field too wide in 'u' format in pack");
fc241834
RGS
3260 len = 63;
3261 }
83003860 3262 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3263 from_utf8 = DO_UTF8(fromstr);
3264 if (from_utf8) {
3265 aend = aptr + fromlen;
3f63b0e5 3266 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3267 } else aend = NULL; /* Unused, but keep compilers happy */
3268 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3269 while (fromlen > 0) {
fc241834 3270 U8 *end;
a6ec74c1 3271 I32 todo;
fc241834 3272 U8 hunk[1+63/3*4+1];
a6ec74c1 3273
eb160463 3274 if ((I32)fromlen > len)
a6ec74c1
JH
3275 todo = len;
3276 else
3277 todo = fromlen;
fc241834
RGS
3278 if (from_utf8) {
3279 char buffer[64];
3280 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3281 'u' | TYPE_IS_PACK)) {
3282 *cur = '\0';
b162af07 3283 SvCUR_set(cat, cur - start);
5637ef5b
NC
3284 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3285 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3286 aptr, aend, buffer, (long) todo);
fc241834
RGS
3287 }
3288 end = doencodes(hunk, buffer, todo);
3289 } else {
3290 end = doencodes(hunk, aptr, todo);
3291 aptr += todo;
3292 }
3293 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3294 fromlen -= todo;
3295 }
a6ec74c1
JH
3296 break;
3297 }
f337b084
TH
3298 }
3299 *cur = '\0';
b162af07 3300 SvCUR_set(cat, cur - start);
f337b084 3301 no_change:
49704364 3302 *symptr = lookahead;
a6ec74c1 3303 }
49704364 3304 return beglist;
18529408
IZ
3305}
3306#undef NEXTFROM
3307
3308
3309PP(pp_pack)
3310{
97aff369 3311 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3312 SV *cat = TARG;
18529408 3313 STRLEN fromlen;
349d4f2f 3314 SV *pat_sv = *++MARK;
eb578fdb
KW
3315 const char *pat = SvPV_const(pat_sv, fromlen);
3316 const char *patend = pat + fromlen;
18529408
IZ
3317
3318 MARK++;
76f68e9b 3319 sv_setpvs(cat, "");
f337b084 3320 SvUTF8_off(cat);
18529408 3321
7accc089 3322 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3323
a6ec74c1
JH
3324 SvSETMAGIC(cat);
3325 SP = ORIGMARK;
3326 PUSHs(cat);
3327 RETURN;
3328}
a6ec74c1 3329
73cb7263
NC
3330/*
3331 * Local variables:
3332 * c-indentation-style: bsd
3333 * c-basic-offset: 4
14d04a33 3334 * indent-tabs-mode: nil
73cb7263
NC
3335 * End:
3336 *
14d04a33 3337 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3338 */