This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate:
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
e6906430 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
ae53e38e 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
40d34c0d
SB
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
e02cb1b1
NC
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
d2ae4405 65 (symptr)->howlen = e_no_len; \
e02cb1b1
NC
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
cc3ea0aa 72#if PERL_VERSION >= 9
3314a176
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
500d42d2 79#define PERL_PACK_CAN_W
3040befa 80#define PERL_PACK_CAN_DOT
500d42d2
TH
81#else
82#define PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
83#define PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
84#define PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
85#define PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
cc3ea0aa 86#endif
773e5b22
NC
87/* Maximum number of bytes to which a byte can grow due to upgrade */
88#define UTF8_EXPAND 2
cc3ea0aa 89
a6ec74c1 90/*
a6ec74c1
JH
91 * Offset for integer pack/unpack.
92 *
93 * On architectures where I16 and I32 aren't really 16 and 32 bits,
94 * which for now are all Crays, pack and unpack have to play games.
95 */
96
97/*
98 * These values are required for portability of pack() output.
99 * If they're not right on your machine, then pack() and unpack()
100 * wouldn't work right anyway; you'll need to apply the Cray hack.
101 * (I'd like to check them with #if, but you can't use sizeof() in
102 * the preprocessor.) --???
103 */
104/*
105 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
106 defines are now in config.h. --Andy Dougherty April 1998
107 */
108#define SIZE16 2
109#define SIZE32 4
110
111/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
112 --jhi Feb 1999 */
113
cc3ea0aa
NC
114#if U16SIZE > SIZE16 || U32SIZE > SIZE32
115# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
500d42d2
TH
116# define OFF16(p) ((char*)(p))
117# define OFF32(p) ((char*)(p))
a6ec74c1 118# else
cc3ea0aa 119# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
120# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
121# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
122# else
500d42d2 123 ++++ bad cray byte order
a6ec74c1
JH
124# endif
125# endif
a6ec74c1 126#else
500d42d2
TH
127# define OFF16(p) ((char *) (p))
128# define OFF32(p) ((char *) (p))
a6ec74c1
JH
129#endif
130
3314a176
TH
131/* Only to be used inside a loop (see the break) */
132#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
133 if (utf8) { \
134 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
135 } else { \
136 Copy(s, OFF16(p), SIZE16, char); \
137 (s) += SIZE16; \
138 } \
139} STMT_END
500d42d2
TH
140
141/* Only to be used inside a loop (see the break) */
3314a176
TH
142#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
143 if (utf8) { \
144 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
145 } else { \
146 Copy(s, OFF32(p), SIZE32, char); \
147 (s) += SIZE32; \
148 } \
149} STMT_END
150
151#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
152#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
153
154/* Only to be used inside a loop (see the break) */
155#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
500d42d2
TH
156STMT_START { \
157 if (utf8) { \
3314a176
TH
158 if (!uni_to_bytes(aTHX_ &s, strend, \
159 (char *) &var, sizeof(var), datumtype)) break;\
500d42d2
TH
160 } else { \
161 Copy(s, (char *) &var, sizeof(var), char); \
162 s += sizeof(var); \
163 } \
500d42d2
TH
164} STMT_END
165
3314a176 166#define PUSH_VAR(utf8, aptr, var) \
a4de8f71 167 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
3314a176 168
cf168e39
WL
169/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
170#define MAX_SUB_TEMPLATE_LEVEL 100
171
cc3ea0aa 172/* flags (note that type modifiers can also be used as flags!) */
3314a176
TH
173#define FLAG_WAS_UTF8 0x40
174#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
cf168e39 175#define FLAG_UNPACK_ONLY_ONE 0x10
3314a176 176#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
cf168e39
WL
177#define FLAG_SLASH 0x04
178#define FLAG_COMMA 0x02
179#define FLAG_PACK 0x01
180
a6ec74c1
JH
181STATIC SV *
182S_mul128(pTHX_ SV *sv, U8 m)
183{
184 STRLEN len;
185 char *s = SvPV(sv, len);
186 char *t;
a6ec74c1
JH
187
188 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
d7559646 189 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
190
191 sv_catsv(tmpNew, sv);
192 SvREFCNT_dec(sv); /* free old sv */
193 sv = tmpNew;
194 s = SvPV(sv, len);
195 }
196 t = s + len - 1;
197 while (!*t) /* trailing '\0'? */
198 t--;
199 while (t > s) {
e02cb1b1 200 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
201 *(t--) = '0' + (char)(i % 10);
202 m = (char)(i / 10);
a6ec74c1
JH
203 }
204 return (sv);
205}
206
207/* Explosives and implosives. */
208
209#if 'I' == 73 && 'J' == 74
210/* On an ASCII/ISO kind of system */
211#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
212#else
213/*
214 Some other sort of character set - use memchr() so we don't match
215 the null byte.
216 */
217#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
218#endif
219
cc3ea0aa 220/* type modifiers */
62f95557 221#define TYPE_IS_SHRIEKING 0x100
cc3ea0aa
NC
222#define TYPE_IS_BIG_ENDIAN 0x200
223#define TYPE_IS_LITTLE_ENDIAN 0x400
3314a176 224#define TYPE_IS_PACK 0x800
cc3ea0aa
NC
225#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
226#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
227#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
228
229#ifdef PERL_PACK_CAN_SHRIEKSIGN
3040befa 230# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
cc3ea0aa 231#else
3040befa 232# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
cc3ea0aa
NC
233#endif
234
235#ifndef PERL_PACK_CAN_BYTEORDER
236/* Put "can't" first because it is shorter */
237# define TYPE_ENDIANNESS(t) 0
238# define TYPE_NO_ENDIANNESS(t) (t)
239
240# define ENDIANNESS_ALLOWED_TYPES ""
241
242# define DO_BO_UNPACK(var, type)
243# define DO_BO_PACK(var, type)
206b424e
NC
244# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
245# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
cc3ea0aa
NC
246# define DO_BO_UNPACK_N(var, type)
247# define DO_BO_PACK_N(var, type)
248# define DO_BO_UNPACK_P(var)
249# define DO_BO_PACK_P(var)
206b424e
NC
250# define DO_BO_UNPACK_PC(var)
251# define DO_BO_PACK_PC(var)
cc3ea0aa 252
3314a176 253#else /* PERL_PACK_CAN_BYTEORDER */
cc3ea0aa
NC
254
255# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
256# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
257
258# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
259
260# define DO_BO_UNPACK(var, type) \
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
265 default: break; \
266 } \
267 } STMT_END
268
269# define DO_BO_PACK(var, type) \
270 STMT_START { \
271 switch (TYPE_ENDIANNESS(datumtype)) { \
272 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
273 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
274 default: break; \
275 } \
276 } STMT_END
277
206b424e 278# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
cc3ea0aa
NC
279 STMT_START { \
280 switch (TYPE_ENDIANNESS(datumtype)) { \
281 case TYPE_IS_BIG_ENDIAN: \
206b424e 282 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
cc3ea0aa
NC
283 break; \
284 case TYPE_IS_LITTLE_ENDIAN: \
206b424e 285 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
cc3ea0aa
NC
286 break; \
287 default: \
288 break; \
289 } \
290 } STMT_END
291
206b424e 292# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
cc3ea0aa
NC
293 STMT_START { \
294 switch (TYPE_ENDIANNESS(datumtype)) { \
295 case TYPE_IS_BIG_ENDIAN: \
206b424e 296 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
cc3ea0aa
NC
297 break; \
298 case TYPE_IS_LITTLE_ENDIAN: \
206b424e 299 var = (post_cast *) my_htole ## type ((pre_cast) var); \
cc3ea0aa
NC
300 break; \
301 default: \
302 break; \
303 } \
304 } STMT_END
305
306# define BO_CANT_DOIT(action, type) \
307 STMT_START { \
308 switch (TYPE_ENDIANNESS(datumtype)) { \
309 case TYPE_IS_BIG_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
311 "platform", #action, #type); \
312 break; \
313 case TYPE_IS_LITTLE_ENDIAN: \
314 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
315 "platform", #action, #type); \
316 break; \
317 default: \
318 break; \
319 } \
320 } STMT_END
321
322# if PTRSIZE == INTSIZE
206b424e
NC
323# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
324# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
325# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
326# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
cc3ea0aa 327# elif PTRSIZE == LONGSIZE
206b424e
NC
328# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
329# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
330# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
331# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
820b8518
NC
332# elif PTRSIZE == IVSIZE
333# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
334# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
335# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
336# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
cc3ea0aa
NC
337# else
338# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
339# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
206b424e
NC
340# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
341# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
cc3ea0aa
NC
342# endif
343
344# if defined(my_htolen) && defined(my_letohn) && \
345 defined(my_htoben) && defined(my_betohn)
346# define DO_BO_UNPACK_N(var, type) \
347 STMT_START { \
348 switch (TYPE_ENDIANNESS(datumtype)) { \
349 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
350 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
351 default: break; \
352 } \
353 } STMT_END
354
355# define DO_BO_PACK_N(var, type) \
356 STMT_START { \
357 switch (TYPE_ENDIANNESS(datumtype)) { \
358 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
359 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
360 default: break; \
361 } \
362 } STMT_END
363# else
364# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
365# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
366# endif
367
3314a176 368#endif /* PERL_PACK_CAN_BYTEORDER */
cc3ea0aa
NC
369
370#define PACK_SIZE_CANNOT_CSUM 0x80
3314a176 371#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
cc3ea0aa
NC
372#define PACK_SIZE_MASK 0x3F
373
cc3ea0aa
NC
374/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
375 in). You're unlikely ever to need to regenerate them. */
e5464422
TH
376
377#if TYPE_IS_SHRIEKING != 0x100
378 ++++shriek offset should be 256
379#endif
380
381typedef U8 packprops_t;
cc3ea0aa
NC
382#if 'J'-'I' == 1
383/* ASCII */
820b8518 384STATIC const packprops_t packprops[512] = {
e5464422
TH
385 /* normal */
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0,
391 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
cc3ea0aa 392#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
e5464422 393 /* D */ LONG_DOUBLESIZE,
cc3ea0aa 394#else
e5464422 395 0,
cc3ea0aa 396#endif
e5464422
TH
397 0,
398 /* F */ NVSIZE,
399 0, 0,
400 /* I */ sizeof(unsigned int),
401 /* J */ UVSIZE,
402 0,
403 /* L */ SIZE32,
404 0,
405 /* N */ SIZE32,
406 0, 0,
cc3ea0aa 407#if defined(HAS_QUAD)
e5464422 408 /* Q */ sizeof(Uquad_t),
cc3ea0aa 409#else
e5464422 410 0,
cc3ea0aa 411#endif
e5464422
TH
412 0,
413 /* S */ SIZE16,
414 0,
415 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
416 /* V */ SIZE32,
500d42d2 417#if defined(PERL_PACK_CAN_W)
e5464422 418 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
500d42d2 419#else
e5464422 420 0,
500d42d2 421#endif
e5464422
TH
422 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
423 /* c */ sizeof(char),
424 /* d */ sizeof(double),
425 0,
426 /* f */ sizeof(float),
427 0, 0,
428 /* i */ sizeof(int),
429 /* j */ IVSIZE,
430 0,
431 /* l */ SIZE32,
432 0,
433 /* n */ SIZE16,
434 0,
435 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
cc3ea0aa 436#if defined(HAS_QUAD)
e5464422 437 /* q */ sizeof(Quad_t),
cc3ea0aa 438#else
e5464422 439 0,
cc3ea0aa 440#endif
e5464422
TH
441 0,
442 /* s */ SIZE16,
443 0, 0,
444 /* v */ SIZE16,
445 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0,
455 /* shrieking */
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 /* I */ sizeof(unsigned int),
462 0, 0,
463 /* L */ sizeof(unsigned long),
464 0,
cc3ea0aa 465#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 466 /* N */ SIZE32,
cc3ea0aa 467#else
e5464422 468 0,
cc3ea0aa 469#endif
e5464422
TH
470 0, 0, 0, 0,
471 /* S */ sizeof(unsigned short),
472 0, 0,
cc3ea0aa 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 474 /* V */ SIZE32,
cc3ea0aa 475#else
e5464422 476 0,
cc3ea0aa 477#endif
e5464422
TH
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0,
480 /* i */ sizeof(int),
481 0, 0,
482 /* l */ sizeof(long),
483 0,
cc3ea0aa 484#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 485 /* n */ SIZE16,
cc3ea0aa 486#else
e5464422 487 0,
cc3ea0aa 488#endif
e5464422
TH
489 0, 0, 0, 0,
490 /* s */ sizeof(short),
491 0, 0,
cc3ea0aa 492#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 493 /* v */ SIZE16,
cc3ea0aa 494#else
e5464422 495 0,
cc3ea0aa 496#endif
e5464422
TH
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0
cc3ea0aa
NC
506};
507#else
508/* EBCDIC (or bust) */
820b8518 509STATIC const packprops_t packprops[512] = {
e5464422
TH
510 /* normal */
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0,
520 /* c */ sizeof(char),
521 /* d */ sizeof(double),
522 0,
523 /* f */ sizeof(float),
524 0, 0,
525 /* i */ sizeof(int),
526 0, 0, 0, 0, 0, 0, 0,
527 /* j */ IVSIZE,
528 0,
529 /* l */ SIZE32,
530 0,
531 /* n */ SIZE16,
532 0,
533 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
cc3ea0aa 534#if defined(HAS_QUAD)
e5464422 535 /* q */ sizeof(Quad_t),
cc3ea0aa 536#else
e5464422 537 0,
cc3ea0aa 538#endif
e5464422
TH
539 0, 0, 0, 0, 0, 0, 0, 0, 0,
540 /* s */ SIZE16,
541 0, 0,
542 /* v */ SIZE16,
543 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
546 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
cc3ea0aa 547#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
e5464422 548 /* D */ LONG_DOUBLESIZE,
cc3ea0aa 549#else
e5464422 550 0,
cc3ea0aa 551#endif
e5464422
TH
552 0,
553 /* F */ NVSIZE,
554 0, 0,
555 /* I */ sizeof(unsigned int),
556 0, 0, 0, 0, 0, 0, 0,
557 /* J */ UVSIZE,
558 0,
559 /* L */ SIZE32,
560 0,
561 /* N */ SIZE32,
562 0, 0,
cc3ea0aa 563#if defined(HAS_QUAD)
e5464422 564 /* Q */ sizeof(Uquad_t),
cc3ea0aa 565#else
e5464422 566 0,
cc3ea0aa 567#endif
e5464422
TH
568 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 /* S */ SIZE16,
570 0,
571 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
572 /* V */ SIZE32,
500d42d2 573#if defined(PERL_PACK_CAN_W)
e5464422 574 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
500d42d2 575#else
e5464422 576 0,
500d42d2 577#endif
e5464422
TH
578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* shrieking */
581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
586 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* i */ sizeof(int),
591 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 /* l */ sizeof(long),
593 0,
cc3ea0aa 594#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 595 /* n */ SIZE16,
cc3ea0aa 596#else
e5464422 597 0,
cc3ea0aa 598#endif
e5464422
TH
599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* s */ sizeof(short),
601 0, 0,
cc3ea0aa 602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 603 /* v */ SIZE16,
cc3ea0aa 604#else
e5464422 605 0,
cc3ea0aa 606#endif
e5464422
TH
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
609 0, 0, 0,
610 /* I */ sizeof(unsigned int),
611 0, 0, 0, 0, 0, 0, 0, 0, 0,
612 /* L */ sizeof(unsigned long),
613 0,
cc3ea0aa 614#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 615 /* N */ SIZE32,
cc3ea0aa 616#else
e5464422 617 0,
cc3ea0aa 618#endif
e5464422
TH
619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 /* S */ sizeof(unsigned short),
621 0, 0,
cc3ea0aa 622#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 623 /* V */ SIZE32,
cc3ea0aa 624#else
e5464422 625 0,
cc3ea0aa 626#endif
e5464422
TH
627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
cc3ea0aa
NC
629};
630#endif
631
500d42d2 632STATIC U8
e02cb1b1 633uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
500d42d2 634{
500d42d2 635 STRLEN retlen;
7508116b 636 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
3314a176 637 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
500d42d2
TH
638 /* We try to process malformed UTF-8 as much as possible (preferrably with
639 warnings), but these two mean we make no progress in the string and
640 might enter an infinite loop */
641 if (retlen == (STRLEN) -1 || retlen == 0)
3314a176
TH
642 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
643 (int) TYPE_NO_MODIFIERS(datumtype));
500d42d2 644 if (val >= 0x100) {
3314a176 645 if (ckWARN(WARN_UNPACK))
500d42d2
TH
646 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
647 "Character in '%c' format wrapped in unpack",
3314a176 648 (int) TYPE_NO_MODIFIERS(datumtype));
500d42d2
TH
649 val &= 0xff;
650 }
651 *s += retlen;
652 return val;
653}
654
3314a176
TH
655#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
656 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
500d42d2
TH
657 *(U8 *)(s)++)
658
659STATIC bool
e02cb1b1 660uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
500d42d2
TH
661{
662 UV val;
663 STRLEN retlen;
e02cb1b1 664 const char *from = *s;
500d42d2 665 int bad = 0;
e02cb1b1 666 const U32 flags = ckWARN(WARN_UTF8) ?
500d42d2
TH
667 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
668 for (;buf_len > 0; buf_len--) {
669 if (from >= end) return FALSE;
3314a176 670 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
500d42d2
TH
671 if (retlen == (STRLEN) -1 || retlen == 0) {
672 from += UTF8SKIP(from);
673 bad |= 1;
674 } else from += retlen;
675 if (val >= 0x100) {
676 bad |= 2;
677 val &= 0xff;
678 }
679 *(U8 *)buf++ = val;
680 }
681 /* We have enough characters for the buffer. Did we have problems ? */
682 if (bad) {
683 if (bad & 1) {
684 /* Rewalk the string fragment while warning */
e02cb1b1 685 const char *ptr;
a4de8f71 686 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
500d42d2
TH
687 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
688 if (ptr >= end) break;
3314a176 689 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
500d42d2
TH
690 }
691 if (from > end) from = end;
692 }
693 if ((bad & 2) && ckWARN(WARN_UNPACK))
773e5b22 694 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
3314a176
TH
695 WARN_PACK : WARN_UNPACK),
696 "Character(s) in '%c' format wrapped in %s",
773e5b22 697 (int) TYPE_NO_MODIFIERS(datumtype),
3314a176 698 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
500d42d2
TH
699 }
700 *s = from;
701 return TRUE;
702}
703
704STATIC bool
e02cb1b1 705next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
500d42d2 706{
500d42d2 707 STRLEN retlen;
7508116b 708 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
500d42d2
TH
709 if (val >= 0x100 || !ISUUCHAR(val) ||
710 retlen == (STRLEN) -1 || retlen == 0) {
711 *out = 0;
712 return FALSE;
713 }
714 *out = PL_uudmap[val] & 077;
3314a176 715 *s += retlen;
500d42d2
TH
716 return TRUE;
717}
62f95557 718
86d06bda 719STATIC char *
8e7b0921 720S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
7508116b 721 const U8 * const end = start + len;
86d06bda 722
3314a176 723 while (start < end) {
33086015
NC
724 const UV uv = NATIVE_TO_ASCII(*start);
725 if (UNI_IS_INVARIANT(uv))
726 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
727 else {
728 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
729 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
3314a176
TH
730 }
731 start++;
732 }
86d06bda 733 return dest;
3314a176
TH
734}
735
a4de8f71
NC
736#define PUSH_BYTES(utf8, cur, buf, len) \
737STMT_START { \
86d06bda
AL
738 if (utf8) \
739 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
a4de8f71
NC
740 else { \
741 Copy(buf, cur, len, char); \
742 (cur) += (len); \
743 } \
3314a176
TH
744} STMT_END
745
746#define GROWING(utf8, cat, start, cur, in_len) \
747STMT_START { \
748 STRLEN glen = (in_len); \
773e5b22 749 if (utf8) glen *= UTF8_EXPAND; \
3314a176 750 if ((cur) + glen >= (start) + SvLEN(cat)) { \
8c89da26 751 (start) = sv_exp_grow(cat, glen); \
3314a176
TH
752 (cur) = (start) + SvCUR(cat); \
753 } \
754} STMT_END
755
756#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
757STMT_START { \
e02cb1b1 758 const STRLEN glen = (in_len); \
3314a176 759 STRLEN gl = glen; \
773e5b22 760 if (utf8) gl *= UTF8_EXPAND; \
3314a176
TH
761 if ((cur) + gl >= (start) + SvLEN(cat)) { \
762 *cur = '\0'; \
763 SvCUR(cat) = (cur) - (start); \
8c89da26 764 (start) = sv_exp_grow(cat, gl); \
3314a176
TH
765 (cur) = (start) + SvCUR(cat); \
766 } \
767 PUSH_BYTES(utf8, cur, buf, glen); \
768} STMT_END
769
770#define PUSH_BYTE(utf8, s, byte) \
771STMT_START { \
772 if (utf8) { \
e02cb1b1 773 const U8 au8 = (byte); \
86d06bda 774 (s) = bytes_to_uni(&au8, 1, (s)); \
3314a176
TH
775 } else *(U8 *)(s)++ = (byte); \
776} STMT_END
777
778/* Only to be used inside a loop (see the break) */
779#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
780STMT_START { \
781 STRLEN retlen; \
782 if (str >= end) break; \
783 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
784 if (retlen == (STRLEN) -1 || retlen == 0) { \
785 *cur = '\0'; \
786 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
787 } \
788 str += retlen; \
789} STMT_END
790
e02cb1b1
NC
791static const char *_action( const tempsym_t* symptr )
792{
d2ae4405 793 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
e02cb1b1
NC
794}
795
206947d2 796/* Returns the sizeof() struct described by pat */
028d1f6d 797STATIC I32
3314a176 798S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 799{
3314a176 800 I32 total = 0;
206947d2 801
cf168e39 802 while (next_symbol(symptr)) {
3314a176 803 I32 len;
e02cb1b1 804 int size;
3314a176
TH
805
806 switch (symptr->howlen) {
773e5b22 807 case e_star:
cf168e39 808 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
e02cb1b1 809 _action( symptr ) );
cf168e39 810 break;
3314a176
TH
811 default:
812 /* e_no_len and e_number */
813 len = symptr->length;
814 break;
cf168e39
WL
815 }
816
e5464422 817 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
cc3ea0aa 818 if (!size) {
e02cb1b1 819 int star;
cc3ea0aa
NC
820 /* endianness doesn't influence the size of a type */
821 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
822 default:
823 Perl_croak(aTHX_ "Invalid type '%c' in %s",
824 (int)TYPE_NO_MODIFIERS(symptr->code),
e02cb1b1 825 _action( symptr ) );
3040befa
TH
826#ifdef PERL_PACK_CAN_SHRIEKSIGN
827# ifdef PERL_PACK_CAN_DOT
828 case '.' | TYPE_IS_SHRIEKING:
829# endif
830 case '@' | TYPE_IS_SHRIEKING:
831#endif
cc3ea0aa 832 case '@':
3040befa
TH
833#ifdef PERL_PACK_CAN_DOT
834 case '.':
835#endif
cc3ea0aa
NC
836 case '/':
837 case 'U': /* XXXX Is it correct? */
838 case 'w':
839 case 'u':
840 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
3040befa 841 (int) TYPE_NO_MODIFIERS(symptr->code),
e02cb1b1 842 _action( symptr ) );
cc3ea0aa
NC
843 case '%':
844 size = 0;
845 break;
846 case '(':
773e5b22
NC
847 {
848 tempsym_t savsym = *symptr;
849 symptr->patptr = savsym.grpbeg;
850 symptr->patend = savsym.grpend;
851 /* XXXX Theoretically, we need to measure many times at
852 different positions, since the subexpression may contain
853 alignment commands, but be not of aligned length.
854 Need to detect this and croak(). */
855 size = measure_struct(symptr);
856 *symptr = savsym;
857 break;
858 }
cc3ea0aa
NC
859 case 'X' | TYPE_IS_SHRIEKING:
860 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
861 */
862 if (!len) /* Avoid division by 0 */
863 len = 1;
864 len = total % len; /* Assumed: the start is aligned. */
865 /* FALL THROUGH */
866 case 'X':
867 size = -1;
868 if (total < len)
e02cb1b1 869 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
cc3ea0aa
NC
870 break;
871 case 'x' | TYPE_IS_SHRIEKING:
872 if (!len) /* Avoid division by 0 */
873 len = 1;
874 star = total % len; /* Assumed: the start is aligned. */
875 if (star) /* Other portable ways? */
876 len = len - star;
877 else
878 len = 0;
879 /* FALL THROUGH */
880 case 'x':
881 case 'A':
882 case 'Z':
883 case 'a':
cc3ea0aa
NC
884 size = 1;
885 break;
886 case 'B':
887 case 'b':
888 len = (len + 7)/8;
889 size = 1;
890 break;
891 case 'H':
892 case 'h':
893 len = (len + 1)/2;
894 size = 1;
895 break;
896
897 case 'P':
898 len = 1;
899 size = sizeof(char*);
900 break;
901 }
206947d2
IZ
902 }
903 total += len * size;
904 }
905 return total;
906}
907
cf168e39
WL
908
909/* locate matching closing parenthesis or bracket
910 * returns char pointer to char after match, or NULL
911 */
cacbe70b
NC
912STATIC const char *
913S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 914{
cf168e39 915 while (patptr < patend) {
e02cb1b1 916 const char c = *patptr++;
cf168e39
WL
917
918 if (isSPACE(c))
919 continue;
920 else if (c == ender)
921 return patptr-1;
922 else if (c == '#') {
923 while (patptr < patend && *patptr != '\n')
924 patptr++;
925 continue;
926 } else if (c == '(')
927 patptr = group_end(patptr, patend, ')') + 1;
928 else if (c == '[')
929 patptr = group_end(patptr, patend, ']') + 1;
18529408 930 }
cf168e39
WL
931 Perl_croak(aTHX_ "No group ending character '%c' found in template",
932 ender);
933 return 0;
18529408
IZ
934}
935
cf168e39
WL
936
937/* Convert unsigned decimal number to binary.
938 * Expects a pointer to the first digit and address of length variable
939 * Advances char pointer to 1st non-digit char and returns number
773e5b22 940 */
cacbe70b
NC
941STATIC const char *
942S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
cf168e39
WL
943{
944 I32 len = *patptr++ - '0';
945 while (isDIGIT(*patptr)) {
946 if (len >= 0x7FFFFFFF/10)
947 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
948 len = (len * 10) + (*patptr++ - '0');
949 }
950 *lenptr = len;
951 return patptr;
952}
953
954/* The marvellous template parsing routine: Using state stored in *symptr,
955 * locates next template code and count
956 */
957STATIC bool
3314a176 958S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 959{
773e5b22 960 const char* patptr = symptr->patptr;
7508116b 961 const char* const patend = symptr->patend;
3314a176 962 const char *allowed = "";
cf168e39
WL
963
964 symptr->flags &= ~FLAG_SLASH;
965
966 while (patptr < patend) {
967 if (isSPACE(*patptr))
968 patptr++;
969 else if (*patptr == '#') {
970 patptr++;
971 while (patptr < patend && *patptr != '\n')
972 patptr++;
973 if (patptr < patend)
974 patptr++;
975 } else {
773e5b22 976 /* We should have found a template code */
cf168e39 977 I32 code = *patptr++ & 0xFF;
cc3ea0aa 978 U32 inherited_modifiers = 0;
cf168e39
WL
979
980 if (code == ','){ /* grandfather in commas but with a warning */
981 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
982 symptr->flags |= FLAG_COMMA;
983 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
e02cb1b1 984 "Invalid type ',' in %s", _action( symptr ) );
cf168e39
WL
985 }
986 continue;
987 }
773e5b22 988
cf168e39 989 /* for '(', skip to ')' */
773e5b22 990 if (code == '(') {
cf168e39
WL
991 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
992 Perl_croak(aTHX_ "()-group starts with a count in %s",
e02cb1b1 993 _action( symptr ) );
cacbe70b
NC
994 symptr->grpbeg = (char *) patptr;
995 patptr
996 = 1 + ( symptr->grpend = (char *)group_end(patptr, patend, ')') );
cf168e39
WL
997 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
998 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
e02cb1b1 999 _action( symptr ) );
cf168e39
WL
1000 }
1001
cc3ea0aa
NC
1002 /* look for group modifiers to inherit */
1003 if (TYPE_ENDIANNESS(symptr->flags)) {
1004 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1005 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
cf168e39
WL
1006 }
1007
cc3ea0aa
NC
1008 /* look for modifiers */
1009 while (patptr < patend) {
cdf9dde0 1010 I32 modifier;
cc3ea0aa
NC
1011 switch (*patptr) {
1012 case '!':
1013 modifier = TYPE_IS_SHRIEKING;
1014 allowed = SHRIEKING_ALLOWED_TYPES;
1015 break;
1016#ifdef PERL_PACK_CAN_BYTEORDER
1017 case '>':
1018 modifier = TYPE_IS_BIG_ENDIAN;
1019 allowed = ENDIANNESS_ALLOWED_TYPES;
1020 break;
1021 case '<':
1022 modifier = TYPE_IS_LITTLE_ENDIAN;
1023 allowed = ENDIANNESS_ALLOWED_TYPES;
1024 break;
3314a176 1025#endif /* PERL_PACK_CAN_BYTEORDER */
cc3ea0aa 1026 default:
cdf9dde0
AL
1027 allowed = "";
1028 modifier = 0;
cc3ea0aa
NC
1029 break;
1030 }
1031
1032 if (modifier == 0)
1033 break;
1034
1035 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1036 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
e02cb1b1 1037 allowed, _action( symptr ) );
cc3ea0aa
NC
1038
1039 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1040 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
e02cb1b1 1041 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
cc3ea0aa
NC
1042 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1043 TYPE_ENDIANNESS_MASK)
1044 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
e02cb1b1 1045 *patptr, _action( symptr ) );
cc3ea0aa 1046
bc743096 1047 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
cc3ea0aa
NC
1048 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1049 "Duplicate modifier '%c' after '%c' in %s",
1050 *patptr, (int) TYPE_NO_MODIFIERS(code),
e02cb1b1 1051 _action( symptr ) );
cc3ea0aa
NC
1052 }
1053
1054 code |= modifier;
1055 patptr++;
1056 }
1057
1058 /* inherit modifiers */
1059 code |= inherited_modifiers;
1060
773e5b22 1061 /* look for count and/or / */
cf168e39
WL
1062 if (patptr < patend) {
1063 if (isDIGIT(*patptr)) {
1064 patptr = get_num( patptr, &symptr->length );
1065 symptr->howlen = e_number;
1066
1067 } else if (*patptr == '*') {
1068 patptr++;
1069 symptr->howlen = e_star;
1070
1071 } else if (*patptr == '[') {
773e5b22 1072 const char* lenptr = ++patptr;
cf168e39
WL
1073 symptr->howlen = e_number;
1074 patptr = group_end( patptr, patend, ']' ) + 1;
1075 /* what kind of [] is it? */
1076 if (isDIGIT(*lenptr)) {
1077 lenptr = get_num( lenptr, &symptr->length );
1078 if( *lenptr != ']' )
1079 Perl_croak(aTHX_ "Malformed integer in [] in %s",
e02cb1b1 1080 _action( symptr ) );
cf168e39
WL
1081 } else {
1082 tempsym_t savsym = *symptr;
cacbe70b
NC
1083 symptr->patend = (char *) patptr-1;
1084 symptr->patptr = (char *) lenptr;
cf168e39
WL
1085 savsym.length = measure_struct(symptr);
1086 *symptr = savsym;
1087 }
1088 } else {
1089 symptr->howlen = e_no_len;
1090 symptr->length = 1;
1091 }
1092
1093 /* try to find / */
1094 while (patptr < patend) {
1095 if (isSPACE(*patptr))
1096 patptr++;
1097 else if (*patptr == '#') {
1098 patptr++;
1099 while (patptr < patend && *patptr != '\n')
1100 patptr++;
1101 if (patptr < patend)
1102 patptr++;
1103 } else {
cc3ea0aa 1104 if (*patptr == '/') {
cf168e39
WL
1105 symptr->flags |= FLAG_SLASH;
1106 patptr++;
cc3ea0aa
NC
1107 if (patptr < patend &&
1108 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
cf168e39 1109 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
e02cb1b1 1110 _action( symptr ) );
cf168e39
WL
1111 }
1112 break;
1113 }
18529408 1114 }
cf168e39
WL
1115 } else {
1116 /* at end - no count, no / */
1117 symptr->howlen = e_no_len;
1118 symptr->length = 1;
1119 }
1120
1121 symptr->code = code;
773e5b22 1122 symptr->patptr = (char *) patptr;
cf168e39 1123 return TRUE;
18529408 1124 }
cf168e39 1125 }
773e5b22 1126 symptr->patptr = (char *) patptr;
cf168e39 1127 return FALSE;
18529408
IZ
1128}
1129
500d42d2
TH
1130#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
1131/*
773e5b22 1132 There is no way to cleanly handle the case where we should process the
500d42d2 1133 string per byte in its upgraded form while it's really in downgraded form
773e5b22
NC
1134 (e.g. estimates like strend-s as an upper bound for the number of
1135 characters left wouldn't work). So if we foresee the need of this
1136 (pattern starts with U or contains U0), we want to work on the encoded
1137 version of the string. Users are advised to upgrade their pack string
500d42d2
TH
1138 themselves if they need to do a lot of unpacks like this on it
1139*/
e02cb1b1 1140/* XXX These can be const */
773e5b22 1141STATIC bool
500d42d2
TH
1142need_utf8(const char *pat, const char *patend)
1143{
1144 bool first = TRUE;
1145 while (pat < patend) {
1146 if (pat[0] == '#') {
1147 pat++;
e02cb1b1 1148 pat = (const char *) memchr(pat, '\n', patend-pat);
500d42d2
TH
1149 if (!pat) return FALSE;
1150 } else if (pat[0] == 'U') {
1151 if (first || pat[1] == '0') return TRUE;
1152 } else first = FALSE;
1153 pat++;
1154 }
1155 return FALSE;
1156}
1157
1158STATIC char
1159first_symbol(const char *pat, const char *patend) {
1160 while (pat < patend) {
1161 if (pat[0] != '#') return pat[0];
1162 pat++;
e02cb1b1 1163 pat = (const char *) memchr(pat, '\n', patend-pat);
500d42d2
TH
1164 if (!pat) return 0;
1165 pat++;
1166 }
1167 return 0;
1168}
1169#endif
1170
18529408 1171/*
f3479639
JH
1172=for apidoc unpackstring
1173
41b209cd
NC
1174The engine implementing unpack() Perl function. C<unpackstring> puts the
1175extracted list items on the stack and returns the number of elements.
1176Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
f3479639
JH
1177
1178=cut */
1179
1180I32
970ba7cf
NC
1181Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend,
1182 U32 flags)
f3479639 1183{
e02cb1b1 1184 tempsym_t sym;
500d42d2 1185
3314a176 1186 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
500d42d2
TH
1187#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
1188 else if (need_utf8(pat, patend)) {
1189 /* We probably should try to avoid this in case a scalar context call
1190 wouldn't get to the "U0" */
1191 STRLEN len = strend - s;
a4de8f71 1192 s = (char *) bytes_to_utf8((U8 *) s, &len);
500d42d2
TH
1193 SAVEFREEPV(s);
1194 strend = s + len;
3314a176 1195 flags |= FLAG_DO_UTF8;
500d42d2
TH
1196 }
1197
3314a176
TH
1198 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1199 flags |= FLAG_PARSE_UTF8;
500d42d2 1200#else
3314a176
TH
1201 if (flags & FLAG_DO_UTF8)
1202 flags |= FLAG_PARSE_UTF8;
500d42d2
TH
1203#endif
1204
e02cb1b1 1205 TEMPSYM_INIT(&sym, pat, patend, flags);
f3479639
JH
1206
1207 return unpack_rec(&sym, s, s, strend, NULL );
1208}
1209
cf168e39
WL
1210STATIC
1211I32
e02cb1b1 1212S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
cf168e39 1213{
a6ec74c1 1214 dSP;
18529408 1215 SV *sv;
e02cb1b1 1216 const I32 start_sp_offset = SP - PL_stack_base;
cf168e39 1217 howlen_t howlen;
a6ec74c1 1218
a6ec74c1 1219 I32 checksum = 0;
92d41999 1220 UV cuv = 0;
a6ec74c1 1221 NV cdouble = 0.0;
3314a176 1222 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
cf168e39 1223 bool beyond = FALSE;
0bc88fab 1224 bool explicit_length;
a4de8f71 1225 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
3314a176 1226 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
3040befa 1227 symptr->strbeg = s - strbeg;
cf168e39 1228
cf168e39 1229 while (next_symbol(symptr)) {
e5464422 1230 packprops_t props;
a4de8f71 1231 I32 len;
3314a176 1232 I32 datumtype = symptr->code;
206947d2 1233 /* do first one only unless in list context
500d42d2 1234 / is implemented by unpacking the count, then popping it from the
206947d2 1235 stack, so must check that we're not in the middle of a / */
cf168e39 1236 if ( unpack_only_one
206947d2 1237 && (SP - PL_stack_base == start_sp_offset + 1)
cf168e39 1238 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1239 break;
cf168e39 1240
3314a176 1241 switch (howlen = symptr->howlen) {
773e5b22
NC
1242 case e_star:
1243 len = strend - strbeg; /* long enough */
cf168e39 1244 break;
3314a176
TH
1245 default:
1246 /* e_no_len and e_number */
1247 len = symptr->length;
1248 break;
cf168e39 1249 }
18529408 1250
0bc88fab 1251 explicit_length = TRUE;
a6ec74c1 1252 redo_switch:
cf168e39 1253 beyond = s >= strend;
e5464422
TH
1254
1255 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1256 if (props) {
1257 /* props nonzero means we can process this letter. */
a4de8f71
NC
1258 const long size = props & PACK_SIZE_MASK;
1259 const long howmany = (strend - s) / size;
e5464422
TH
1260 if (len > howmany)
1261 len = howmany;
1262
1263 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1264 if (len && unpack_only_one) len = 1;
1265 EXTEND(SP, len);
1266 EXTEND_MORTAL(len);
cc3ea0aa
NC
1267 }
1268 }
e5464422 1269
cc3ea0aa 1270 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1271 default:
cc3ea0aa 1272 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
cf168e39 1273
a6ec74c1 1274 case '%':
cf168e39 1275 if (howlen == e_no_len)
18529408 1276 len = 16; /* len is not specified */
a6ec74c1 1277 checksum = len;
92d41999 1278 cuv = 0;
a6ec74c1 1279 cdouble = 0;
18529408 1280 continue;
a6ec74c1 1281 break;
18529408
IZ
1282 case '(':
1283 {
cf168e39 1284 tempsym_t savsym = *symptr;
a4de8f71 1285 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
cc3ea0aa 1286 symptr->flags |= group_modifiers;
cf168e39 1287 symptr->patend = savsym.grpend;
3040befa 1288 symptr->previous = &savsym;
cf168e39 1289 symptr->level++;
18529408
IZ
1290 PUTBACK;
1291 while (len--) {
cf168e39 1292 symptr->patptr = savsym.grpbeg;
3314a176
TH
1293 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1294 else symptr->flags &= ~FLAG_PARSE_UTF8;
500d42d2
TH
1295 unpack_rec(symptr, s, strbeg, strend, &s);
1296 if (s == strend && savsym.howlen == e_star)
cf168e39 1297 break; /* No way to continue */
18529408
IZ
1298 }
1299 SPAGAIN;
3040befa 1300 savsym.flags = symptr->flags & ~group_modifiers;
cf168e39 1301 *symptr = savsym;
18529408
IZ
1302 break;
1303 }
3040befa
TH
1304#ifdef PERL_PACK_CAN_DOT
1305# ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 case '.' | TYPE_IS_SHRIEKING:
1307# endif
1308 case '.': {
a4de8f71 1309 const char *from;
3040befa
TH
1310 SV *sv;
1311#ifdef PERL_PACK_CAN_SHRIEKSIGN
a4de8f71 1312 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
3040befa 1313#else /* PERL_PACK_CAN_SHRIEKSIGN */
a4de8f71 1314 const bool u8 = utf8;
3040befa
TH
1315#endif
1316 if (howlen == e_star) from = strbeg;
1317 else if (len <= 0) from = s;
1318 else {
1319 tempsym_t *group = symptr;
1320
1321 while (--len && group) group = group->previous;
1322 from = group ? strbeg + group->strbeg : strbeg;
1323 }
1324 sv = from <= s ?
a4de8f71
NC
1325 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1326 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
3040befa
TH
1327 XPUSHs(sv_2mortal(sv));
1328 break;
1329 }
1330#endif
1331#ifdef PERL_PACK_CAN_SHRIEKSIGN
1332 case '@' | TYPE_IS_SHRIEKING:
1333#endif
a6ec74c1 1334 case '@':
3040befa 1335 s = strbeg + symptr->strbeg;
500d42d2 1336#ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
3040befa
TH
1337#ifdef PERL_PACK_CAN_SHRIEKSIGN
1338 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1339#else /* PERL_PACK_CAN_SHRIEKSIGN */
1340 if (utf8)
1341#endif
1342 {
500d42d2
TH
1343 while (len > 0) {
1344 if (s >= strend)
1345 Perl_croak(aTHX_ "'@' outside of string in unpack");
1346 s += UTF8SKIP(s);
1347 len--;
1348 }
1349 if (s > strend)
1350 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1351 } else
1352#endif
1353 {
3040befa 1354 if (strend-s < len)
773e5b22 1355 Perl_croak(aTHX_ "'@' outside of string in unpack");
3040befa 1356 s += len;
500d42d2 1357 }
a6ec74c1 1358 break;
62f95557
IZ
1359 case 'X' | TYPE_IS_SHRIEKING:
1360 if (!len) /* Avoid division by 0 */
1361 len = 1;
500d42d2
TH
1362#ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1363 if (utf8) {
e02cb1b1 1364 const char *hop, *last;
3314a176
TH
1365 I32 l = len;
1366 hop = last = strbeg;
1367 while (hop < s) {
1368 hop += UTF8SKIP(hop);
1369 if (--l == 0) {
500d42d2 1370 last = hop;
3314a176
TH
1371 l = len;
1372 }
773e5b22 1373 }
3314a176
TH
1374 if (last > s)
1375 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
500d42d2
TH
1376 s = last;
1377 break;
3314a176 1378 }
500d42d2 1379#endif
3314a176 1380 len = (s - strbeg) % len;
62f95557 1381 /* FALL THROUGH */
a6ec74c1 1382 case 'X':
500d42d2
TH
1383#ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1384 if (utf8) {
1385 while (len > 0) {
1386 if (s <= strbeg)
1387 Perl_croak(aTHX_ "'X' outside of string in unpack");
3314a176 1388 while (--s, UTF8_IS_CONTINUATION(*s)) {
500d42d2
TH
1389 if (s <= strbeg)
1390 Perl_croak(aTHX_ "'X' outside of string in unpack");
1391 }
1392 len--;
1393 }
1394 } else
1395#endif
1396 {
773e5b22
NC
1397 if (len > s - strbeg)
1398 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1399 s -= len;
500d42d2 1400 }
a6ec74c1 1401 break;
a4de8f71
NC
1402 case 'x' | TYPE_IS_SHRIEKING: {
1403 I32 ai32;
62f95557
IZ
1404 if (!len) /* Avoid division by 0 */
1405 len = 1;
a4de8f71
NC
1406 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1407 else ai32 = (s - strbeg) % len;
500d42d2
TH
1408 if (ai32 == 0) break;
1409 len -= ai32;
a4de8f71 1410 }
62f95557 1411 /* FALL THROUGH */
a6ec74c1 1412 case 'x':
500d42d2
TH
1413 if (utf8) {
1414 while (len>0) {
1415 if (s >= strend)
1416 Perl_croak(aTHX_ "'x' outside of string in unpack");
1417 s += UTF8SKIP(s);
1418 len--;
1419 }
1420 } else {
773e5b22
NC
1421 if (len > strend - s)
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1423 s += len;
3314a176 1424 }
a6ec74c1
JH
1425 break;
1426 case '/':
cf168e39
WL
1427 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1428 break;
a6ec74c1
JH
1429 case 'A':
1430 case 'Z':
1431 case 'a':
500d42d2
TH
1432 if (checksum) {
1433 /* Preliminary length estimate is assumed done in 'W' */
1434 if (len > strend - s) len = strend - s;
1435 goto W_checksum;
1436 }
1437#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1438 if (utf8) {
1439 I32 l;
e02cb1b1 1440 const char *hop;
500d42d2
TH
1441 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1442 if (hop >= strend) {
1443 if (hop > strend)
1444 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1445 break;
773e5b22 1446 }
a6ec74c1 1447 }
500d42d2
TH
1448 if (hop > strend)
1449 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1450 len = hop - s;
1451 } else
1452#endif
1453 if (len > strend - s)
1454 len = strend - s;
1455
1456 if (datumtype == 'Z') {
1457 /* 'Z' strips stuff after first null */
e02cb1b1 1458 const char *ptr, *end;
3314a176
TH
1459 end = s + len;
1460 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
500d42d2
TH
1461 sv = newSVpvn(s, ptr-s);
1462 if (howlen == e_star) /* exact for 'Z*' */
1463 len = ptr-s + (ptr != strend ? 1 : 0);
1464 } else if (datumtype == 'A') {
1465 /* 'A' strips both nulls and spaces */
e02cb1b1 1466 const char *ptr;
1928d712
NC
1467#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1468 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1469 for (ptr = s+len-1; ptr >= s; ptr--)
1470 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
a4de8f71 1471 !is_utf8_space((U8 *) ptr)) break;
1928d712
NC
1472 if (ptr >= s) ptr += UTF8SKIP(ptr);
1473 else ptr++;
3040befa 1474 if (ptr > s+len)
1928d712
NC
1475 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1476 } else
1477#endif
1478 {
1479 for (ptr = s+len-1; ptr >= s; ptr--)
1480 if (*ptr != 0 && !isSPACE(*ptr)) break;
1481 ptr++;
1482 }
500d42d2
TH
1483 sv = newSVpvn(s, ptr-s);
1484 } else sv = newSVpvn(s, len);
1485
1486#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1487 if (utf8) {
1488 SvUTF8_on(sv);
1489 /* Undo any upgrade done due to need_utf8() */
3314a176 1490 if (!(symptr->flags & FLAG_WAS_UTF8))
500d42d2 1491 sv_utf8_downgrade(sv, 0);
a6ec74c1 1492 }
500d42d2 1493#endif
a6ec74c1 1494 XPUSHs(sv_2mortal(sv));
500d42d2 1495 s += len;
a6ec74c1
JH
1496 break;
1497 case 'B':
500d42d2
TH
1498 case 'b': {
1499 char *str;
cf168e39 1500 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1501 len = (strend - s) * 8;
1502 if (checksum) {
1503 if (!PL_bitcount) {
500d42d2 1504 int bits;
cd7a8267 1505 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1506 for (bits = 1; bits < 256; bits++) {
1507 if (bits & 1) PL_bitcount[bits]++;
1508 if (bits & 2) PL_bitcount[bits]++;
1509 if (bits & 4) PL_bitcount[bits]++;
1510 if (bits & 8) PL_bitcount[bits]++;
1511 if (bits & 16) PL_bitcount[bits]++;
1512 if (bits & 32) PL_bitcount[bits]++;
1513 if (bits & 64) PL_bitcount[bits]++;
1514 if (bits & 128) PL_bitcount[bits]++;
1515 }
1516 }
500d42d2 1517#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
3314a176 1518 if (utf8)
500d42d2 1519 while (len >= 8 && s < strend) {
3314a176 1520 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
500d42d2
TH
1521 len -= 8;
1522 }
3314a176 1523 else
500d42d2 1524#endif
773e5b22 1525 while (len >= 8) {
500d42d2 1526 cuv += PL_bitcount[*(U8 *)s++];
773e5b22
NC
1527 len -= 8;
1528 }
500d42d2
TH
1529 if (len && s < strend) {
1530 U8 bits;
1531#ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
3314a176 1532 bits = SHIFT_BYTE(0, s, strend, datumtype);
500d42d2 1533#else
3314a176 1534 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
500d42d2 1535#endif
3314a176 1536 if (datumtype == 'b')
a6ec74c1 1537 while (len-- > 0) {
92d41999 1538 if (bits & 1) cuv++;
a6ec74c1
JH
1539 bits >>= 1;
1540 }
3314a176 1541 else
a6ec74c1 1542 while (len-- > 0) {
500d42d2 1543 if (bits & 0x80) cuv++;
a6ec74c1
JH
1544 bits <<= 1;
1545 }
773e5b22 1546 }
a6ec74c1
JH
1547 break;
1548 }
500d42d2 1549
133cdda0 1550 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1551 SvPOK_on(sv);
1552 str = SvPVX(sv);
1553 if (datumtype == 'b') {
3314a176 1554 U8 bits = 0;
e02cb1b1 1555 const I32 ai32 = len;
500d42d2
TH
1556 for (len = 0; len < ai32; len++) {
1557 if (len & 7) bits >>= 1;
1558#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1559 else if (utf8) {
1560 if (s >= strend) break;
3314a176 1561 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
500d42d2
TH
1562 }
1563#endif
1564 else bits = *(U8 *) s++;
1565 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1566 }
500d42d2 1567 } else {
3314a176 1568 U8 bits = 0;
e02cb1b1 1569 const I32 ai32 = len;
500d42d2
TH
1570 for (len = 0; len < ai32; len++) {
1571 if (len & 7) bits <<= 1;
1572#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1573 else if (utf8) {
1574 if (s >= strend) break;
3314a176 1575 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
500d42d2
TH
1576 }
1577#endif
1578 else bits = *(U8 *) s++;
1579 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1580 }
1581 }
1582 *str = '\0';
500d42d2
TH
1583 SvCUR_set(sv, str - SvPVX(sv));
1584 XPUSHs(sv);
a6ec74c1 1585 break;
500d42d2 1586 }
a6ec74c1 1587 case 'H':
500d42d2
TH
1588 case 'h': {
1589 char *str;
773e5b22 1590 /* Preliminary length estimate, acceptable for utf8 too */
cf168e39 1591 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1592 len = (strend - s) * 2;
133cdda0 1593 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1594 SvPOK_on(sv);
1595 str = SvPVX(sv);
1596 if (datumtype == 'h') {
3314a176 1597 U8 bits = 0;
a4de8f71 1598 I32 ai32 = len;
773e5b22
NC
1599 for (len = 0; len < ai32; len++) {
1600 if (len & 1) bits >>= 4;
500d42d2 1601#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
773e5b22
NC
1602 else if (utf8) {
1603 if (s >= strend) break;
3314a176 1604 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
773e5b22 1605 }
500d42d2 1606#endif
773e5b22 1607 else bits = * (U8 *) s++;
a6ec74c1
JH
1608 *str++ = PL_hexdigit[bits & 15];
1609 }
500d42d2 1610 } else {
3314a176 1611 U8 bits = 0;
e02cb1b1 1612 const I32 ai32 = len;
500d42d2
TH
1613 for (len = 0; len < ai32; len++) {
1614 if (len & 1) bits <<= 4;
1615#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1616 else if (utf8) {
1617 if (s >= strend) break;
3314a176 1618 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
500d42d2
TH
1619 }
1620#endif
1621 else bits = *(U8 *) s++;
a6ec74c1
JH
1622 *str++ = PL_hexdigit[(bits >> 4) & 15];
1623 }
1624 }
1625 *str = '\0';
500d42d2
TH
1626 SvCUR_set(sv, str - SvPVX(sv));
1627 XPUSHs(sv);
a6ec74c1 1628 break;
500d42d2 1629 }
a6ec74c1 1630 case 'c':
cc3ea0aa 1631 while (len-- > 0) {
500d42d2 1632#ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
3314a176 1633 int aint = SHIFT_BYTE(0, s, strend, datumtype);
500d42d2 1634#else
3314a176 1635 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
500d42d2 1636#endif
cc3ea0aa
NC
1637 if (aint >= 128) /* fake up signed chars */
1638 aint -= 256;
500d42d2 1639 if (!checksum)
cc3ea0aa 1640 PUSHs(sv_2mortal(newSViv((IV)aint)));
cc3ea0aa
NC
1641 else if (checksum > bits_in_uv)
1642 cdouble += (NV)aint;
1643 else
1644 cuv += aint;
a6ec74c1
JH
1645 }
1646 break;
1647 case 'C':
500d42d2
TH
1648#ifdef PERL_PACK_CAN_W
1649 case 'W':
1650#endif
1651 W_checksum:
35bcd338 1652 if (len == 0) {
773e5b22 1653 if (explicit_length && datumtype == 'C')
500d42d2
TH
1654#ifdef PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
1655 /* In maint, continue to switch from "character" mode */
1656 utf8 = 0;
1657#else
1658 /* Switch to "character" mode */
3314a176 1659 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
500d42d2 1660#endif
35bcd338
JH
1661 break;
1662 }
500d42d2 1663#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
773e5b22 1664 if (datumtype == 'C' ?
3314a176
TH
1665 (symptr->flags & FLAG_DO_UTF8) &&
1666 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
500d42d2 1667 while (len-- > 0 && s < strend) {
500d42d2 1668 STRLEN retlen;
e02cb1b1 1669 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
3314a176 1670 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
500d42d2
TH
1671 if (retlen == (STRLEN) -1 || retlen == 0)
1672 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1673 s += retlen;
1674 if (!checksum)
1675 PUSHs(sv_2mortal(newSVuv((UV) val)));
1676 else if (checksum > bits_in_uv)
1677 cdouble += (NV) val;
0bc88fab 1678 else
500d42d2 1679 cuv += val;
773e5b22 1680 }
500d42d2
TH
1681 } else
1682#endif
1683 if (!checksum)
a6ec74c1 1684 while (len-- > 0) {
e02cb1b1 1685 const U8 ch = *(U8 *) s++;
500d42d2 1686 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1687 }
500d42d2
TH
1688 else if (checksum > bits_in_uv)
1689 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1690 else
1691 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1692 break;
1693 case 'U':
35bcd338 1694 if (len == 0) {
500d42d2
TH
1695 if (explicit_length) {
1696#ifdef PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
1697 /* In maint, switch to "character" mode */
1698 utf8 = 1;
1699#else
1700 /* Switch to "bytes in UTF-8" mode */
3314a176 1701 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
500d42d2
TH
1702 else
1703 /* Should be impossible due to the need_utf8() test */
1704 Perl_croak(aTHX_ "U0 mode on a byte string");
1705#endif
1706 }
35bcd338
JH
1707 break;
1708 }
500d42d2
TH
1709#ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1710 if (!utf8)
1711 goto W_checksum;
1712#endif
1713 if (len > strend - s) len = strend - s;
773e5b22 1714 if (!checksum) {
500d42d2
TH
1715 if (len && unpack_only_one) len = 1;
1716 EXTEND(SP, len);
1717 EXTEND_MORTAL(len);
773e5b22 1718 }
500d42d2
TH
1719 while (len-- > 0 && s < strend) {
1720 STRLEN retlen;
1721 UV auv;
1722#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1723 if (utf8) {
1724 U8 result[UTF8_MAXLEN];
e02cb1b1 1725 const char *ptr = s;
500d42d2 1726 STRLEN len;
500d42d2
TH
1727 /* Bug: warns about bad utf8 even if we are short on bytes
1728 and will break out of the loop */
a4de8f71
NC
1729 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1730 'U'))
500d42d2
TH
1731 break;
1732 len = UTF8SKIP(result);
773e5b22 1733 if (!uni_to_bytes(aTHX_ &ptr, strend,
a4de8f71 1734 (char *) &result[1], len-1, 'U')) break;
500d42d2
TH
1735 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1736 s = ptr;
1737 } else
1738#endif
1739 {
1740 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1741 if (retlen == (STRLEN) -1 || retlen == 0)
1742 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1743 s += retlen;
1744 }
1745 if (!checksum)
1746 PUSHs(sv_2mortal(newSVuv((UV) auv)));
cc3ea0aa 1747 else if (checksum > bits_in_uv)
500d42d2 1748 cdouble += (NV) auv;
cc3ea0aa 1749 else
500d42d2 1750 cuv += auv;
a6ec74c1
JH
1751 }
1752 break;
cf168e39
WL
1753 case 's' | TYPE_IS_SHRIEKING:
1754#if SHORTSIZE != SIZE16
cc3ea0aa 1755 while (len-- > 0) {
500d42d2 1756 short ashort;
3314a176
TH
1757 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1758 DO_BO_UNPACK(ashort, s);
500d42d2 1759 if (!checksum)
cc3ea0aa 1760 PUSHs(sv_2mortal(newSViv((IV)ashort)));
cc3ea0aa
NC
1761 else if (checksum > bits_in_uv)
1762 cdouble += (NV)ashort;
1763 else
1764 cuv += ashort;
cf168e39
WL
1765 }
1766 break;
1767#else
1768 /* Fallthrough! */
a6ec74c1 1769#endif
cf168e39 1770 case 's':
cc3ea0aa 1771 while (len-- > 0) {
500d42d2
TH
1772 I16 ai16;
1773
1774#if U16SIZE > SIZE16
1775 ai16 = 0;
1776#endif
3314a176 1777 SHIFT16(utf8, s, strend, &ai16, datumtype);
cc3ea0aa
NC
1778 DO_BO_UNPACK(ai16, 16);
1779#if U16SIZE > SIZE16
1780 if (ai16 > 32767)
1781 ai16 -= 65536;
1782#endif
500d42d2 1783 if (!checksum)
cc3ea0aa 1784 PUSHs(sv_2mortal(newSViv((IV)ai16)));
cc3ea0aa
NC
1785 else if (checksum > bits_in_uv)
1786 cdouble += (NV)ai16;
1787 else
1788 cuv += ai16;
a6ec74c1
JH
1789 }
1790 break;
cf168e39
WL
1791 case 'S' | TYPE_IS_SHRIEKING:
1792#if SHORTSIZE != SIZE16
cc3ea0aa 1793 while (len-- > 0) {
500d42d2 1794 unsigned short aushort;
3314a176
TH
1795 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1796 DO_BO_UNPACK(aushort, s);
500d42d2
TH
1797 if (!checksum)
1798 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
cc3ea0aa
NC
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV)aushort;
1801 else
1802 cuv += aushort;
cf168e39
WL
1803 }
1804 break;
1805#else
1806 /* Fallhrough! */
1807#endif
a6ec74c1
JH
1808 case 'v':
1809 case 'n':
1810 case 'S':
cc3ea0aa 1811 while (len-- > 0) {
500d42d2
TH
1812 U16 au16;
1813#if U16SIZE > SIZE16
1814 au16 = 0;
1815#endif
3314a176 1816 SHIFT16(utf8, s, strend, &au16, datumtype);
500d42d2 1817 DO_BO_UNPACK(au16, 16);
a6ec74c1 1818#ifdef HAS_NTOHS
cc3ea0aa
NC
1819 if (datumtype == 'n')
1820 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1821#endif
1822#ifdef HAS_VTOHS
cc3ea0aa
NC
1823 if (datumtype == 'v')
1824 au16 = vtohs(au16);
a6ec74c1 1825#endif
500d42d2
TH
1826 if (!checksum)
1827 PUSHs(sv_2mortal(newSVuv((UV)au16)));
cc3ea0aa 1828 else if (checksum > bits_in_uv)
3314a176 1829 cdouble += (NV) au16;
cc3ea0aa
NC
1830 else
1831 cuv += au16;
a6ec74c1 1832 }
cc3ea0aa
NC
1833 break;
1834#ifdef PERL_PACK_CAN_SHRIEKSIGN
1835 case 'v' | TYPE_IS_SHRIEKING:
1836 case 'n' | TYPE_IS_SHRIEKING:
1837 while (len-- > 0) {
500d42d2
TH
1838 I16 ai16;
1839# if U16SIZE > SIZE16
1840 ai16 = 0;
1841# endif
3314a176 1842 SHIFT16(utf8, s, strend, &ai16, datumtype);
500d42d2 1843# ifdef HAS_NTOHS
cc3ea0aa 1844 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
500d42d2
TH
1845 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1846# endif /* HAS_NTOHS */
1847# ifdef HAS_VTOHS
cc3ea0aa 1848 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
500d42d2
TH
1849 ai16 = (I16) vtohs((U16) ai16);
1850# endif /* HAS_VTOHS */
1851 if (!checksum)
cc3ea0aa 1852 PUSHs(sv_2mortal(newSViv((IV)ai16)));
cc3ea0aa 1853 else if (checksum > bits_in_uv)
500d42d2 1854 cdouble += (NV) ai16;
cc3ea0aa
NC
1855 else
1856 cuv += ai16;
a6ec74c1
JH
1857 }
1858 break;
500d42d2 1859#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1860 case 'i':
cf168e39 1861 case 'i' | TYPE_IS_SHRIEKING:
cc3ea0aa 1862 while (len-- > 0) {
500d42d2 1863 int aint;
3314a176
TH
1864 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1865 DO_BO_UNPACK(aint, i);
500d42d2 1866 if (!checksum)
cc3ea0aa 1867 PUSHs(sv_2mortal(newSViv((IV)aint)));
cc3ea0aa
NC
1868 else if (checksum > bits_in_uv)
1869 cdouble += (NV)aint;
1870 else
1871 cuv += aint;
a6ec74c1
JH
1872 }
1873 break;
1874 case 'I':
cf168e39 1875 case 'I' | TYPE_IS_SHRIEKING:
cc3ea0aa 1876 while (len-- > 0) {
500d42d2 1877 unsigned int auint;
3314a176
TH
1878 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1879 DO_BO_UNPACK(auint, i);
500d42d2 1880 if (!checksum)
cc3ea0aa 1881 PUSHs(sv_2mortal(newSVuv((UV)auint)));
cc3ea0aa
NC
1882 else if (checksum > bits_in_uv)
1883 cdouble += (NV)auint;
1884 else
1885 cuv += auint;
a6ec74c1
JH
1886 }
1887 break;
92d41999 1888 case 'j':
cc3ea0aa 1889 while (len-- > 0) {
500d42d2 1890 IV aiv;
3314a176 1891 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
cc3ea0aa 1892#if IVSIZE == INTSIZE
3314a176 1893 DO_BO_UNPACK(aiv, i);
cc3ea0aa 1894#elif IVSIZE == LONGSIZE
3314a176 1895 DO_BO_UNPACK(aiv, l);
cc3ea0aa 1896#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3314a176 1897 DO_BO_UNPACK(aiv, 64);
500d42d2
TH
1898#else
1899 Perl_croak(aTHX_ "'j' not supported on this platform");
cc3ea0aa 1900#endif
500d42d2 1901 if (!checksum)
cc3ea0aa 1902 PUSHs(sv_2mortal(newSViv(aiv)));
cc3ea0aa
NC
1903 else if (checksum > bits_in_uv)
1904 cdouble += (NV)aiv;
1905 else
1906 cuv += aiv;
92d41999
JH
1907 }
1908 break;
1909 case 'J':
cc3ea0aa 1910 while (len-- > 0) {
500d42d2 1911 UV auv;
3314a176 1912 SHIFT_VAR(utf8, s, strend, auv, datumtype);
500d42d2 1913#if IVSIZE == INTSIZE
3314a176 1914 DO_BO_UNPACK(auv, i);
500d42d2 1915#elif IVSIZE == LONGSIZE
3314a176 1916 DO_BO_UNPACK(auv, l);
500d42d2 1917#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3314a176 1918 DO_BO_UNPACK(auv, 64);
500d42d2
TH
1919#else
1920 Perl_croak(aTHX_ "'J' not supported on this platform");
cc3ea0aa 1921#endif
500d42d2 1922 if (!checksum)
cc3ea0aa 1923 PUSHs(sv_2mortal(newSVuv(auv)));
cc3ea0aa
NC
1924 else if (checksum > bits_in_uv)
1925 cdouble += (NV)auv;
1926 else
1927 cuv += auv;
92d41999
JH
1928 }
1929 break;
cf168e39
WL
1930 case 'l' | TYPE_IS_SHRIEKING:
1931#if LONGSIZE != SIZE32
cc3ea0aa 1932 while (len-- > 0) {
500d42d2 1933 long along;
3314a176
TH
1934 SHIFT_VAR(utf8, s, strend, along, datumtype);
1935 DO_BO_UNPACK(along, l);
500d42d2 1936 if (!checksum)
cc3ea0aa 1937 PUSHs(sv_2mortal(newSViv((IV)along)));
cc3ea0aa
NC
1938 else if (checksum > bits_in_uv)
1939 cdouble += (NV)along;
1940 else
1941 cuv += along;
cf168e39
WL
1942 }
1943 break;
1944#else
1945 /* Fallthrough! */
a6ec74c1 1946#endif
cf168e39 1947 case 'l':
cc3ea0aa 1948 while (len-- > 0) {
500d42d2
TH
1949 I32 ai32;
1950#if U32SIZE > SIZE32
1951 ai32 = 0;
1952#endif
3314a176 1953 SHIFT32(utf8, s, strend, &ai32, datumtype);
cc3ea0aa
NC
1954 DO_BO_UNPACK(ai32, 32);
1955#if U32SIZE > SIZE32
500d42d2 1956 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1957#endif
500d42d2 1958 if (!checksum)
cc3ea0aa 1959 PUSHs(sv_2mortal(newSViv((IV)ai32)));
cc3ea0aa
NC
1960 else if (checksum > bits_in_uv)
1961 cdouble += (NV)ai32;
1962 else
1963 cuv += ai32;
a6ec74c1
JH
1964 }
1965 break;
cf168e39
WL
1966 case 'L' | TYPE_IS_SHRIEKING:
1967#if LONGSIZE != SIZE32
cc3ea0aa 1968 while (len-- > 0) {
500d42d2 1969 unsigned long aulong;
3314a176
TH
1970 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1971 DO_BO_UNPACK(aulong, l);
500d42d2 1972 if (!checksum)
cc3ea0aa 1973 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
cc3ea0aa
NC
1974 else if (checksum > bits_in_uv)
1975 cdouble += (NV)aulong;
1976 else
1977 cuv += aulong;
cf168e39
WL
1978 }
1979 break;
1980#else
1981 /* Fall through! */
1982#endif
a6ec74c1
JH
1983 case 'V':
1984 case 'N':
1985 case 'L':
cc3ea0aa 1986 while (len-- > 0) {
500d42d2
TH
1987 U32 au32;
1988#if U32SIZE > SIZE32
1989 au32 = 0;
1990#endif
3314a176 1991 SHIFT32(utf8, s, strend, &au32, datumtype);
500d42d2 1992 DO_BO_UNPACK(au32, 32);
a6ec74c1 1993#ifdef HAS_NTOHL
cc3ea0aa
NC
1994 if (datumtype == 'N')
1995 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1996#endif
1997#ifdef HAS_VTOHL
cc3ea0aa
NC
1998 if (datumtype == 'V')
1999 au32 = vtohl(au32);
a6ec74c1 2000#endif
500d42d2 2001 if (!checksum)
773e5b22
NC
2002 PUSHs(sv_2mortal(newSVuv((UV)au32)));
2003 else if (checksum > bits_in_uv)
2004 cdouble += (NV)au32;
2005 else
2006 cuv += au32;
cc3ea0aa
NC
2007 }
2008 break;
2009#ifdef PERL_PACK_CAN_SHRIEKSIGN
2010 case 'V' | TYPE_IS_SHRIEKING:
2011 case 'N' | TYPE_IS_SHRIEKING:
2012 while (len-- > 0) {
500d42d2
TH
2013 I32 ai32;
2014# if U32SIZE > SIZE32
2015 ai32 = 0;
2016# endif
3314a176 2017 SHIFT32(utf8, s, strend, &ai32, datumtype);
500d42d2 2018# ifdef HAS_NTOHL
cc3ea0aa
NC
2019 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
2020 ai32 = (I32)PerlSock_ntohl((U32)ai32);
500d42d2
TH
2021# endif
2022# ifdef HAS_VTOHL
cc3ea0aa
NC
2023 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
2024 ai32 = (I32)vtohl((U32)ai32);
500d42d2
TH
2025# endif
2026 if (!checksum)
cc3ea0aa 2027 PUSHs(sv_2mortal(newSViv((IV)ai32)));
cc3ea0aa
NC
2028 else if (checksum > bits_in_uv)
2029 cdouble += (NV)ai32;
2030 else
2031 cuv += ai32;
a6ec74c1
JH
2032 }
2033 break;
500d42d2 2034#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 2035 case 'p':
a6ec74c1 2036 while (len-- > 0) {
e02cb1b1 2037 const char *aptr;
3314a176 2038 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
206b424e 2039 DO_BO_UNPACK_PC(aptr);
cc3ea0aa
NC
2040 /* newSVpv generates undef if aptr is NULL */
2041 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
2042 }
2043 break;
2044 case 'w':
a6ec74c1
JH
2045 {
2046 UV auv = 0;
2047 U32 bytes = 0;
773e5b22 2048
500d42d2
TH
2049 while (len > 0 && s < strend) {
2050 U8 ch;
3314a176 2051 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
500d42d2 2052 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2053 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
500d42d2 2054 if (ch < 0x80) {
a6ec74c1 2055 bytes = 0;
cc3ea0aa 2056 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
2057 len--;
2058 auv = 0;
500d42d2 2059 continue;
a6ec74c1 2060 }
500d42d2 2061 if (++bytes >= sizeof(UV)) { /* promote to string */
c06c673c 2062 const char *t;
a6ec74c1 2063
481ddcff 2064 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2065 while (s < strend) {
3314a176 2066 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
500d42d2
TH
2067 sv = mul128(sv, (U8)(ch & 0x7f));
2068 if (!(ch & 0x80)) {
a6ec74c1
JH
2069 bytes = 0;
2070 break;
2071 }
2072 }
c06c673c 2073 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2074 while (*t == '0')
2075 t++;
cacbe70b 2076 sv_chop(sv, (char *)t);
a6ec74c1
JH
2077 PUSHs(sv_2mortal(sv));
2078 len--;
2079 auv = 0;
2080 }
2081 }
2082 if ((s >= strend) && bytes)
cf168e39 2083 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2084 }
2085 break;
2086 case 'P':
cf168e39
WL
2087 if (symptr->howlen == e_star)
2088 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2089 EXTEND(SP, 1);
820b8518 2090 if (s + sizeof(char*) <= strend) {
500d42d2 2091 char *aptr;
3314a176 2092 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
500d42d2 2093 DO_BO_UNPACK_PC(aptr);
773e5b22
NC
2094 /* newSVpvn generates undef if aptr is NULL */
2095 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
500d42d2 2096 }
a6ec74c1
JH
2097 break;
2098#ifdef HAS_QUAD
2099 case 'q':
cc3ea0aa 2100 while (len-- > 0) {
500d42d2 2101 Quad_t aquad;
3314a176
TH
2102 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2103 DO_BO_UNPACK(aquad, 64);
500d42d2
TH
2104 if (!checksum)
2105 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
cc3ea0aa 2106 newSViv((IV)aquad) : newSVnv((NV)aquad)));
cc3ea0aa
NC
2107 else if (checksum > bits_in_uv)
2108 cdouble += (NV)aquad;
2109 else
2110 cuv += aquad;
2111 }
a6ec74c1
JH
2112 break;
2113 case 'Q':
cc3ea0aa 2114 while (len-- > 0) {
500d42d2 2115 Uquad_t auquad;
3314a176
TH
2116 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2117 DO_BO_UNPACK(auquad, 64);
500d42d2
TH
2118 if (!checksum)
2119 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2120 newSVuv((UV)auquad):newSVnv((NV)auquad)));
cc3ea0aa
NC
2121 else if (checksum > bits_in_uv)
2122 cdouble += (NV)auquad;
2123 else
2124 cuv += auquad;
a6ec74c1
JH
2125 }
2126 break;
500d42d2 2127#endif /* HAS_QUAD */
a6ec74c1
JH
2128 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2129 case 'f':
cc3ea0aa 2130 while (len-- > 0) {
500d42d2 2131 float afloat;
3314a176 2132 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
500d42d2
TH
2133 DO_BO_UNPACK_N(afloat, float);
2134 if (!checksum)
2135 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2136 else
cc3ea0aa 2137 cdouble += afloat;
773e5b22 2138 }
a6ec74c1
JH
2139 break;
2140 case 'd':
cc3ea0aa 2141 while (len-- > 0) {
500d42d2 2142 double adouble;
3314a176 2143 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
500d42d2
TH
2144 DO_BO_UNPACK_N(adouble, double);
2145 if (!checksum)
2146 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2147 else
cc3ea0aa 2148 cdouble += adouble;
773e5b22 2149 }
a6ec74c1 2150 break;
92d41999 2151 case 'F':
cc3ea0aa 2152 while (len-- > 0) {
500d42d2 2153 NV anv;
3314a176 2154 SHIFT_VAR(utf8, s, strend, anv, datumtype);
500d42d2
TH
2155 DO_BO_UNPACK_N(anv, NV);
2156 if (!checksum)
2157 PUSHs(sv_2mortal(newSVnv(anv)));
2158 else
cc3ea0aa 2159 cdouble += anv;
773e5b22 2160 }
92d41999
JH
2161 break;
2162#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2163 case 'D':
cc3ea0aa 2164 while (len-- > 0) {
500d42d2 2165 long double aldouble;
3314a176 2166 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
500d42d2
TH
2167 DO_BO_UNPACK_N(aldouble, long double);
2168 if (!checksum)
2169 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2170 else
2171 cdouble += aldouble;
92d41999
JH
2172 }
2173 break;
2174#endif
a6ec74c1
JH
2175 case 'u':
2176 /* MKS:
2177 * Initialise the decode mapping. By using a table driven
2178 * algorithm, the code will be character-set independent
2179 * (and just as fast as doing character arithmetic)
2180 */
d2ae4405 2181 if (PL_uudmap[(U8)'M'] == 0) {
820b8518 2182 size_t i;
a6ec74c1 2183
820b8518 2184 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2185 PL_uudmap[(U8)PL_uuemap[i]] = i;
2186 /*
2187 * Because ' ' and '`' map to the same value,
2188 * we need to decode them both the same.
2189 */
d2ae4405 2190 PL_uudmap[(U8)' '] = 0;
a6ec74c1 2191 }
500d42d2 2192 {
e02cb1b1 2193 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
133cdda0 2194 sv = sv_2mortal(newSV(l));
500d42d2
TH
2195 if (l) SvPOK_on(sv);
2196 }
2197 if (utf8) {
2198 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2199 I32 a, b, c, d;
2200 char hunk[4];
2201
2202 hunk[3] = '\0';
2203 while (len > 0) {
2204 next_uni_uu(aTHX_ &s, strend, &a);
2205 next_uni_uu(aTHX_ &s, strend, &b);
2206 next_uni_uu(aTHX_ &s, strend, &c);
2207 next_uni_uu(aTHX_ &s, strend, &d);
2208 hunk[0] = (char)((a << 2) | (b >> 4));
2209 hunk[1] = (char)((b << 4) | (c >> 2));
2210 hunk[2] = (char)((c << 6) | d);
2211 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2212 len -= 3;
2213 }
2214 if (s < strend) {
e02cb1b1
NC
2215 if (*s == '\n') {
2216 s++;
2217 }
500d42d2
TH
2218 else {
2219 /* possible checksum byte */
e02cb1b1
NC
2220 const char *skip = s+UTF8SKIP(s);
2221 if (skip < strend && *skip == '\n')
2222 s = skip+1;
500d42d2
TH
2223 }
2224 }
2225 }
2226 } else {
773e5b22
NC
2227 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2228 I32 a, b, c, d;
2229 char hunk[4];
a6ec74c1 2230
773e5b22
NC
2231 hunk[3] = '\0';
2232 len = PL_uudmap[*(U8*)s++] & 077;
2233 while (len > 0) {
2234 if (s < strend && ISUUCHAR(*s))
2235 a = PL_uudmap[*(U8*)s++] & 077;
2236 else
2237 a = 0;
2238 if (s < strend && ISUUCHAR(*s))
2239 b = PL_uudmap[*(U8*)s++] & 077;
2240 else
2241 b = 0;
2242 if (s < strend && ISUUCHAR(*s))
2243 c = PL_uudmap[*(U8*)s++] & 077;
2244 else
2245 c = 0;
2246 if (s < strend && ISUUCHAR(*s))
2247 d = PL_uudmap[*(U8*)s++] & 077;
2248 else
2249 d = 0;
2250 hunk[0] = (char)((a << 2) | (b >> 4));
2251 hunk[1] = (char)((b << 4) | (c >> 2));
2252 hunk[2] = (char)((c << 6) | d);
2253 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2254 len -= 3;
2255 }
2256 if (*s == '\n')
2257 s++;
2258 else /* possible checksum byte */
2259 if (s + 1 < strend && s[1] == '\n')
2260 s += 2;
a6ec74c1 2261 }
500d42d2
TH
2262 }
2263 XPUSHs(sv);
a6ec74c1
JH
2264 break;
2265 }
cf168e39 2266
a6ec74c1 2267 if (checksum) {
cc3ea0aa 2268 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2269 (checksum > bits_in_uv &&
500d42d2
TH
2270 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2271 NV trouble, anv;
a6ec74c1 2272
500d42d2 2273 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2274 while (checksum >= 16) {
2275 checksum -= 16;
500d42d2 2276 anv *= 65536.0;
a6ec74c1 2277 }
a6ec74c1 2278 while (cdouble < 0.0)
500d42d2
TH
2279 cdouble += anv;
2280 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
cc3ea0aa 2281 sv = newSVnv(cdouble);
a6ec74c1
JH
2282 }
2283 else {
fa8ec7c1
NC
2284 if (checksum < bits_in_uv) {
2285 UV mask = ((UV)1 << checksum) - 1;
92d41999 2286 cuv &= mask;
a6ec74c1 2287 }
cc3ea0aa 2288 sv = newSVuv(cuv);
a6ec74c1
JH
2289 }
2290 XPUSHs(sv_2mortal(sv));
2291 checksum = 0;
2292 }
773e5b22 2293
cf168e39
WL
2294 if (symptr->flags & FLAG_SLASH){
2295 if (SP - PL_stack_base - start_sp_offset <= 0)
2296 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2297 if( next_symbol(symptr) ){
2298 if( symptr->howlen == e_number )
2299 Perl_croak(aTHX_ "Count after length/code in unpack" );
2300 if( beyond ){
2301 /* ...end of char buffer then no decent length available */
2302 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2303 } else {
2304 /* take top of stack (hope it's numeric) */
2305 len = POPi;
2306 if( len < 0 )
2307 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2308 }
2309 } else {
2310 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2311 }
2312 datumtype = symptr->code;
0bc88fab 2313 explicit_length = FALSE;
cf168e39
WL
2314 goto redo_switch;
2315 }
a6ec74c1 2316 }
cf168e39 2317
18529408
IZ
2318 if (new_s)
2319 *new_s = s;
2320 PUTBACK;
2321 return SP - PL_stack_base - start_sp_offset;
2322}
2323
2324PP(pp_unpack)
2325{
2326 dSP;
2327 dPOPPOPssrl;
2328 I32 gimme = GIMME_V;
2329 STRLEN llen;
2330 STRLEN rlen;
500d42d2
TH
2331 const char *pat = SvPV_const(left, llen);
2332 const char *s = SvPV_const(right, rlen);
cacbe70b
NC
2333 const char *strend = s + rlen;
2334 const char *patend = pat + llen;
500d42d2 2335 I32 cnt;
18529408
IZ
2336
2337 PUTBACK;
cacbe70b 2338 cnt = unpackstring((char *)pat, (char *)patend, (char *)s, (char *)strend,
cf168e39 2339 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
3314a176 2340 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
cf168e39 2341
18529408
IZ
2342 SPAGAIN;
2343 if ( !cnt && gimme == G_SCALAR )
2344 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2345 RETURN;
2346}
2347
3314a176 2348STATIC U8 *
e02cb1b1 2349doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2350{
3314a176 2351 *h++ = PL_uuemap[len];
a6ec74c1 2352 while (len > 2) {
3314a176
TH
2353 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2354 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2355 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2356 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2357 s += 3;
2358 len -= 3;
2359 }
2360 if (len > 0) {
e02cb1b1 2361 const char r = (len > 1 ? s[1] : '\0');
3314a176
TH
2362 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2363 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2364 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2365 *h++ = PL_uuemap[0];
a6ec74c1 2366 }
3314a176
TH
2367 *h++ = '\n';
2368 return h;
a6ec74c1
JH
2369}
2370
2371STATIC SV *
cacbe70b 2372S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2373{
e02cb1b1
NC
2374 SV *result = newSVpvn(s, l);
2375 char *const result_c = SvPV_nolen(result); /* convenience */
2376 char *out = result_c;
2377 bool skip = 1;
2378 bool ignore = 0;
a6ec74c1
JH
2379
2380 while (*s) {
2381 switch (*s) {
2382 case ' ':
2383 break;
2384 case '+':
2385 if (!skip) {
2386 SvREFCNT_dec(result);
2387 return (NULL);
2388 }
2389 break;
2390 case '0':
2391 case '1':
2392 case '2':
2393 case '3':
2394 case '4':
2395 case '5':
2396 case '6':
2397 case '7':
2398 case '8':
2399 case '9':
2400 skip = 0;
2401 if (!ignore) {
2402 *(out++) = *s;
2403 }
2404 break;
2405 case '.':
2406 ignore = 1;
2407 break;
2408 default:
2409 SvREFCNT_dec(result);
2410 return (NULL);
2411 }
2412 s++;
2413 }
2414 *(out++) = '\0';
2415 SvCUR_set(result, out - result_c);
2416 return (result);
2417}
2418
2419/* pnum must be '\0' terminated */
2420STATIC int
2421S_div128(pTHX_ SV *pnum, bool *done)
2422{
e02cb1b1
NC
2423 STRLEN len;
2424 char * const s = SvPV(pnum, len);
2425 char *t = s;
2426 int m = 0;
2427
2428 *done = 1;
2429 while (*t) {
2430 const int i = m * 10 + (*t - '0');
2431 const int r = (i >> 7); /* r < 10 */
2432 m = i & 0x7F;
2433 if (r) {
2434 *done = 0;
2435 }
2436 *(t++) = '0' + r;
a6ec74c1 2437 }
e02cb1b1
NC
2438 *(t++) = '\0';
2439 SvCUR_set(pnum, (STRLEN) (t - s));
2440 return (m);
a6ec74c1
JH
2441}
2442
18529408 2443/*
f3479639
JH
2444=for apidoc packlist
2445
2446The engine implementing pack() Perl function.
2447
2448=cut */
2449
2450
2451void
970ba7cf
NC
2452Perl_packlist(pTHX_ SV *cat, char *pat, char *patend, register SV **beglist,
2453 SV **endlist)
f3479639 2454{
e02cb1b1
NC
2455 tempsym_t sym;
2456
2457 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
cf168e39 2458
3314a176
TH
2459 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2460 Also make sure any UTF8 flag is loaded */
6d95986e 2461 SvPV_force_nolen(cat);
3314a176
TH
2462 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2463
cf168e39
WL
2464 (void)pack_rec( cat, &sym, beglist, endlist );
2465}
2466
3314a176
TH
2467/* like sv_utf8_upgrade, but also repoint the group start markers */
2468STATIC void
2469marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2470 STRLEN len;
2471 tempsym_t *group;
e02cb1b1
NC
2472 const char *from_ptr, *from_start, *from_end, **marks, **m;
2473 char *to_start, *to_ptr;
3314a176
TH
2474
2475 if (SvUTF8(sv)) return;
2476
2477 from_start = SvPVX(sv);
2478 from_end = from_start + SvCUR(sv);
2479 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2480 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2481 if (from_ptr == from_end) {
2482 /* Simple case: no character needs to be changed */
2483 SvUTF8_on(sv);
2484 return;
2485 }
2486
773e5b22 2487 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
3314a176
TH
2488 New('U', to_start, len, char);
2489 Copy(from_start, to_start, from_ptr-from_start, char);
2490 to_ptr = to_start + (from_ptr-from_start);
2491
e02cb1b1 2492 New('U', marks, sym_ptr->level+2, const char *);
3314a176
TH
2493 for (group=sym_ptr; group; group = group->previous)
2494 marks[group->level] = from_start + group->strbeg;
2495 marks[sym_ptr->level+1] = from_end+1;
2496 for (m = marks; *m < from_ptr; m++)
2497 *m = to_start + (*m-from_start);
2498
2499 for (;from_ptr < from_end; from_ptr++) {
2500 while (*m == from_ptr) *m++ = to_ptr;
a4de8f71 2501 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
3314a176
TH
2502 }
2503 *to_ptr = 0;
2504
2505 while (*m == from_ptr) *m++ = to_ptr;
2506 if (m != marks + sym_ptr->level+1) {
2507 Safefree(marks);
2508 Safefree(to_start);
2509 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2510 }
2511 for (group=sym_ptr; group; group = group->previous)
2512 group->strbeg = marks[group->level] - to_start;
2513 Safefree(marks);
2514
2515 if (SvOOK(sv)) {
2516 if (SvIVX(sv)) {
2517 SvLEN(sv) += SvIVX(sv);
2518 from_start -= SvIVX(sv);
2519 SvIV_set(sv, 0);
2520 }
2521 SvFLAGS(sv) &= ~SVf_OOK;
2522 }
2523 if (SvLEN(sv) != 0)
2524 Safefree(from_start);
2525 SvPVX(sv) = to_start;
2526 SvCUR(sv) = to_ptr - to_start;
2527 SvLEN(sv) = len;
2528 SvUTF8_on(sv);
2529}
2530
2531/* Exponential string grower. Makes string extension effectively O(n)
2532 needed says how many extra bytes we need (not counting the final '\0')
2533 Only grows the string if there is an actual lack of space
2534*/
2535STATIC char *
8c89da26 2536S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
e02cb1b1
NC
2537 const STRLEN cur = SvCUR(sv);
2538 const STRLEN len = SvLEN(sv);
3314a176
TH
2539 STRLEN extend;
2540 if (len - cur > needed) return SvPVX(sv);
2541 extend = needed > len ? needed : len;
2542 return SvGROW(sv, len+extend+1);
2543}
cf168e39
WL
2544
2545STATIC
2546SV **
3314a176 2547S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
cf168e39 2548{
cf168e39 2549 tempsym_t lookahead;
3314a176
TH
2550 I32 items = endlist - beglist;
2551 bool found = next_symbol(symptr);
2552#ifdef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2553 const bool utf8 = 0;
2554#else
2555 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2556#endif
bc743096 2557 bool warn_utf8 = ckWARN(WARN_UTF8);
a6ec74c1 2558
3314a176
TH
2559 if (symptr->level == 0 && found && symptr->code == 'U') {
2560 marked_upgrade(aTHX_ cat, symptr);
2561 symptr->flags |= FLAG_DO_UTF8;
2562#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2563 utf8 = 0;
18529408 2564#endif
3314a176
TH
2565 }
2566 symptr->strbeg = SvCUR(cat);
cf168e39
WL
2567
2568 while (found) {
3314a176
TH
2569 SV *fromstr;
2570 STRLEN fromlen;
2571 I32 len;
0e2d6244 2572 SV *lengthcode = NULL;
cf168e39 2573 I32 datumtype = symptr->code;
3314a176
TH
2574 howlen_t howlen = symptr->howlen;
2575 char *start = SvPVX(cat);
2576 char *cur = start + SvCUR(cat);
cf168e39 2577
3314a176
TH
2578#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2579
2580 switch (howlen) {
773e5b22 2581 case e_star:
3314a176
TH
2582 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2583 0 : items;
2584 break;
2585 default:
2586 /* e_no_len and e_number */
2587 len = symptr->length;
cf168e39
WL
2588 break;
2589 }
2590
3314a176 2591 if (len) {
e5464422 2592 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
3314a176 2593
e5464422
TH
2594 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2595 /* We can process this letter. */
2596 STRLEN size = props & PACK_SIZE_MASK;
2597 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2598 }
3314a176
TH
2599 }
2600
cf168e39
WL
2601 /* Look ahead for next symbol. Do we have code/code? */
2602 lookahead = *symptr;
2603 found = next_symbol(&lookahead);
1928d712
NC
2604 if (symptr->flags & FLAG_SLASH) {
2605 IV count;
3314a176 2606 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
1928d712
NC
2607 if (strchr("aAZ", lookahead.code)) {
2608 if (lookahead.howlen == e_number) count = lookahead.length;
2609 else {
5466829a
TS
2610 if (items > 0) {
2611 if (SvGAMAGIC(*beglist)) {
2612 /* Avoid reading the active data more than once
2613 by copying it to a temporary. */
2614 STRLEN len;
2615 const char *const pv = SvPV_const(*beglist, len);
2616 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2617 if (SvUTF8(*beglist))
2618 SvUTF8_on(temp);
2619 *beglist = temp;
2620 }
1928d712
NC
2621 count = DO_UTF8(*beglist) ?
2622 sv_len_utf8(*beglist) : sv_len(*beglist);
5466829a 2623 }
1928d712
NC
2624 else count = 0;
2625 if (lookahead.code == 'Z') count++;
2626 }
2627 } else {
2628 if (lookahead.howlen == e_number && lookahead.length < items)
2629 count = lookahead.length;
2630 else count = items;
2631 }
2632 lookahead.howlen = e_number;
2633 lookahead.length = count;
2634 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2635 }
cf168e39 2636
773e5b22
NC
2637 /* Code inside the switch must take care to properly update
2638 cat (CUR length and '\0' termination) if it updated *cur and
3314a176 2639 doesn't simply leave using break */
cc3ea0aa 2640 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2641 default:
3314a176
TH
2642 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2643 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2644 case '%':
cf168e39 2645 Perl_croak(aTHX_ "'%%' may not be used in pack");
3040befa
TH
2646 {
2647 char *from;
2648#ifdef PERL_PACK_CAN_DOT
2649# ifdef PERL_PACK_CAN_SHRIEKSIGN
2650 case '.' | TYPE_IS_SHRIEKING:
2651# endif
2652 case '.':
2653 if (howlen == e_star) from = start;
2654 else if (len == 0) from = cur;
2655 else {
2656 tempsym_t *group = symptr;
2657
2658 while (--len && group) group = group->previous;
2659 from = group ? start + group->strbeg : start;
2660 }
2661 fromstr = NEXTFROM;
2662 len = SvIV(fromstr);
2663 goto resize;
2664#endif
2665#ifdef PERL_PACK_CAN_SHRIEKSIGN
2666 case '@' | TYPE_IS_SHRIEKING:
2667#endif
a6ec74c1 2668 case '@':
3040befa
TH
2669 from = start + symptr->strbeg;
2670#ifdef PERL_PACK_CAN_DOT
2671 resize:
2672#endif
2673#ifdef PERL_PACK_CAN_SHRIEKSIGN
2674 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2675#else /* PERL_PACK_CAN_SHRIEKSIGN */
2676 if (utf8)
2677#endif
2678 if (len >= 0) {
2679 while (len && from < cur) {
2680 from += UTF8SKIP(from);
2681 len--;
2682 }
2683 if (from > cur)
2684 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2685 if (len) {
2686 /* Here we know from == cur */
2687 grow:
2688 GROWING(0, cat, start, cur, len);
2689 Zero(cur, len, char);
2690 cur += len;
2691 } else if (from < cur) {
2692 len = cur - from;
2693 goto shrink;
2694 } else goto no_change;
2695 } else {
2696 cur = from;
2697 len = -len;
2698 goto utf8_shrink;
3314a176 2699 }
3040befa
TH
2700 else {
2701 len -= cur - from;
3314a176 2702 if (len > 0) goto grow;
3040befa 2703 if (len == 0) goto no_change;
773e5b22 2704 len = -len;
3040befa 2705 goto shrink;
3314a176 2706 }
a6ec74c1 2707 break;
3040befa 2708 }
773e5b22 2709 case '(': {
cf168e39 2710 tempsym_t savsym = *symptr;
cc3ea0aa
NC
2711 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2712 symptr->flags |= group_modifiers;
cf168e39
WL
2713 symptr->patend = savsym.grpend;
2714 symptr->level++;
3314a176 2715 symptr->previous = &lookahead;
18529408 2716 while (len--) {
3314a176
TH
2717 U32 was_utf8;
2718 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2719 else symptr->flags &= ~FLAG_PARSE_UTF8;
2720 was_utf8 = SvUTF8(cat);
cf168e39 2721 symptr->patptr = savsym.grpbeg;
3314a176
TH
2722 beglist = pack_rec(cat, symptr, beglist, endlist);
2723#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2724 if (SvUTF8(cat) != was_utf8)
2725 /* This had better be an upgrade while in utf8==0 mode */
2726 utf8 = 1;
2727#endif
2728
cf168e39 2729 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2730 break; /* No way to continue */
2731 }
3fe4228b 2732 items = endlist - beglist;
3314a176
TH
2733 lookahead.flags = symptr->flags & ~group_modifiers;
2734 goto no_change;
18529408 2735 }
62f95557
IZ
2736 case 'X' | TYPE_IS_SHRIEKING:
2737 if (!len) /* Avoid division by 0 */
2738 len = 1;
3314a176
TH
2739 if (utf8) {
2740 char *hop, *last;
2741 I32 l = len;
2742 hop = last = start;
2743 while (hop < cur) {
2744 hop += UTF8SKIP(hop);
2745 if (--l == 0) {
2746 last = hop;
2747 l = len;
2748 }
2749 }
2750 if (last > cur)
2751 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2752 cur = last;
2753 break;
2754 }
2755 len = (cur-start) % len;
62f95557 2756 /* FALL THROUGH */
a6ec74c1 2757 case 'X':
3314a176
TH
2758 if (utf8) {
2759 if (len < 1) goto no_change;
3040befa 2760 utf8_shrink:
3314a176
TH
2761 while (len > 0) {
2762 if (cur <= start)
3040befa
TH
2763 Perl_croak(aTHX_ "'%c' outside of string in pack",
2764 (int) TYPE_NO_MODIFIERS(datumtype));
3314a176
TH
2765 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2766 if (cur <= start)
3040befa
TH
2767 Perl_croak(aTHX_ "'%c' outside of string in pack",
2768 (int) TYPE_NO_MODIFIERS(datumtype));
3314a176
TH
2769 }
2770 len--;
2771 }
2772 } else {
773e5b22 2773 shrink:
3314a176 2774 if (cur - start < len)
3040befa
TH
2775 Perl_croak(aTHX_ "'%c' outside of string in pack",
2776 (int) TYPE_NO_MODIFIERS(datumtype));
3314a176
TH
2777 cur -= len;
2778 }
2779 if (cur < start+symptr->strbeg) {
2780 /* Make sure group starts don't point into the void */
2781 tempsym_t *group;
a4de8f71 2782 const STRLEN length = cur-start;
3314a176
TH
2783 for (group = symptr;
2784 group && length < group->strbeg;
2785 group = group->previous) group->strbeg = length;
2786 lookahead.strbeg = length;
2787 }
a6ec74c1 2788 break;
773e5b22
NC
2789 case 'x' | TYPE_IS_SHRIEKING: {
2790 I32 ai32;
62f95557
IZ
2791 if (!len) /* Avoid division by 0 */
2792 len = 1;
a4de8f71 2793 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
773e5b22
NC
2794 else ai32 = (cur - start) % len;
2795 if (ai32 == 0) goto no_change;
2796 len -= ai32;
2797 }
2798 /* FALL THROUGH */
a6ec74c1 2799 case 'x':
3314a176 2800 goto grow;
a6ec74c1
JH
2801 case 'A':
2802 case 'Z':
3314a176 2803 case 'a': {
e02cb1b1 2804 const char *aptr;
3314a176 2805
a6ec74c1 2806 fromstr = NEXTFROM;
cacbe70b 2807 aptr = (char *) SvPV_const(fromstr, fromlen);
3314a176
TH
2808#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2809 if (DO_UTF8(fromstr)) {
e02cb1b1 2810 const char *end, *s;
3314a176
TH
2811
2812 if (!utf8 && !SvUTF8(cat)) {
2813 marked_upgrade(aTHX_ cat, symptr);
2814 lookahead.flags |= FLAG_DO_UTF8;
2815 lookahead.strbeg = symptr->strbeg;
2816 utf8 = 1;
2817 start = SvPVX(cat);
2818 cur = start + SvCUR(cat);
2819 }
773e5b22 2820 if (howlen == e_star) {
3314a176
TH
2821 if (utf8) goto string_copy;
2822 len = fromlen+1;
2823 }
2824 s = aptr;
2825 end = aptr + fromlen;
2826 fromlen = datumtype == 'Z' ? len-1 : len;
2827 while ((I32) fromlen > 0 && s < end) {
2828 s += UTF8SKIP(s);
2829 fromlen--;
2830 }
2831 if (s > end)
2832 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2833 if (utf8) {
773e5b22 2834 len = fromlen;
3314a176
TH
2835 if (datumtype == 'Z') len++;
2836 fromlen = s-aptr;
2837 len += fromlen;
773e5b22 2838
3314a176 2839 goto string_copy;
773e5b22 2840 }
3314a176
TH
2841 fromlen = len - fromlen;
2842 if (datumtype == 'Z') fromlen--;
2843 if (howlen == e_star) {
2844 len = fromlen;
2845 if (datumtype == 'Z') len++;
773e5b22 2846 }
3314a176 2847 GROWING(0, cat, start, cur, len);
773e5b22 2848 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
3314a176
TH
2849 datumtype | TYPE_IS_PACK))
2850 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2851 cur += fromlen;
a6ec74c1 2852 len -= fromlen;
3314a176
TH
2853 } else if (utf8) {
2854 if (howlen == e_star) {
2855 len = fromlen;
2856 if (datumtype == 'Z') len++;
a6ec74c1 2857 }
3314a176
TH
2858 if (len <= (I32) fromlen) {
2859 fromlen = len;
2860 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2861 }
773e5b22
NC
2862 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2863 upgrade, so:
2864 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2865 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
3314a176
TH
2866 len -= fromlen;
2867 while (fromlen > 0) {
a4de8f71 2868 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
3314a176
TH
2869 aptr++;
2870 fromlen--;
773e5b22 2871 }
3314a176
TH
2872 } else
2873#endif
2874 {
2875#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2876 string_copy:
2877#endif
2878 if (howlen == e_star) {
2879 len = fromlen;
2880 if (datumtype == 'Z') len++;
a6ec74c1 2881 }
3314a176
TH
2882 if (len <= (I32) fromlen) {
2883 fromlen = len;
2884 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2885 }
2886 GROWING(0, cat, start, cur, len);
2887 Copy(aptr, cur, fromlen, char);
2888 cur += fromlen;
2889 len -= fromlen;
a6ec74c1 2890 }
3314a176
TH
2891 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2892 cur += len;
a6ec74c1 2893 break;
3314a176 2894 }
a6ec74c1 2895 case 'B':
3314a176 2896 case 'b': {
e02cb1b1 2897 const char *str, *end;
3314a176
TH
2898 I32 l, field_len;
2899 U8 bits;
2900 bool utf8_source;
2901 U32 utf8_flags;
a6ec74c1 2902
773e5b22 2903 fromstr = NEXTFROM;
e02cb1b1 2904 str = SvPV_const(fromstr, fromlen);
3314a176
TH
2905 end = str + fromlen;
2906#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2907 if (DO_UTF8(fromstr)) {
2908 utf8_source = TRUE;
bc743096 2909 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
3314a176
TH
2910 } else
2911#endif
2912 {
2913 utf8_source = FALSE;
2914 utf8_flags = 0; /* Unused, but keep compilers happy */
2915 }
2916 if (howlen == e_star) len = fromlen;
2917 field_len = (len+7)/8;
2918 GROWING(utf8, cat, start, cur, field_len);
2919 if (len > (I32)fromlen) len = fromlen;
2920 bits = 0;
2921 l = 0;
2922 if (datumtype == 'B')
2923 while (l++ < len) {
2924 if (utf8_source) {
f2139bd3 2925 UV val = 0;
3314a176
TH
2926 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2927 bits |= val & 1;
2928 } else bits |= *str++ & 1;
2929 if (l & 7) bits <<= 1;
773e5b22 2930 else {
3314a176
TH
2931 PUSH_BYTE(utf8, cur, bits);
2932 bits = 0;
a6ec74c1
JH
2933 }
2934 }
3314a176
TH
2935 else
2936 /* datumtype == 'b' */
2937 while (l++ < len) {
2938 if (utf8_source) {
f2139bd3 2939 UV val = 0;
3314a176
TH
2940 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2941 if (val & 1) bits |= 0x80;
2942 } else if (*str++ & 1)
2943 bits |= 0x80;
2944 if (l & 7) bits >>= 1;
773e5b22 2945 else {
3314a176
TH
2946 PUSH_BYTE(utf8, cur, bits);
2947 bits = 0;
a6ec74c1
JH
2948 }
2949 }
3314a176
TH
2950 l--;
2951 if (l & 7) {
773e5b22 2952 if (datumtype == 'B')
3314a176 2953 bits <<= 7 - (l & 7);
773e5b22 2954 else
3314a176
TH
2955 bits >>= 7 - (l & 7);
2956 PUSH_BYTE(utf8, cur, bits);
2957 l += 7;
a6ec74c1 2958 }
3314a176
TH
2959 /* Determine how many chars are left in the requested field */
2960 l /= 8;
2961 if (howlen == e_star) field_len = 0;
2962 else field_len -= l;
2963 Zero(cur, field_len, char);
2964 cur += field_len;
a6ec74c1 2965 break;
3314a176 2966 }
a6ec74c1 2967 case 'H':
3314a176
TH
2968 case 'h': {
2969 char *str, *end;
2970 I32 l, field_len;
2971 U8 bits;
2972 bool utf8_source;
2973 U32 utf8_flags;
a6ec74c1 2974
773e5b22
NC
2975 fromstr = NEXTFROM;
2976 str = SvPV(fromstr, fromlen);
3314a176
TH
2977 end = str + fromlen;
2978#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2979 if (DO_UTF8(fromstr)) {
2980 utf8_source = TRUE;
bc743096 2981 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
3314a176
TH
2982 } else
2983#endif
2984 {
2985 utf8_source = FALSE;
2986 utf8_flags = 0; /* Unused, but keep compilers happy */
2987 }
2988 if (howlen == e_star) len = fromlen;
2989 field_len = (len+1)/2;
2990 GROWING(utf8, cat, start, cur, field_len);
2991 if (!utf8 && len > (I32)fromlen) len = fromlen;
2992 bits = 0;
2993 l = 0;
2994 if (datumtype == 'H')
2995 while (l++ < len) {
2996 if (utf8_source) {
f2139bd3 2997 UV val = 0;
3314a176
TH
2998 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2999 if (val < 256 && isALPHA(val))
3000 bits |= (val + 9) & 0xf;
a6ec74c1 3001 else
3314a176
TH
3002 bits |= val & 0xf;
3003 } else if (isALPHA(*str))
3004 bits |= (*str++ + 9) & 0xf;
3005 else
3006 bits |= *str++ & 0xf;
3007 if (l & 1) bits <<= 4;
773e5b22 3008 else {
3314a176
TH
3009 PUSH_BYTE(utf8, cur, bits);
3010 bits = 0;
a6ec74c1
JH
3011 }
3012 }
3314a176
TH
3013 else
3014 while (l++ < len) {
3015 if (utf8_source) {
f2139bd3 3016 UV val = 0;
3314a176
TH
3017 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
3018 if (val < 256 && isALPHA(val))
3019 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 3020 else
3314a176
TH
3021 bits |= (val & 0xf) << 4;
3022 } else if (isALPHA(*str))
3023 bits |= ((*str++ + 9) & 0xf) << 4;
3024 else
3025 bits |= (*str++ & 0xf) << 4;
3026 if (l & 1) bits >>= 4;
773e5b22 3027 else {
3314a176
TH
3028 PUSH_BYTE(utf8, cur, bits);
3029 bits = 0;
a6ec74c1 3030 }
773e5b22 3031 }
3314a176
TH
3032 l--;
3033 if (l & 1) {
3034 PUSH_BYTE(utf8, cur, bits);
3035 l++;
3036 }
3037 /* Determine how many chars are left in the requested field */
3038 l /= 2;
3039 if (howlen == e_star) field_len = 0;
3040 else field_len -= l;
3041 Zero(cur, field_len, char);
3042 cur += field_len;
3043 break;
773e5b22
NC
3044 }
3045 case 'c':
3314a176
TH
3046 while (len-- > 0) {
3047 IV aiv;
3048 fromstr = NEXTFROM;
3049 aiv = SvIV(fromstr);
3050 if ((-128 > aiv || aiv > 127) &&
3051 ckWARN(WARN_PACK))
3052 Perl_warner(aTHX_ packWARN(WARN_PACK),
3053 "Character in 'c' format wrapped in pack");
820b8518 3054 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
3055 }
3056 break;
3057 case 'C':
3314a176
TH
3058 if (len == 0) {
3059#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3060 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
3061#endif
3062 break;
3063 }
3064 GROWING(0, cat, start, cur, len);
a6ec74c1 3065 while (len-- > 0) {
3314a176 3066 IV aiv;
a6ec74c1 3067 fromstr = NEXTFROM;
3314a176
TH
3068 aiv = SvIV(fromstr);
3069 if ((0 > aiv || aiv > 0xff) &&
773e5b22
NC
3070 ckWARN(WARN_PACK))
3071 Perl_warner(aTHX_ packWARN(WARN_PACK),
3072 "Character in 'C' format wrapped in pack");
820b8518 3073 *cur++ = (char)(aiv & 0xff);
3314a176 3074 }
773e5b22 3075 break;
3314a176 3076#ifdef PERL_PACK_CAN_W
773e5b22
NC
3077 case 'W': {
3078 char *end;
75056b2a 3079 U8 in_bytes = (U8)IN_BYTES;
773e5b22
NC
3080
3081 end = start+SvLEN(cat)-1;
3082 if (utf8) end -= UTF8_MAXLEN-1;
3083 while (len-- > 0) {
3084 UV auv;
3085 fromstr = NEXTFROM;
3086 auv = SvUV(fromstr);
3087 if (in_bytes) auv = auv % 0x100;
3088 if (utf8) {
3089 W_utf8:
3090 if (cur > end) {
3091 *cur = '\0';
3092 SvCUR(cat) = cur - start;
3093
3094 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3095 end = start+SvLEN(cat)-UTF8_MAXLEN;
3096 }
a4de8f71
NC
3097 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3098 NATIVE_TO_UNI(auv),
bc743096 3099 warn_utf8 ?
a4de8f71 3100 0 : UNICODE_ALLOW_ANY);
773e5b22
NC
3101 } else {
3102 if (auv >= 0x100) {
3103 if (!SvUTF8(cat)) {
3104 *cur = '\0';
3105 SvCUR(cat) = cur - start;
3106 marked_upgrade(aTHX_ cat, symptr);
3107 lookahead.flags |= FLAG_DO_UTF8;
3108 lookahead.strbeg = symptr->strbeg;
3109 utf8 = 1;
3110 start = SvPVX(cat);
3111 cur = start + SvCUR(cat);
3112 end = start+SvLEN(cat)-UTF8_MAXLEN;
3113 goto W_utf8;
3114 }
3115 if (ckWARN(WARN_PACK))
3116 Perl_warner(aTHX_ packWARN(WARN_PACK),
3117 "Character in 'W' format wrapped in pack");
3118 auv &= 0xff;
3119 }
3120 if (cur >= end) {
3121 *cur = '\0';
3122 SvCUR(cat) = cur - start;
3123 GROWING(0, cat, start, cur, len+1);
3124 end = start+SvLEN(cat)-1;
3125 }
3126 *(U8 *) cur++ = auv;
a6ec74c1
JH
3127 }
3128 }
3129 break;
773e5b22 3130 }
3314a176 3131#endif
773e5b22
NC
3132 case 'U': {
3133 char *end;
3314a176 3134
773e5b22 3135 if (len == 0) {
3314a176 3136#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
773e5b22
NC
3137 if (!(symptr->flags & FLAG_DO_UTF8)) {
3138 marked_upgrade(aTHX_ cat, symptr);
3139 lookahead.flags |= FLAG_DO_UTF8;
3140 lookahead.strbeg = symptr->strbeg;
3141 }
3142 utf8 = 0;
3143#endif
3144 goto no_change;
3145 }
3146
3147 end = start+SvLEN(cat);
3148 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3149 while (len-- > 0) {
773e5b22 3150 UV auv;
a6ec74c1 3151 fromstr = NEXTFROM;
773e5b22
NC
3152 auv = SvUV(fromstr);
3153 if (utf8) {
a4de8f71 3154 U8 buffer[UTF8_MAXLEN], *endb;
773e5b22 3155 endb = uvuni_to_utf8_flags(buffer, auv,
bc743096 3156 warn_utf8 ?
773e5b22
NC
3157 0 : UNICODE_ALLOW_ANY);
3158 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3159 *cur = '\0';
3160 SvCUR(cat) = cur - start;
3161 GROWING(0, cat, start, cur,
3162 len+(endb-buffer)*UTF8_EXPAND);
3163 end = start+SvLEN(cat);
3164 }
86d06bda 3165 cur = bytes_to_uni(buffer, endb-buffer, cur);
773e5b22
NC
3166 } else {
3167 if (cur >= end) {
3168 *cur = '\0';
3169 SvCUR(cat) = cur - start;
3170 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3171 end = start+SvLEN(cat)-UTF8_MAXLEN;
3172 }
a4de8f71 3173 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
bc743096 3174 warn_utf8 ?
a4de8f71 3175 0 : UNICODE_ALLOW_ANY);
773e5b22 3176 }
a6ec74c1 3177 }
a6ec74c1 3178 break;
773e5b22 3179 }
a6ec74c1
JH
3180 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3181 case 'f':
a6ec74c1 3182 while (len-- > 0) {
3314a176
TH
3183 float afloat;
3184 NV anv;
a6ec74c1 3185 fromstr = NEXTFROM;
3314a176 3186 anv = SvNV(fromstr);
5cdb9e01 3187#ifdef __VOS__
3314a176 3188 /* VOS does not automatically map a floating-point overflow
773e5b22
NC
3189 during conversion from double to float into infinity, so we
3190 do it by hand. This code should either be generalized for
3191 any OS that needs it, or removed if and when VOS implements
3192 posix-976 (suggestion to support mapping to infinity).
3193 Paul.Green@stratus.com 02-04-02. */
3314a176 3194 if (anv > FLT_MAX)
773e5b22 3195 afloat = _float_constants[0]; /* single prec. inf. */
3314a176 3196 else if (anv < -FLT_MAX)
773e5b22 3197 afloat = _float_constants[0]; /* single prec. inf. */
3314a176
TH
3198 else afloat = (float) anv;
3199#else /* __VOS__ */
baf3cf9c 3200# if defined(VMS) && !defined(__IEEE_FP)
3314a176 3201 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
773e5b22
NC
3202 * on Alpha; fake it if we don't have them.
3203 */
3314a176 3204 if (anv > FLT_MAX)
773e5b22 3205 afloat = FLT_MAX;
3314a176 3206 else if (anv < -FLT_MAX)
773e5b22 3207 afloat = -FLT_MAX;
3314a176 3208 else afloat = (float)anv;
baf3cf9c 3209# else
3314a176 3210 afloat = (float)anv;
baf3cf9c 3211# endif
3314a176 3212#endif /* __VOS__ */
cc3ea0aa 3213 DO_BO_PACK_N(afloat, float);
3314a176 3214 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3215 }
3216 break;
3217 case 'd':
a6ec74c1 3218 while (len-- > 0) {
3314a176
TH
3219 double adouble;
3220 NV anv;
a6ec74c1 3221 fromstr = NEXTFROM;
3314a176 3222 anv = SvNV(fromstr);
5cdb9e01 3223#ifdef __VOS__
3314a176 3224 /* VOS does not automatically map a floating-point overflow
773e5b22
NC
3225 during conversion from long double to double into infinity,
3226 so we do it by hand. This code should either be generalized
3227 for any OS that needs it, or removed if and when VOS
3228 implements posix-976 (suggestion to support mapping to
3229 infinity). Paul.Green@stratus.com 02-04-02. */
3314a176 3230 if (anv > DBL_MAX)
773e5b22 3231 adouble = _double_constants[0]; /* double prec. inf. */
3314a176 3232 else if (anv < -DBL_MAX)
773e5b22 3233 adouble = _double_constants[0]; /* double prec. inf. */
3314a176
TH
3234 else adouble = (double) anv;
3235#else /* __VOS__ */
baf3cf9c 3236# if defined(VMS) && !defined(__IEEE_FP)
3314a176 3237 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
773e5b22
NC
3238 * on Alpha; fake it if we don't have them.
3239 */
3314a176 3240 if (anv > DBL_MAX)
773e5b22 3241 adouble = DBL_MAX;
3314a176 3242 else if (anv < -DBL_MAX)
773e5b22 3243 adouble = -DBL_MAX;
3314a176 3244 else adouble = (double)anv;
baf3cf9c 3245# else
3314a176 3246 adouble = (double)anv;
baf3cf9c 3247# endif
3314a176 3248#endif /* __VOS__ */
cc3ea0aa 3249 DO_BO_PACK_N(adouble, double);
3314a176 3250 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3251 }
3252 break;
773e5b22
NC
3253 case 'F': {
3254 NV anv;
cc3ea0aa 3255 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3256 while (len-- > 0) {
3257 fromstr = NEXTFROM;
3258 anv = SvNV(fromstr);
cc3ea0aa 3259 DO_BO_PACK_N(anv, NV);
773e5b22 3260 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3261 }
3262 break;
773e5b22 3263 }
92d41999 3264#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
773e5b22
NC
3265 case 'D': {
3266 long double aldouble;
cc3ea0aa
NC
3267 /* long doubles can have unused bits, which may be nonzero */
3268 Zero(&aldouble, 1, long double);
92d41999
JH
3269 while (len-- > 0) {
3270 fromstr = NEXTFROM;
3271 aldouble = (long double)SvNV(fromstr);
cc3ea0aa 3272 DO_BO_PACK_N(aldouble, long double);
773e5b22 3273 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3274 }
3275 break;
773e5b22 3276 }
92d41999 3277#endif
cc3ea0aa
NC
3278#ifdef PERL_PACK_CAN_SHRIEKSIGN
3279 case 'n' | TYPE_IS_SHRIEKING:
3280#endif
a6ec74c1
JH
3281 case 'n':
3282 while (len-- > 0) {
3314a176 3283 I16 ai16;
a6ec74c1 3284 fromstr = NEXTFROM;
cc3ea0aa 3285 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3286#ifdef HAS_HTONS
cc3ea0aa 3287 ai16 = PerlSock_htons(ai16);
a6ec74c1 3288#endif
3314a176 3289 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3290 }
3291 break;
cc3ea0aa
NC
3292#ifdef PERL_PACK_CAN_SHRIEKSIGN
3293 case 'v' | TYPE_IS_SHRIEKING:
3294#endif
a6ec74c1
JH
3295 case 'v':
3296 while (len-- > 0) {
3314a176 3297 I16 ai16;
a6ec74c1 3298 fromstr = NEXTFROM;
cc3ea0aa 3299 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3300#ifdef HAS_HTOVS
cc3ea0aa 3301 ai16 = htovs(ai16);
a6ec74c1 3302#endif
3314a176 3303 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3304 }
3305 break;
cf168e39 3306 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3307#if SHORTSIZE != SIZE16
773e5b22 3308 while (len-- > 0) {
3314a176 3309 unsigned short aushort;
773e5b22
NC
3310 fromstr = NEXTFROM;
3311 aushort = SvUV(fromstr);
3312 DO_BO_PACK(aushort, s);
3314a176 3313 PUSH_VAR(utf8, cur, aushort);
773e5b22 3314 }
cf168e39
WL
3315 break;
3316#else
3317 /* Fall through! */
a6ec74c1 3318#endif
cf168e39 3319 case 'S':
773e5b22 3320 while (len-- > 0) {
3314a176 3321 U16 au16;
773e5b22
NC
3322 fromstr = NEXTFROM;
3323 au16 = (U16)SvUV(fromstr);
3324 DO_BO_PACK(au16, 16);
3314a176 3325 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3326 }
3327 break;
cf168e39 3328 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3329#if SHORTSIZE != SIZE16
773e5b22 3330 while (len-- > 0) {
3314a176 3331 short ashort;
773e5b22
NC
3332 fromstr = NEXTFROM;
3333 ashort = SvIV(fromstr);
3334 DO_BO_PACK(ashort, s);
3314a176 3335 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3336 }
cf168e39
WL
3337 break;
3338#else
3339 /* Fall through! */
a6ec74c1 3340#endif
cf168e39
WL
3341 case 's':
3342 while (len-- > 0) {
3314a176 3343 I16 ai16;
cf168e39 3344 fromstr = NEXTFROM;
cc3ea0aa
NC
3345 ai16 = (I16)SvIV(fromstr);
3346 DO_BO_PACK(ai16, 16);
3314a176 3347 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3348 }
3349 break;
3350 case 'I':
cf168e39 3351 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3352 while (len-- > 0) {
3314a176 3353 unsigned int auint;
a6ec74c1
JH
3354 fromstr = NEXTFROM;
3355 auint = SvUV(fromstr);
cc3ea0aa 3356 DO_BO_PACK(auint, i);
3314a176 3357 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3358 }
3359 break;
92d41999
JH
3360 case 'j':
3361 while (len-- > 0) {
3314a176 3362 IV aiv;
92d41999
JH
3363 fromstr = NEXTFROM;
3364 aiv = SvIV(fromstr);
cc3ea0aa
NC
3365#if IVSIZE == INTSIZE
3366 DO_BO_PACK(aiv, i);
3367#elif IVSIZE == LONGSIZE
3368 DO_BO_PACK(aiv, l);
3369#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3370 DO_BO_PACK(aiv, 64);
3314a176
TH
3371#else
3372 Perl_croak(aTHX_ "'j' not supported on this platform");
cc3ea0aa 3373#endif
3314a176 3374 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3375 }
3376 break;
3377 case 'J':
3378 while (len-- > 0) {
3314a176 3379 UV auv;
92d41999
JH
3380 fromstr = NEXTFROM;
3381 auv = SvUV(fromstr);
cc3ea0aa
NC
3382#if UVSIZE == INTSIZE
3383 DO_BO_PACK(auv, i);
3384#elif UVSIZE == LONGSIZE
3385 DO_BO_PACK(auv, l);
3386#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3387 DO_BO_PACK(auv, 64);
3314a176
TH
3388#else
3389 Perl_croak(aTHX_ "'J' not supported on this platform");
cc3ea0aa 3390#endif
3314a176 3391 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3392 }
3393 break;
a6ec74c1
JH
3394 case 'w':
3395 while (len-- > 0) {
3314a176 3396 NV anv;
a6ec74c1 3397 fromstr = NEXTFROM;
15e9f109 3398 anv = SvNV(fromstr);
a6ec74c1 3399
3314a176
TH
3400 if (anv < 0) {
3401 *cur = '\0';
3402 SvCUR(cat) = cur - start;
cf168e39 3403 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3314a176 3404 }
a6ec74c1 3405
196b62db
NC
3406 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3407 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3408 any negative IVs will have already been got by the croak()
3409 above. IOK is untrue for fractions, so we test them
3410 against UV_MAX_P1. */
3314a176
TH
3411 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3412 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3413 char *in = buf + sizeof(buf);
196b62db 3414 UV auv = SvUV(fromstr);
a6ec74c1
JH
3415
3416 do {
eb160463 3417 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3418 auv >>= 7;
3419 } while (auv);
3420 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3314a176
TH
3421 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3422 in, (buf + sizeof(buf)) - in);
3423 } else if (SvPOKp(fromstr))
3424 goto w_string;
a6ec74c1 3425 else if (SvNOKp(fromstr)) {
5b7ea690
JH
3426 /* 10**NV_MAX_10_EXP is the largest power of 10
3427 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3428 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3429 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3430 And with that many bytes only Inf can overflow.
c17baaa1
PG
3431 Some C compilers are strict about integral constant
3432 expressions so we conservatively divide by a slightly
3433 smaller integer instead of multiplying by the exact
3434 floating-point value.
5b7ea690
JH
3435 */
3436#ifdef NV_MAX_10_EXP
3314a176 3437 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
c17baaa1 3438 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
5b7ea690 3439#else
3314a176 3440 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
c17baaa1 3441 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
5b7ea690 3442#endif
a6ec74c1
JH
3443 char *in = buf + sizeof(buf);
3444
e02cb1b1 3445 anv = Perl_floor(anv);
a6ec74c1 3446 do {
e02cb1b1 3447 const NV next = Perl_floor(anv / 128);
a6ec74c1 3448 if (in <= buf) /* this cannot happen ;-) */
cf168e39 3449 Perl_croak(aTHX_ "Cannot compress integer in pack");
5b7ea690 3450 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3451 anv = next;
3452 } while (anv > 0);
a6ec74c1 3453 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3314a176
TH
3454 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3455 in, (buf + sizeof(buf)) - in);
3456 } else {
cacbe70b
NC
3457 const char *from;
3458 char *result, *in;
735b914b
JH
3459 SV *norm;
3460 STRLEN len;
3461 bool done;
3462
3314a176 3463 w_string:
735b914b 3464 /* Copy string and check for compliance */
23c2bb70 3465 from = SvPV_const(fromstr, len);
735b914b 3466 if ((norm = is_an_int(from, len)) == NULL)
cf168e39 3467 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3468
cd7a8267 3469 Newx(result, len, char);
735b914b
JH
3470 in = result + len;
3471 done = FALSE;
3314a176 3472 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3473 result[len - 1] &= 0x7F; /* clear continue bit */
3314a176
TH
3474 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3475 in, (result + len) - in);
735b914b
JH
3476 Safefree(result);
3477 SvREFCNT_dec(norm); /* free norm */
773e5b22 3478 }
a6ec74c1
JH
3479 }
3480 break;
3481 case 'i':
cf168e39 3482 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3483 while (len-- > 0) {
3314a176 3484 int aint;
a6ec74c1
JH
3485 fromstr = NEXTFROM;
3486 aint = SvIV(fromstr);
cc3ea0aa 3487 DO_BO_PACK(aint, i);
3314a176 3488 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3489 }
3490 break;
cc3ea0aa
NC
3491#ifdef PERL_PACK_CAN_SHRIEKSIGN
3492 case 'N' | TYPE_IS_SHRIEKING:
3493#endif
a6ec74c1
JH
3494 case 'N':
3495 while (len-- > 0) {
3314a176 3496 U32 au32;
a6ec74c1 3497 fromstr = NEXTFROM;
cc3ea0aa 3498 au32 = SvUV(fromstr);
a6ec74c1 3499#ifdef HAS_HTONL
cc3ea0aa 3500 au32 = PerlSock_htonl(au32);
a6ec74c1 3501#endif
3314a176 3502 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3503 }
3504 break;
cc3ea0aa
NC
3505#ifdef PERL_PACK_CAN_SHRIEKSIGN
3506 case 'V' | TYPE_IS_SHRIEKING:
3507#endif
a6ec74c1
JH
3508 case 'V':
3509 while (len-- > 0) {
3314a176 3510 U32 au32;
a6ec74c1 3511 fromstr = NEXTFROM;
cc3ea0aa 3512 au32 = SvUV(fromstr);
a6ec74c1 3513#ifdef HAS_HTOVL
cc3ea0aa 3514 au32 = htovl(au32);
a6ec74c1 3515#endif
3314a176 3516 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3517 }
3518 break;
cf168e39 3519 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3520#if LONGSIZE != SIZE32
773e5b22 3521 while (len-- > 0) {
3314a176 3522 unsigned long aulong;
773e5b22
NC
3523 fromstr = NEXTFROM;
3524 aulong = SvUV(fromstr);
3525 DO_BO_PACK(aulong, l);
3314a176 3526 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3527 }
cf168e39
WL
3528 break;
3529#else
3530 /* Fall though! */
a6ec74c1 3531#endif
cf168e39 3532 case 'L':
773e5b22 3533 while (len-- > 0) {
3314a176 3534 U32 au32;
773e5b22
NC
3535 fromstr = NEXTFROM;
3536 au32 = SvUV(fromstr);
3537 DO_BO_PACK(au32, 32);
3314a176 3538 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3539 }
3540 break;
cf168e39 3541 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3542#if LONGSIZE != SIZE32
773e5b22 3543 while (len-- > 0) {
3314a176 3544 long along;
773e5b22
NC
3545 fromstr = NEXTFROM;
3546 along = SvIV(fromstr);
3547 DO_BO_PACK(along, l);
3314a176 3548 PUSH_VAR(utf8, cur, along);
a6ec74c1 3549 }
cf168e39
WL
3550 break;
3551#else
3552 /* Fall though! */
a6ec74c1 3553#endif
cf168e39
WL
3554 case 'l':
3555 while (len-- > 0) {
3314a176 3556 I32 ai32;
cf168e39 3557 fromstr = NEXTFROM;
cc3ea0aa
NC
3558 ai32 = SvIV(fromstr);
3559 DO_BO_PACK(ai32, 32);
3314a176 3560 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3561 }
3562 break;
3563#ifdef HAS_QUAD
3564 case 'Q':
3565 while (len-- > 0) {
3314a176 3566 Uquad_t auquad;
a6ec74c1 3567 fromstr = NEXTFROM;
3314a176 3568 auquad = (Uquad_t) SvUV(fromstr);
cc3ea0aa 3569 DO_BO_PACK(auquad, 64);
3314a176 3570 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3571 }
3572 break;
3573 case 'q':
3574 while (len-- > 0) {
3314a176 3575 Quad_t aquad;
a6ec74c1
JH
3576 fromstr = NEXTFROM;
3577 aquad = (Quad_t)SvIV(fromstr);
cc3ea0aa 3578 DO_BO_PACK(aquad, 64);
3314a176 3579 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3580 }
3581 break;
3314a176 3582#endif /* HAS_QUAD */
a6ec74c1
JH
3583 case 'P':
3584 len = 1; /* assume SV is correct length */
3314a176 3585 GROWING(utf8, cat, start, cur, sizeof(char *));
cf168e39 3586 /* Fall through! */
a6ec74c1
JH
3587 case 'p':
3588 while (len-- > 0) {
3314a176
TH
3589 char *aptr;
3590
a6ec74c1 3591 fromstr = NEXTFROM;
0bc88fab
NC
3592 SvGETMAGIC(fromstr);
3593 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3594 else {
3595 STRLEN n_a;
3596 /* XXX better yet, could spirit away the string to
3597 * a safe spot and hang on to it until the result
3598 * of pack() (and all copies of the result) are
3599 * gone.
3600 */
bc743096
AL
3601 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3602 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3603 Perl_warner(aTHX_ packWARN(WARN_PACK),
773e5b22 3604 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3605 }
3606 if (SvPOK(fromstr) || SvNIOK(fromstr))
cacbe70b 3607 aptr = (char *) SvPV_nomg_const(fromstr, n_a);
a6ec74c1 3608 else
0bc88fab 3609 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3610 }
206b424e 3611 DO_BO_PACK_PC(aptr);
3314a176 3612 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3613 }
3614 break;
3314a176 3615 case 'u': {
e02cb1b1 3616 const char *aptr, *aend;
3314a176 3617#ifdef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
773e5b22 3618 const bool from_utf8 = FALSE;
3314a176 3619#else
773e5b22 3620 bool from_utf8;
3314a176
TH
3621#endif
3622
a6ec74c1 3623 fromstr = NEXTFROM;
773e5b22
NC
3624 if (len <= 2) len = 45;
3625 else len = len / 3 * 3;
3626 if (len >= 64) {
820b8518
NC
3627 if (ckWARN(WARN_PACK))
3628 Perl_warner(aTHX_ packWARN(WARN_PACK),
773e5b22
NC
3629 "Field too wide in 'u' format in pack");
3630 len = 63;
3631 }
cacbe70b 3632 aptr = (char *) SvPV_const(fromstr, fromlen);
3314a176 3633#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
773e5b22
NC
3634 from_utf8 = DO_UTF8(fromstr);
3635 if (from_utf8) {
3636 aend = aptr + fromlen;
3637 fromlen = sv_len_utf8(fromstr);
3638 } else
3639#endif
3640 {
3641 aend = NULL; /* Unused, but keep compilers happy */
3642 }
3643 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3644
a6ec74c1 3645 while (fromlen > 0) {
773e5b22 3646 U8 *end;
a6ec74c1 3647 I32 todo;
773e5b22 3648 U8 hunk[1+63/3*4+1];
a6ec74c1 3649
eb160463 3650 if ((I32)fromlen > len)
a6ec74c1
JH
3651 todo = len;
3652 else
3653 todo = fromlen;
773e5b22
NC
3654 if (from_utf8) {
3655 char buffer[64];
3656 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3657 'u' | TYPE_IS_PACK)) {
3658 *cur = '\0';
3659 SvCUR(cat) = cur - start;
3660 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3661 }
3662 end = doencodes(hunk, buffer, todo);
3663 } else {
3664 end = doencodes(hunk, aptr, todo);
3665 aptr += todo;
3666 }
3667 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3668 fromlen -= todo;
3669 }
a6ec74c1
JH
3670 break;
3671 }
3314a176
TH
3672 }
3673 *cur = '\0';
3674 SvCUR(cat) = cur - start;
3675 no_change:
cf168e39 3676 *symptr = lookahead;
a6ec74c1 3677 }
cf168e39 3678 return beglist;
18529408
IZ
3679}
3680#undef NEXTFROM
3681
3682
3683PP(pp_pack)
3684{
3685 dSP; dMARK; dORIGMARK; dTARGET;
3686 register SV *cat = TARG;
3687 STRLEN fromlen;
23c2bb70
NC
3688 SV *pat_sv = *++MARK;
3689 register const char *pat = SvPV_const(pat_sv, fromlen);
8ec8dcb0 3690 register const char *patend = pat + fromlen;
18529408
IZ
3691
3692 MARK++;
3693 sv_setpvn(cat, "", 0);
3314a176 3694 SvUTF8_off(cat);
18529408 3695
8ec8dcb0 3696 packlist(cat, (char *) pat, (char *) patend, MARK, SP + 1);
18529408 3697
a6ec74c1
JH
3698 SvSETMAGIC(cat);
3699 SP = ORIGMARK;
3700 PUSHs(cat);
3701 RETURN;
3702}
a6ec74c1 3703
583439ab
NC
3704/*
3705 * Local variables:
3706 * c-indentation-style: bsd
3707 * c-basic-offset: 4
3708 * indent-tabs-mode: t
3709 * End:
3710 *
d8294a4d
NC
3711 * ex: set ts=8 sts=4 sw=4 noet:
3712 */