This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate:
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
e6906430 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
cc3ea0aa 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
40d34c0d
SB
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
e02cb1b1
NC
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
cc3ea0aa 72#if PERL_VERSION >= 9
3314a176
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
500d42d2 79#define PERL_PACK_CAN_W
3040befa 80#define PERL_PACK_CAN_DOT
500d42d2
TH
81#else
82#define PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
83#define PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
84#define PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
85#define PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
cc3ea0aa 86#endif
773e5b22
NC
87/* Maximum number of bytes to which a byte can grow due to upgrade */
88#define UTF8_EXPAND 2
cc3ea0aa 89
a6ec74c1 90/*
a6ec74c1
JH
91 * Offset for integer pack/unpack.
92 *
93 * On architectures where I16 and I32 aren't really 16 and 32 bits,
94 * which for now are all Crays, pack and unpack have to play games.
95 */
96
97/*
98 * These values are required for portability of pack() output.
99 * If they're not right on your machine, then pack() and unpack()
100 * wouldn't work right anyway; you'll need to apply the Cray hack.
101 * (I'd like to check them with #if, but you can't use sizeof() in
102 * the preprocessor.) --???
103 */
104/*
105 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
106 defines are now in config.h. --Andy Dougherty April 1998
107 */
108#define SIZE16 2
109#define SIZE32 4
110
111/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
112 --jhi Feb 1999 */
113
cc3ea0aa
NC
114#if U16SIZE > SIZE16 || U32SIZE > SIZE32
115# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
500d42d2
TH
116# define OFF16(p) ((char*)(p))
117# define OFF32(p) ((char*)(p))
a6ec74c1 118# else
cc3ea0aa 119# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
120# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
121# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
122# else
500d42d2 123 ++++ bad cray byte order
a6ec74c1
JH
124# endif
125# endif
a6ec74c1 126#else
500d42d2
TH
127# define OFF16(p) ((char *) (p))
128# define OFF32(p) ((char *) (p))
a6ec74c1
JH
129#endif
130
3314a176
TH
131/* Only to be used inside a loop (see the break) */
132#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
133 if (utf8) { \
134 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
135 } else { \
136 Copy(s, OFF16(p), SIZE16, char); \
137 (s) += SIZE16; \
138 } \
139} STMT_END
500d42d2
TH
140
141/* Only to be used inside a loop (see the break) */
3314a176
TH
142#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
143 if (utf8) { \
144 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
145 } else { \
146 Copy(s, OFF32(p), SIZE32, char); \
147 (s) += SIZE32; \
148 } \
149} STMT_END
150
151#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
152#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
153
154/* Only to be used inside a loop (see the break) */
155#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
500d42d2
TH
156STMT_START { \
157 if (utf8) { \
3314a176
TH
158 if (!uni_to_bytes(aTHX_ &s, strend, \
159 (char *) &var, sizeof(var), datumtype)) break;\
500d42d2
TH
160 } else { \
161 Copy(s, (char *) &var, sizeof(var), char); \
162 s += sizeof(var); \
163 } \
500d42d2
TH
164} STMT_END
165
3314a176 166#define PUSH_VAR(utf8, aptr, var) \
a4de8f71 167 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
3314a176 168
cf168e39
WL
169/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
170#define MAX_SUB_TEMPLATE_LEVEL 100
171
cc3ea0aa 172/* flags (note that type modifiers can also be used as flags!) */
3314a176
TH
173#define FLAG_WAS_UTF8 0x40
174#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
cf168e39 175#define FLAG_UNPACK_ONLY_ONE 0x10
3314a176 176#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
cf168e39
WL
177#define FLAG_SLASH 0x04
178#define FLAG_COMMA 0x02
179#define FLAG_PACK 0x01
180
a6ec74c1
JH
181STATIC SV *
182S_mul128(pTHX_ SV *sv, U8 m)
183{
184 STRLEN len;
185 char *s = SvPV(sv, len);
186 char *t;
a6ec74c1
JH
187
188 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
7508116b 189 SV * const tmpNew = newSVpvn("0000000000", 10);
a6ec74c1
JH
190
191 sv_catsv(tmpNew, sv);
192 SvREFCNT_dec(sv); /* free old sv */
193 sv = tmpNew;
194 s = SvPV(sv, len);
195 }
196 t = s + len - 1;
197 while (!*t) /* trailing '\0'? */
198 t--;
199 while (t > s) {
e02cb1b1 200 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
201 *(t--) = '0' + (char)(i % 10);
202 m = (char)(i / 10);
a6ec74c1
JH
203 }
204 return (sv);
205}
206
207/* Explosives and implosives. */
208
209#if 'I' == 73 && 'J' == 74
210/* On an ASCII/ISO kind of system */
211#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
212#else
213/*
214 Some other sort of character set - use memchr() so we don't match
215 the null byte.
216 */
217#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
218#endif
219
cc3ea0aa 220/* type modifiers */
62f95557 221#define TYPE_IS_SHRIEKING 0x100
cc3ea0aa
NC
222#define TYPE_IS_BIG_ENDIAN 0x200
223#define TYPE_IS_LITTLE_ENDIAN 0x400
3314a176 224#define TYPE_IS_PACK 0x800
cc3ea0aa
NC
225#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
226#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
227#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
228
229#ifdef PERL_PACK_CAN_SHRIEKSIGN
3040befa 230# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
cc3ea0aa 231#else
3040befa 232# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
cc3ea0aa
NC
233#endif
234
235#ifndef PERL_PACK_CAN_BYTEORDER
236/* Put "can't" first because it is shorter */
237# define TYPE_ENDIANNESS(t) 0
238# define TYPE_NO_ENDIANNESS(t) (t)
239
240# define ENDIANNESS_ALLOWED_TYPES ""
241
242# define DO_BO_UNPACK(var, type)
243# define DO_BO_PACK(var, type)
206b424e
NC
244# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
245# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
cc3ea0aa
NC
246# define DO_BO_UNPACK_N(var, type)
247# define DO_BO_PACK_N(var, type)
248# define DO_BO_UNPACK_P(var)
249# define DO_BO_PACK_P(var)
206b424e
NC
250# define DO_BO_UNPACK_PC(var)
251# define DO_BO_PACK_PC(var)
cc3ea0aa 252
3314a176 253#else /* PERL_PACK_CAN_BYTEORDER */
cc3ea0aa
NC
254
255# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
256# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
257
258# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
259
260# define DO_BO_UNPACK(var, type) \
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
265 default: break; \
266 } \
267 } STMT_END
268
269# define DO_BO_PACK(var, type) \
270 STMT_START { \
271 switch (TYPE_ENDIANNESS(datumtype)) { \
272 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
273 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
274 default: break; \
275 } \
276 } STMT_END
277
206b424e 278# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
cc3ea0aa
NC
279 STMT_START { \
280 switch (TYPE_ENDIANNESS(datumtype)) { \
281 case TYPE_IS_BIG_ENDIAN: \
206b424e 282 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
cc3ea0aa
NC
283 break; \
284 case TYPE_IS_LITTLE_ENDIAN: \
206b424e 285 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
cc3ea0aa
NC
286 break; \
287 default: \
288 break; \
289 } \
290 } STMT_END
291
206b424e 292# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
cc3ea0aa
NC
293 STMT_START { \
294 switch (TYPE_ENDIANNESS(datumtype)) { \
295 case TYPE_IS_BIG_ENDIAN: \
206b424e 296 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
cc3ea0aa
NC
297 break; \
298 case TYPE_IS_LITTLE_ENDIAN: \
206b424e 299 var = (post_cast *) my_htole ## type ((pre_cast) var); \
cc3ea0aa
NC
300 break; \
301 default: \
302 break; \
303 } \
304 } STMT_END
305
306# define BO_CANT_DOIT(action, type) \
307 STMT_START { \
308 switch (TYPE_ENDIANNESS(datumtype)) { \
309 case TYPE_IS_BIG_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
311 "platform", #action, #type); \
312 break; \
313 case TYPE_IS_LITTLE_ENDIAN: \
314 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
315 "platform", #action, #type); \
316 break; \
317 default: \
318 break; \
319 } \
320 } STMT_END
321
322# if PTRSIZE == INTSIZE
206b424e
NC
323# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
324# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
325# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
326# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
cc3ea0aa 327# elif PTRSIZE == LONGSIZE
206b424e
NC
328# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
329# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
330# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
331# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
820b8518
NC
332# elif PTRSIZE == IVSIZE
333# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
334# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
335# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
336# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
cc3ea0aa
NC
337# else
338# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
339# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
206b424e
NC
340# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
341# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
cc3ea0aa
NC
342# endif
343
344# if defined(my_htolen) && defined(my_letohn) && \
345 defined(my_htoben) && defined(my_betohn)
346# define DO_BO_UNPACK_N(var, type) \
347 STMT_START { \
348 switch (TYPE_ENDIANNESS(datumtype)) { \
349 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
350 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
351 default: break; \
352 } \
353 } STMT_END
354
355# define DO_BO_PACK_N(var, type) \
356 STMT_START { \
357 switch (TYPE_ENDIANNESS(datumtype)) { \
358 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
359 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
360 default: break; \
361 } \
362 } STMT_END
363# else
364# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
365# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
366# endif
367
3314a176 368#endif /* PERL_PACK_CAN_BYTEORDER */
cc3ea0aa
NC
369
370#define PACK_SIZE_CANNOT_CSUM 0x80
3314a176 371#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
cc3ea0aa
NC
372#define PACK_SIZE_MASK 0x3F
373
cc3ea0aa
NC
374/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
375 in). You're unlikely ever to need to regenerate them. */
e5464422
TH
376
377#if TYPE_IS_SHRIEKING != 0x100
378 ++++shriek offset should be 256
379#endif
380
381typedef U8 packprops_t;
cc3ea0aa
NC
382#if 'J'-'I' == 1
383/* ASCII */
820b8518 384STATIC const packprops_t packprops[512] = {
e5464422
TH
385 /* normal */
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0,
391 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
cc3ea0aa 392#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
e5464422 393 /* D */ LONG_DOUBLESIZE,
cc3ea0aa 394#else
e5464422 395 0,
cc3ea0aa 396#endif
e5464422
TH
397 0,
398 /* F */ NVSIZE,
399 0, 0,
400 /* I */ sizeof(unsigned int),
401 /* J */ UVSIZE,
402 0,
403 /* L */ SIZE32,
404 0,
405 /* N */ SIZE32,
406 0, 0,
cc3ea0aa 407#if defined(HAS_QUAD)
e5464422 408 /* Q */ sizeof(Uquad_t),
cc3ea0aa 409#else
e5464422 410 0,
cc3ea0aa 411#endif
e5464422
TH
412 0,
413 /* S */ SIZE16,
414 0,
415 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
416 /* V */ SIZE32,
500d42d2 417#if defined(PERL_PACK_CAN_W)
e5464422 418 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
500d42d2 419#else
e5464422 420 0,
500d42d2 421#endif
e5464422
TH
422 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
423 /* c */ sizeof(char),
424 /* d */ sizeof(double),
425 0,
426 /* f */ sizeof(float),
427 0, 0,
428 /* i */ sizeof(int),
429 /* j */ IVSIZE,
430 0,
431 /* l */ SIZE32,
432 0,
433 /* n */ SIZE16,
434 0,
435 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
cc3ea0aa 436#if defined(HAS_QUAD)
e5464422 437 /* q */ sizeof(Quad_t),
cc3ea0aa 438#else
e5464422 439 0,
cc3ea0aa 440#endif
e5464422
TH
441 0,
442 /* s */ SIZE16,
443 0, 0,
444 /* v */ SIZE16,
445 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0,
455 /* shrieking */
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 /* I */ sizeof(unsigned int),
462 0, 0,
463 /* L */ sizeof(unsigned long),
464 0,
cc3ea0aa 465#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 466 /* N */ SIZE32,
cc3ea0aa 467#else
e5464422 468 0,
cc3ea0aa 469#endif
e5464422
TH
470 0, 0, 0, 0,
471 /* S */ sizeof(unsigned short),
472 0, 0,
cc3ea0aa 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 474 /* V */ SIZE32,
cc3ea0aa 475#else
e5464422 476 0,
cc3ea0aa 477#endif
e5464422
TH
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0,
480 /* i */ sizeof(int),
481 0, 0,
482 /* l */ sizeof(long),
483 0,
cc3ea0aa 484#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 485 /* n */ SIZE16,
cc3ea0aa 486#else
e5464422 487 0,
cc3ea0aa 488#endif
e5464422
TH
489 0, 0, 0, 0,
490 /* s */ sizeof(short),
491 0, 0,
cc3ea0aa 492#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 493 /* v */ SIZE16,
cc3ea0aa 494#else
e5464422 495 0,
cc3ea0aa 496#endif
e5464422
TH
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0
cc3ea0aa
NC
506};
507#else
508/* EBCDIC (or bust) */
820b8518 509STATIC const packprops_t packprops[512] = {
e5464422
TH
510 /* normal */
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0,
520 /* c */ sizeof(char),
521 /* d */ sizeof(double),
522 0,
523 /* f */ sizeof(float),
524 0, 0,
525 /* i */ sizeof(int),
526 0, 0, 0, 0, 0, 0, 0,
527 /* j */ IVSIZE,
528 0,
529 /* l */ SIZE32,
530 0,
531 /* n */ SIZE16,
532 0,
533 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
cc3ea0aa 534#if defined(HAS_QUAD)
e5464422 535 /* q */ sizeof(Quad_t),
cc3ea0aa 536#else
e5464422 537 0,
cc3ea0aa 538#endif
e5464422
TH
539 0, 0, 0, 0, 0, 0, 0, 0, 0,
540 /* s */ SIZE16,
541 0, 0,
542 /* v */ SIZE16,
543 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
546 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
cc3ea0aa 547#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
e5464422 548 /* D */ LONG_DOUBLESIZE,
cc3ea0aa 549#else
e5464422 550 0,
cc3ea0aa 551#endif
e5464422
TH
552 0,
553 /* F */ NVSIZE,
554 0, 0,
555 /* I */ sizeof(unsigned int),
556 0, 0, 0, 0, 0, 0, 0,
557 /* J */ UVSIZE,
558 0,
559 /* L */ SIZE32,
560 0,
561 /* N */ SIZE32,
562 0, 0,
cc3ea0aa 563#if defined(HAS_QUAD)
e5464422 564 /* Q */ sizeof(Uquad_t),
cc3ea0aa 565#else
e5464422 566 0,
cc3ea0aa 567#endif
e5464422
TH
568 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 /* S */ SIZE16,
570 0,
571 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
572 /* V */ SIZE32,
500d42d2 573#if defined(PERL_PACK_CAN_W)
e5464422 574 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
500d42d2 575#else
e5464422 576 0,
500d42d2 577#endif
e5464422
TH
578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* shrieking */
581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
586 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* i */ sizeof(int),
591 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 /* l */ sizeof(long),
593 0,
cc3ea0aa 594#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 595 /* n */ SIZE16,
cc3ea0aa 596#else
e5464422 597 0,
cc3ea0aa 598#endif
e5464422
TH
599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* s */ sizeof(short),
601 0, 0,
cc3ea0aa 602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 603 /* v */ SIZE16,
cc3ea0aa 604#else
e5464422 605 0,
cc3ea0aa 606#endif
e5464422
TH
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
609 0, 0, 0,
610 /* I */ sizeof(unsigned int),
611 0, 0, 0, 0, 0, 0, 0, 0, 0,
612 /* L */ sizeof(unsigned long),
613 0,
cc3ea0aa 614#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 615 /* N */ SIZE32,
cc3ea0aa 616#else
e5464422 617 0,
cc3ea0aa 618#endif
e5464422
TH
619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 /* S */ sizeof(unsigned short),
621 0, 0,
cc3ea0aa 622#if defined(PERL_PACK_CAN_SHRIEKSIGN)
e5464422 623 /* V */ SIZE32,
cc3ea0aa 624#else
e5464422 625 0,
cc3ea0aa 626#endif
e5464422
TH
627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
cc3ea0aa
NC
629};
630#endif
631
500d42d2 632STATIC U8
e02cb1b1 633uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
500d42d2 634{
500d42d2 635 STRLEN retlen;
7508116b 636 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
3314a176 637 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
500d42d2
TH
638 /* We try to process malformed UTF-8 as much as possible (preferrably with
639 warnings), but these two mean we make no progress in the string and
640 might enter an infinite loop */
641 if (retlen == (STRLEN) -1 || retlen == 0)
3314a176
TH
642 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
643 (int) TYPE_NO_MODIFIERS(datumtype));
500d42d2 644 if (val >= 0x100) {
3314a176 645 if (ckWARN(WARN_UNPACK))
500d42d2
TH
646 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
647 "Character in '%c' format wrapped in unpack",
3314a176 648 (int) TYPE_NO_MODIFIERS(datumtype));
500d42d2
TH
649 val &= 0xff;
650 }
651 *s += retlen;
652 return val;
653}
654
3314a176
TH
655#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
656 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
500d42d2
TH
657 *(U8 *)(s)++)
658
659STATIC bool
e02cb1b1 660uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
500d42d2
TH
661{
662 UV val;
663 STRLEN retlen;
e02cb1b1 664 const char *from = *s;
500d42d2 665 int bad = 0;
e02cb1b1 666 const U32 flags = ckWARN(WARN_UTF8) ?
500d42d2
TH
667 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
668 for (;buf_len > 0; buf_len--) {
669 if (from >= end) return FALSE;
3314a176 670 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
500d42d2
TH
671 if (retlen == (STRLEN) -1 || retlen == 0) {
672 from += UTF8SKIP(from);
673 bad |= 1;
674 } else from += retlen;
675 if (val >= 0x100) {
676 bad |= 2;
677 val &= 0xff;
678 }
679 *(U8 *)buf++ = val;
680 }
681 /* We have enough characters for the buffer. Did we have problems ? */
682 if (bad) {
683 if (bad & 1) {
684 /* Rewalk the string fragment while warning */
e02cb1b1 685 const char *ptr;
a4de8f71 686 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
500d42d2
TH
687 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
688 if (ptr >= end) break;
3314a176 689 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
500d42d2
TH
690 }
691 if (from > end) from = end;
692 }
693 if ((bad & 2) && ckWARN(WARN_UNPACK))
773e5b22 694 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
3314a176
TH
695 WARN_PACK : WARN_UNPACK),
696 "Character(s) in '%c' format wrapped in %s",
773e5b22 697 (int) TYPE_NO_MODIFIERS(datumtype),
3314a176 698 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
500d42d2
TH
699 }
700 *s = from;
701 return TRUE;
702}
703
704STATIC bool
e02cb1b1 705next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
500d42d2 706{
500d42d2 707 STRLEN retlen;
7508116b 708 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
500d42d2
TH
709 if (val >= 0x100 || !ISUUCHAR(val) ||
710 retlen == (STRLEN) -1 || retlen == 0) {
711 *out = 0;
712 return FALSE;
713 }
714 *out = PL_uudmap[val] & 077;
3314a176 715 *s += retlen;
500d42d2
TH
716 return TRUE;
717}
62f95557 718
3314a176 719STATIC void
e02cb1b1 720bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
3314a176 721 U8 buffer[UTF8_MAXLEN];
7508116b 722 const U8 * const end = start + len;
3314a176
TH
723 char *d = *dest;
724 while (start < end) {
e02cb1b1 725 const int length =
3314a176
TH
726 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
727 switch(length) {
728 case 1:
729 *d++ = buffer[0];
730 break;
731 case 2:
732 *d++ = buffer[0];
733 *d++ = buffer[1];
734 break;
735 default:
736 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
737 *start, length);
738 }
739 start++;
740 }
741 *dest = d;
742}
743
a4de8f71
NC
744#define PUSH_BYTES(utf8, cur, buf, len) \
745STMT_START { \
746 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
747 else { \
748 Copy(buf, cur, len, char); \
749 (cur) += (len); \
750 } \
3314a176
TH
751} STMT_END
752
753#define GROWING(utf8, cat, start, cur, in_len) \
754STMT_START { \
755 STRLEN glen = (in_len); \
773e5b22 756 if (utf8) glen *= UTF8_EXPAND; \
3314a176 757 if ((cur) + glen >= (start) + SvLEN(cat)) { \
8c89da26 758 (start) = sv_exp_grow(cat, glen); \
3314a176
TH
759 (cur) = (start) + SvCUR(cat); \
760 } \
761} STMT_END
762
763#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
764STMT_START { \
e02cb1b1 765 const STRLEN glen = (in_len); \
3314a176 766 STRLEN gl = glen; \
773e5b22 767 if (utf8) gl *= UTF8_EXPAND; \
3314a176
TH
768 if ((cur) + gl >= (start) + SvLEN(cat)) { \
769 *cur = '\0'; \
770 SvCUR(cat) = (cur) - (start); \
8c89da26 771 (start) = sv_exp_grow(cat, gl); \
3314a176
TH
772 (cur) = (start) + SvCUR(cat); \
773 } \
774 PUSH_BYTES(utf8, cur, buf, glen); \
775} STMT_END
776
777#define PUSH_BYTE(utf8, s, byte) \
778STMT_START { \
779 if (utf8) { \
e02cb1b1 780 const U8 au8 = (byte); \
3314a176
TH
781 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
782 } else *(U8 *)(s)++ = (byte); \
783} STMT_END
784
785/* Only to be used inside a loop (see the break) */
786#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
787STMT_START { \
788 STRLEN retlen; \
789 if (str >= end) break; \
790 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
791 if (retlen == (STRLEN) -1 || retlen == 0) { \
792 *cur = '\0'; \
793 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
794 } \
795 str += retlen; \
796} STMT_END
797
e02cb1b1
NC
798static const char *_action( const tempsym_t* symptr )
799{
800 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
801}
802
206947d2 803/* Returns the sizeof() struct described by pat */
028d1f6d 804STATIC I32
3314a176 805S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 806{
3314a176 807 I32 total = 0;
206947d2 808
cf168e39 809 while (next_symbol(symptr)) {
3314a176 810 I32 len;
e02cb1b1 811 int size;
3314a176
TH
812
813 switch (symptr->howlen) {
773e5b22 814 case e_star:
cf168e39 815 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
e02cb1b1 816 _action( symptr ) );
cf168e39 817 break;
3314a176
TH
818 default:
819 /* e_no_len and e_number */
820 len = symptr->length;
821 break;
cf168e39
WL
822 }
823
e5464422 824 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
cc3ea0aa 825 if (!size) {
e02cb1b1 826 int star;
cc3ea0aa
NC
827 /* endianness doesn't influence the size of a type */
828 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
829 default:
830 Perl_croak(aTHX_ "Invalid type '%c' in %s",
831 (int)TYPE_NO_MODIFIERS(symptr->code),
e02cb1b1 832 _action( symptr ) );
3040befa
TH
833#ifdef PERL_PACK_CAN_SHRIEKSIGN
834# ifdef PERL_PACK_CAN_DOT
835 case '.' | TYPE_IS_SHRIEKING:
836# endif
837 case '@' | TYPE_IS_SHRIEKING:
838#endif
cc3ea0aa 839 case '@':
3040befa
TH
840#ifdef PERL_PACK_CAN_DOT
841 case '.':
842#endif
cc3ea0aa
NC
843 case '/':
844 case 'U': /* XXXX Is it correct? */
845 case 'w':
846 case 'u':
847 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
3040befa 848 (int) TYPE_NO_MODIFIERS(symptr->code),
e02cb1b1 849 _action( symptr ) );
cc3ea0aa
NC
850 case '%':
851 size = 0;
852 break;
853 case '(':
773e5b22
NC
854 {
855 tempsym_t savsym = *symptr;
856 symptr->patptr = savsym.grpbeg;
857 symptr->patend = savsym.grpend;
858 /* XXXX Theoretically, we need to measure many times at
859 different positions, since the subexpression may contain
860 alignment commands, but be not of aligned length.
861 Need to detect this and croak(). */
862 size = measure_struct(symptr);
863 *symptr = savsym;
864 break;
865 }
cc3ea0aa
NC
866 case 'X' | TYPE_IS_SHRIEKING:
867 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
868 */
869 if (!len) /* Avoid division by 0 */
870 len = 1;
871 len = total % len; /* Assumed: the start is aligned. */
872 /* FALL THROUGH */
873 case 'X':
874 size = -1;
875 if (total < len)
e02cb1b1 876 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
cc3ea0aa
NC
877 break;
878 case 'x' | TYPE_IS_SHRIEKING:
879 if (!len) /* Avoid division by 0 */
880 len = 1;
881 star = total % len; /* Assumed: the start is aligned. */
882 if (star) /* Other portable ways? */
883 len = len - star;
884 else
885 len = 0;
886 /* FALL THROUGH */
887 case 'x':
888 case 'A':
889 case 'Z':
890 case 'a':
cc3ea0aa
NC
891 size = 1;
892 break;
893 case 'B':
894 case 'b':
895 len = (len + 7)/8;
896 size = 1;
897 break;
898 case 'H':
899 case 'h':
900 len = (len + 1)/2;
901 size = 1;
902 break;
903
904 case 'P':
905 len = 1;
906 size = sizeof(char*);
907 break;
908 }
206947d2
IZ
909 }
910 total += len * size;
911 }
912 return total;
913}
914
cf168e39
WL
915
916/* locate matching closing parenthesis or bracket
917 * returns char pointer to char after match, or NULL
918 */
cacbe70b
NC
919STATIC const char *
920S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 921{
cf168e39 922 while (patptr < patend) {
e02cb1b1 923 const char c = *patptr++;
cf168e39
WL
924
925 if (isSPACE(c))
926 continue;
927 else if (c == ender)
928 return patptr-1;
929 else if (c == '#') {
930 while (patptr < patend && *patptr != '\n')
931 patptr++;
932 continue;
933 } else if (c == '(')
934 patptr = group_end(patptr, patend, ')') + 1;
935 else if (c == '[')
936 patptr = group_end(patptr, patend, ']') + 1;
18529408 937 }
cf168e39
WL
938 Perl_croak(aTHX_ "No group ending character '%c' found in template",
939 ender);
940 return 0;
18529408
IZ
941}
942
cf168e39
WL
943
944/* Convert unsigned decimal number to binary.
945 * Expects a pointer to the first digit and address of length variable
946 * Advances char pointer to 1st non-digit char and returns number
773e5b22 947 */
cacbe70b
NC
948STATIC const char *
949S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
cf168e39
WL
950{
951 I32 len = *patptr++ - '0';
952 while (isDIGIT(*patptr)) {
953 if (len >= 0x7FFFFFFF/10)
954 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
955 len = (len * 10) + (*patptr++ - '0');
956 }
957 *lenptr = len;
958 return patptr;
959}
960
961/* The marvellous template parsing routine: Using state stored in *symptr,
962 * locates next template code and count
963 */
964STATIC bool
3314a176 965S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 966{
773e5b22 967 const char* patptr = symptr->patptr;
7508116b 968 const char* const patend = symptr->patend;
3314a176 969 const char *allowed = "";
cf168e39
WL
970
971 symptr->flags &= ~FLAG_SLASH;
972
973 while (patptr < patend) {
974 if (isSPACE(*patptr))
975 patptr++;
976 else if (*patptr == '#') {
977 patptr++;
978 while (patptr < patend && *patptr != '\n')
979 patptr++;
980 if (patptr < patend)
981 patptr++;
982 } else {
773e5b22 983 /* We should have found a template code */
cf168e39 984 I32 code = *patptr++ & 0xFF;
cc3ea0aa 985 U32 inherited_modifiers = 0;
cf168e39
WL
986
987 if (code == ','){ /* grandfather in commas but with a warning */
988 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
989 symptr->flags |= FLAG_COMMA;
990 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
e02cb1b1 991 "Invalid type ',' in %s", _action( symptr ) );
cf168e39
WL
992 }
993 continue;
994 }
773e5b22 995
cf168e39 996 /* for '(', skip to ')' */
773e5b22 997 if (code == '(') {
cf168e39
WL
998 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
999 Perl_croak(aTHX_ "()-group starts with a count in %s",
e02cb1b1 1000 _action( symptr ) );
cacbe70b
NC
1001 symptr->grpbeg = (char *) patptr;
1002 patptr
1003 = 1 + ( symptr->grpend = (char *)group_end(patptr, patend, ')') );
cf168e39
WL
1004 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1005 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
e02cb1b1 1006 _action( symptr ) );
cf168e39
WL
1007 }
1008
cc3ea0aa
NC
1009 /* look for group modifiers to inherit */
1010 if (TYPE_ENDIANNESS(symptr->flags)) {
1011 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1012 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
cf168e39
WL
1013 }
1014
cc3ea0aa
NC
1015 /* look for modifiers */
1016 while (patptr < patend) {
cdf9dde0 1017 I32 modifier;
cc3ea0aa
NC
1018 switch (*patptr) {
1019 case '!':
1020 modifier = TYPE_IS_SHRIEKING;
1021 allowed = SHRIEKING_ALLOWED_TYPES;
1022 break;
1023#ifdef PERL_PACK_CAN_BYTEORDER
1024 case '>':
1025 modifier = TYPE_IS_BIG_ENDIAN;
1026 allowed = ENDIANNESS_ALLOWED_TYPES;
1027 break;
1028 case '<':
1029 modifier = TYPE_IS_LITTLE_ENDIAN;
1030 allowed = ENDIANNESS_ALLOWED_TYPES;
1031 break;
3314a176 1032#endif /* PERL_PACK_CAN_BYTEORDER */
cc3ea0aa 1033 default:
cdf9dde0
AL
1034 allowed = "";
1035 modifier = 0;
cc3ea0aa
NC
1036 break;
1037 }
1038
1039 if (modifier == 0)
1040 break;
1041
1042 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1043 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
e02cb1b1 1044 allowed, _action( symptr ) );
cc3ea0aa
NC
1045
1046 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1047 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
e02cb1b1 1048 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
cc3ea0aa
NC
1049 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1050 TYPE_ENDIANNESS_MASK)
1051 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
e02cb1b1 1052 *patptr, _action( symptr ) );
cc3ea0aa 1053
bc743096 1054 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
cc3ea0aa
NC
1055 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1056 "Duplicate modifier '%c' after '%c' in %s",
1057 *patptr, (int) TYPE_NO_MODIFIERS(code),
e02cb1b1 1058 _action( symptr ) );
cc3ea0aa
NC
1059 }
1060
1061 code |= modifier;
1062 patptr++;
1063 }
1064
1065 /* inherit modifiers */
1066 code |= inherited_modifiers;
1067
773e5b22 1068 /* look for count and/or / */
cf168e39
WL
1069 if (patptr < patend) {
1070 if (isDIGIT(*patptr)) {
1071 patptr = get_num( patptr, &symptr->length );
1072 symptr->howlen = e_number;
1073
1074 } else if (*patptr == '*') {
1075 patptr++;
1076 symptr->howlen = e_star;
1077
1078 } else if (*patptr == '[') {
773e5b22 1079 const char* lenptr = ++patptr;
cf168e39
WL
1080 symptr->howlen = e_number;
1081 patptr = group_end( patptr, patend, ']' ) + 1;
1082 /* what kind of [] is it? */
1083 if (isDIGIT(*lenptr)) {
1084 lenptr = get_num( lenptr, &symptr->length );
1085 if( *lenptr != ']' )
1086 Perl_croak(aTHX_ "Malformed integer in [] in %s",
e02cb1b1 1087 _action( symptr ) );
cf168e39
WL
1088 } else {
1089 tempsym_t savsym = *symptr;
cacbe70b
NC
1090 symptr->patend = (char *) patptr-1;
1091 symptr->patptr = (char *) lenptr;
cf168e39
WL
1092 savsym.length = measure_struct(symptr);
1093 *symptr = savsym;
1094 }
1095 } else {
1096 symptr->howlen = e_no_len;
1097 symptr->length = 1;
1098 }
1099
1100 /* try to find / */
1101 while (patptr < patend) {
1102 if (isSPACE(*patptr))
1103 patptr++;
1104 else if (*patptr == '#') {
1105 patptr++;
1106 while (patptr < patend && *patptr != '\n')
1107 patptr++;
1108 if (patptr < patend)
1109 patptr++;
1110 } else {
cc3ea0aa 1111 if (*patptr == '/') {
cf168e39
WL
1112 symptr->flags |= FLAG_SLASH;
1113 patptr++;
cc3ea0aa
NC
1114 if (patptr < patend &&
1115 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
cf168e39 1116 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
e02cb1b1 1117 _action( symptr ) );
cf168e39
WL
1118 }
1119 break;
1120 }
18529408 1121 }
cf168e39
WL
1122 } else {
1123 /* at end - no count, no / */
1124 symptr->howlen = e_no_len;
1125 symptr->length = 1;
1126 }
1127
1128 symptr->code = code;
773e5b22 1129 symptr->patptr = (char *) patptr;
cf168e39 1130 return TRUE;
18529408 1131 }
cf168e39 1132 }
773e5b22 1133 symptr->patptr = (char *) patptr;
cf168e39 1134 return FALSE;
18529408
IZ
1135}
1136
500d42d2
TH
1137#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
1138/*
773e5b22 1139 There is no way to cleanly handle the case where we should process the
500d42d2 1140 string per byte in its upgraded form while it's really in downgraded form
773e5b22
NC
1141 (e.g. estimates like strend-s as an upper bound for the number of
1142 characters left wouldn't work). So if we foresee the need of this
1143 (pattern starts with U or contains U0), we want to work on the encoded
1144 version of the string. Users are advised to upgrade their pack string
500d42d2
TH
1145 themselves if they need to do a lot of unpacks like this on it
1146*/
e02cb1b1 1147/* XXX These can be const */
773e5b22 1148STATIC bool
500d42d2
TH
1149need_utf8(const char *pat, const char *patend)
1150{
1151 bool first = TRUE;
1152 while (pat < patend) {
1153 if (pat[0] == '#') {
1154 pat++;
e02cb1b1 1155 pat = (const char *) memchr(pat, '\n', patend-pat);
500d42d2
TH
1156 if (!pat) return FALSE;
1157 } else if (pat[0] == 'U') {
1158 if (first || pat[1] == '0') return TRUE;
1159 } else first = FALSE;
1160 pat++;
1161 }
1162 return FALSE;
1163}
1164
1165STATIC char
1166first_symbol(const char *pat, const char *patend) {
1167 while (pat < patend) {
1168 if (pat[0] != '#') return pat[0];
1169 pat++;
e02cb1b1 1170 pat = (const char *) memchr(pat, '\n', patend-pat);
500d42d2
TH
1171 if (!pat) return 0;
1172 pat++;
1173 }
1174 return 0;
1175}
1176#endif
1177
18529408 1178/*
f3479639
JH
1179=for apidoc unpackstring
1180
41b209cd
NC
1181The engine implementing unpack() Perl function. C<unpackstring> puts the
1182extracted list items on the stack and returns the number of elements.
1183Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
f3479639
JH
1184
1185=cut */
1186
1187I32
970ba7cf
NC
1188Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend,
1189 U32 flags)
f3479639 1190{
e02cb1b1 1191 tempsym_t sym;
500d42d2 1192
3314a176 1193 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
500d42d2
TH
1194#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
1195 else if (need_utf8(pat, patend)) {
1196 /* We probably should try to avoid this in case a scalar context call
1197 wouldn't get to the "U0" */
1198 STRLEN len = strend - s;
a4de8f71 1199 s = (char *) bytes_to_utf8((U8 *) s, &len);
500d42d2
TH
1200 SAVEFREEPV(s);
1201 strend = s + len;
3314a176 1202 flags |= FLAG_DO_UTF8;
500d42d2
TH
1203 }
1204
3314a176
TH
1205 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1206 flags |= FLAG_PARSE_UTF8;
500d42d2 1207#else
3314a176
TH
1208 if (flags & FLAG_DO_UTF8)
1209 flags |= FLAG_PARSE_UTF8;
500d42d2
TH
1210#endif
1211
e02cb1b1 1212 TEMPSYM_INIT(&sym, pat, patend, flags);
f3479639
JH
1213
1214 return unpack_rec(&sym, s, s, strend, NULL );
1215}
1216
cf168e39
WL
1217STATIC
1218I32
e02cb1b1 1219S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
cf168e39 1220{
a6ec74c1 1221 dSP;
18529408 1222 SV *sv;
e02cb1b1 1223 const I32 start_sp_offset = SP - PL_stack_base;
cf168e39 1224 howlen_t howlen;
a6ec74c1 1225
a6ec74c1 1226 I32 checksum = 0;
92d41999 1227 UV cuv = 0;
a6ec74c1 1228 NV cdouble = 0.0;
3314a176 1229 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
cf168e39 1230 bool beyond = FALSE;
0bc88fab 1231 bool explicit_length;
a4de8f71 1232 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
3314a176 1233 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
3040befa 1234 symptr->strbeg = s - strbeg;
cf168e39 1235
cf168e39 1236 while (next_symbol(symptr)) {
e5464422 1237 packprops_t props;
a4de8f71 1238 I32 len;
3314a176 1239 I32 datumtype = symptr->code;
206947d2 1240 /* do first one only unless in list context
500d42d2 1241 / is implemented by unpacking the count, then popping it from the
206947d2 1242 stack, so must check that we're not in the middle of a / */
cf168e39 1243 if ( unpack_only_one
206947d2 1244 && (SP - PL_stack_base == start_sp_offset + 1)
cf168e39 1245 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1246 break;
cf168e39 1247
3314a176 1248 switch (howlen = symptr->howlen) {
773e5b22
NC
1249 case e_star:
1250 len = strend - strbeg; /* long enough */
cf168e39 1251 break;
3314a176
TH
1252 default:
1253 /* e_no_len and e_number */
1254 len = symptr->length;
1255 break;
cf168e39 1256 }
18529408 1257
0bc88fab 1258 explicit_length = TRUE;
a6ec74c1 1259 redo_switch:
cf168e39 1260 beyond = s >= strend;
e5464422
TH
1261
1262 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1263 if (props) {
1264 /* props nonzero means we can process this letter. */
a4de8f71
NC
1265 const long size = props & PACK_SIZE_MASK;
1266 const long howmany = (strend - s) / size;
e5464422
TH
1267 if (len > howmany)
1268 len = howmany;
1269
1270 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1271 if (len && unpack_only_one) len = 1;
1272 EXTEND(SP, len);
1273 EXTEND_MORTAL(len);
cc3ea0aa
NC
1274 }
1275 }
e5464422 1276
cc3ea0aa 1277 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1278 default:
cc3ea0aa 1279 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
cf168e39 1280
a6ec74c1 1281 case '%':
cf168e39 1282 if (howlen == e_no_len)
18529408 1283 len = 16; /* len is not specified */
a6ec74c1 1284 checksum = len;
92d41999 1285 cuv = 0;
a6ec74c1 1286 cdouble = 0;
18529408 1287 continue;
a6ec74c1 1288 break;
18529408
IZ
1289 case '(':
1290 {
cf168e39 1291 tempsym_t savsym = *symptr;
a4de8f71 1292 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
cc3ea0aa 1293 symptr->flags |= group_modifiers;
cf168e39 1294 symptr->patend = savsym.grpend;
3040befa 1295 symptr->previous = &savsym;
cf168e39 1296 symptr->level++;
18529408
IZ
1297 PUTBACK;
1298 while (len--) {
cf168e39 1299 symptr->patptr = savsym.grpbeg;
3314a176
TH
1300 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1301 else symptr->flags &= ~FLAG_PARSE_UTF8;
500d42d2
TH
1302 unpack_rec(symptr, s, strbeg, strend, &s);
1303 if (s == strend && savsym.howlen == e_star)
cf168e39 1304 break; /* No way to continue */
18529408
IZ
1305 }
1306 SPAGAIN;
3040befa 1307 savsym.flags = symptr->flags & ~group_modifiers;
cf168e39 1308 *symptr = savsym;
18529408
IZ
1309 break;
1310 }
3040befa
TH
1311#ifdef PERL_PACK_CAN_DOT
1312# ifdef PERL_PACK_CAN_SHRIEKSIGN
1313 case '.' | TYPE_IS_SHRIEKING:
1314# endif
1315 case '.': {
a4de8f71 1316 const char *from;
3040befa
TH
1317 SV *sv;
1318#ifdef PERL_PACK_CAN_SHRIEKSIGN
a4de8f71 1319 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
3040befa 1320#else /* PERL_PACK_CAN_SHRIEKSIGN */
a4de8f71 1321 const bool u8 = utf8;
3040befa
TH
1322#endif
1323 if (howlen == e_star) from = strbeg;
1324 else if (len <= 0) from = s;
1325 else {
1326 tempsym_t *group = symptr;
1327
1328 while (--len && group) group = group->previous;
1329 from = group ? strbeg + group->strbeg : strbeg;
1330 }
1331 sv = from <= s ?
a4de8f71
NC
1332 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1333 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
3040befa
TH
1334 XPUSHs(sv_2mortal(sv));
1335 break;
1336 }
1337#endif
1338#ifdef PERL_PACK_CAN_SHRIEKSIGN
1339 case '@' | TYPE_IS_SHRIEKING:
1340#endif
a6ec74c1 1341 case '@':
3040befa 1342 s = strbeg + symptr->strbeg;
500d42d2 1343#ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
3040befa
TH
1344#ifdef PERL_PACK_CAN_SHRIEKSIGN
1345 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1346#else /* PERL_PACK_CAN_SHRIEKSIGN */
1347 if (utf8)
1348#endif
1349 {
500d42d2
TH
1350 while (len > 0) {
1351 if (s >= strend)
1352 Perl_croak(aTHX_ "'@' outside of string in unpack");
1353 s += UTF8SKIP(s);
1354 len--;
1355 }
1356 if (s > strend)
1357 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1358 } else
1359#endif
1360 {
3040befa 1361 if (strend-s < len)
773e5b22 1362 Perl_croak(aTHX_ "'@' outside of string in unpack");
3040befa 1363 s += len;
500d42d2 1364 }
a6ec74c1 1365 break;
62f95557
IZ
1366 case 'X' | TYPE_IS_SHRIEKING:
1367 if (!len) /* Avoid division by 0 */
1368 len = 1;
500d42d2
TH
1369#ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1370 if (utf8) {
e02cb1b1 1371 const char *hop, *last;
3314a176
TH
1372 I32 l = len;
1373 hop = last = strbeg;
1374 while (hop < s) {
1375 hop += UTF8SKIP(hop);
1376 if (--l == 0) {
500d42d2 1377 last = hop;
3314a176
TH
1378 l = len;
1379 }
773e5b22 1380 }
3314a176
TH
1381 if (last > s)
1382 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
500d42d2
TH
1383 s = last;
1384 break;
3314a176 1385 }
500d42d2 1386#endif
3314a176 1387 len = (s - strbeg) % len;
62f95557 1388 /* FALL THROUGH */
a6ec74c1 1389 case 'X':
500d42d2
TH
1390#ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1391 if (utf8) {
1392 while (len > 0) {
1393 if (s <= strbeg)
1394 Perl_croak(aTHX_ "'X' outside of string in unpack");
3314a176 1395 while (--s, UTF8_IS_CONTINUATION(*s)) {
500d42d2
TH
1396 if (s <= strbeg)
1397 Perl_croak(aTHX_ "'X' outside of string in unpack");
1398 }
1399 len--;
1400 }
1401 } else
1402#endif
1403 {
773e5b22
NC
1404 if (len > s - strbeg)
1405 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1406 s -= len;
500d42d2 1407 }
a6ec74c1 1408 break;
a4de8f71
NC
1409 case 'x' | TYPE_IS_SHRIEKING: {
1410 I32 ai32;
62f95557
IZ
1411 if (!len) /* Avoid division by 0 */
1412 len = 1;
a4de8f71
NC
1413 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1414 else ai32 = (s - strbeg) % len;
500d42d2
TH
1415 if (ai32 == 0) break;
1416 len -= ai32;
a4de8f71 1417 }
62f95557 1418 /* FALL THROUGH */
a6ec74c1 1419 case 'x':
500d42d2
TH
1420 if (utf8) {
1421 while (len>0) {
1422 if (s >= strend)
1423 Perl_croak(aTHX_ "'x' outside of string in unpack");
1424 s += UTF8SKIP(s);
1425 len--;
1426 }
1427 } else {
773e5b22
NC
1428 if (len > strend - s)
1429 Perl_croak(aTHX_ "'x' outside of string in unpack");
1430 s += len;
3314a176 1431 }
a6ec74c1
JH
1432 break;
1433 case '/':
cf168e39
WL
1434 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1435 break;
a6ec74c1
JH
1436 case 'A':
1437 case 'Z':
1438 case 'a':
500d42d2
TH
1439 if (checksum) {
1440 /* Preliminary length estimate is assumed done in 'W' */
1441 if (len > strend - s) len = strend - s;
1442 goto W_checksum;
1443 }
1444#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1445 if (utf8) {
1446 I32 l;
e02cb1b1 1447 const char *hop;
500d42d2
TH
1448 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1449 if (hop >= strend) {
1450 if (hop > strend)
1451 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1452 break;
773e5b22 1453 }
a6ec74c1 1454 }
500d42d2
TH
1455 if (hop > strend)
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1457 len = hop - s;
1458 } else
1459#endif
1460 if (len > strend - s)
1461 len = strend - s;
1462
1463 if (datumtype == 'Z') {
1464 /* 'Z' strips stuff after first null */
e02cb1b1 1465 const char *ptr, *end;
3314a176
TH
1466 end = s + len;
1467 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
500d42d2
TH
1468 sv = newSVpvn(s, ptr-s);
1469 if (howlen == e_star) /* exact for 'Z*' */
1470 len = ptr-s + (ptr != strend ? 1 : 0);
1471 } else if (datumtype == 'A') {
1472 /* 'A' strips both nulls and spaces */
e02cb1b1 1473 const char *ptr;
1928d712
NC
1474#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1475 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1476 for (ptr = s+len-1; ptr >= s; ptr--)
1477 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
a4de8f71 1478 !is_utf8_space((U8 *) ptr)) break;
1928d712
NC
1479 if (ptr >= s) ptr += UTF8SKIP(ptr);
1480 else ptr++;
3040befa 1481 if (ptr > s+len)
1928d712
NC
1482 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1483 } else
1484#endif
1485 {
1486 for (ptr = s+len-1; ptr >= s; ptr--)
1487 if (*ptr != 0 && !isSPACE(*ptr)) break;
1488 ptr++;
1489 }
500d42d2
TH
1490 sv = newSVpvn(s, ptr-s);
1491 } else sv = newSVpvn(s, len);
1492
1493#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1494 if (utf8) {
1495 SvUTF8_on(sv);
1496 /* Undo any upgrade done due to need_utf8() */
3314a176 1497 if (!(symptr->flags & FLAG_WAS_UTF8))
500d42d2 1498 sv_utf8_downgrade(sv, 0);
a6ec74c1 1499 }
500d42d2 1500#endif
a6ec74c1 1501 XPUSHs(sv_2mortal(sv));
500d42d2 1502 s += len;
a6ec74c1
JH
1503 break;
1504 case 'B':
500d42d2
TH
1505 case 'b': {
1506 char *str;
cf168e39 1507 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1508 len = (strend - s) * 8;
1509 if (checksum) {
1510 if (!PL_bitcount) {
500d42d2 1511 int bits;
cd7a8267 1512 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1513 for (bits = 1; bits < 256; bits++) {
1514 if (bits & 1) PL_bitcount[bits]++;
1515 if (bits & 2) PL_bitcount[bits]++;
1516 if (bits & 4) PL_bitcount[bits]++;
1517 if (bits & 8) PL_bitcount[bits]++;
1518 if (bits & 16) PL_bitcount[bits]++;
1519 if (bits & 32) PL_bitcount[bits]++;
1520 if (bits & 64) PL_bitcount[bits]++;
1521 if (bits & 128) PL_bitcount[bits]++;
1522 }
1523 }
500d42d2 1524#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
3314a176 1525 if (utf8)
500d42d2 1526 while (len >= 8 && s < strend) {
3314a176 1527 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
500d42d2
TH
1528 len -= 8;
1529 }
3314a176 1530 else
500d42d2 1531#endif
773e5b22 1532 while (len >= 8) {
500d42d2 1533 cuv += PL_bitcount[*(U8 *)s++];
773e5b22
NC
1534 len -= 8;
1535 }
500d42d2
TH
1536 if (len && s < strend) {
1537 U8 bits;
1538#ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
3314a176 1539 bits = SHIFT_BYTE(0, s, strend, datumtype);
500d42d2 1540#else
3314a176 1541 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
500d42d2 1542#endif
3314a176 1543 if (datumtype == 'b')
a6ec74c1 1544 while (len-- > 0) {
92d41999 1545 if (bits & 1) cuv++;
a6ec74c1
JH
1546 bits >>= 1;
1547 }
3314a176 1548 else
a6ec74c1 1549 while (len-- > 0) {
500d42d2 1550 if (bits & 0x80) cuv++;
a6ec74c1
JH
1551 bits <<= 1;
1552 }
773e5b22 1553 }
a6ec74c1
JH
1554 break;
1555 }
500d42d2
TH
1556
1557 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1558 SvPOK_on(sv);
1559 str = SvPVX(sv);
1560 if (datumtype == 'b') {
3314a176 1561 U8 bits = 0;
e02cb1b1 1562 const I32 ai32 = len;
500d42d2
TH
1563 for (len = 0; len < ai32; len++) {
1564 if (len & 7) bits >>= 1;
1565#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1566 else if (utf8) {
1567 if (s >= strend) break;
3314a176 1568 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
500d42d2
TH
1569 }
1570#endif
1571 else bits = *(U8 *) s++;
1572 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1573 }
500d42d2 1574 } else {
3314a176 1575 U8 bits = 0;
e02cb1b1 1576 const I32 ai32 = len;
500d42d2
TH
1577 for (len = 0; len < ai32; len++) {
1578 if (len & 7) bits <<= 1;
1579#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1580 else if (utf8) {
1581 if (s >= strend) break;
3314a176 1582 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
500d42d2
TH
1583 }
1584#endif
1585 else bits = *(U8 *) s++;
1586 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1587 }
1588 }
1589 *str = '\0';
500d42d2
TH
1590 SvCUR_set(sv, str - SvPVX(sv));
1591 XPUSHs(sv);
a6ec74c1 1592 break;
500d42d2 1593 }
a6ec74c1 1594 case 'H':
500d42d2
TH
1595 case 'h': {
1596 char *str;
773e5b22 1597 /* Preliminary length estimate, acceptable for utf8 too */
cf168e39 1598 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1599 len = (strend - s) * 2;
773e5b22 1600 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1601 SvPOK_on(sv);
1602 str = SvPVX(sv);
1603 if (datumtype == 'h') {
3314a176 1604 U8 bits = 0;
a4de8f71 1605 I32 ai32 = len;
773e5b22
NC
1606 for (len = 0; len < ai32; len++) {
1607 if (len & 1) bits >>= 4;
500d42d2 1608#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
773e5b22
NC
1609 else if (utf8) {
1610 if (s >= strend) break;
3314a176 1611 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
773e5b22 1612 }
500d42d2 1613#endif
773e5b22 1614 else bits = * (U8 *) s++;
a6ec74c1
JH
1615 *str++ = PL_hexdigit[bits & 15];
1616 }
500d42d2 1617 } else {
3314a176 1618 U8 bits = 0;
e02cb1b1 1619 const I32 ai32 = len;
500d42d2
TH
1620 for (len = 0; len < ai32; len++) {
1621 if (len & 1) bits <<= 4;
1622#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1623 else if (utf8) {
1624 if (s >= strend) break;
3314a176 1625 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
500d42d2
TH
1626 }
1627#endif
1628 else bits = *(U8 *) s++;
a6ec74c1
JH
1629 *str++ = PL_hexdigit[(bits >> 4) & 15];
1630 }
1631 }
1632 *str = '\0';
500d42d2
TH
1633 SvCUR_set(sv, str - SvPVX(sv));
1634 XPUSHs(sv);
a6ec74c1 1635 break;
500d42d2 1636 }
a6ec74c1 1637 case 'c':
cc3ea0aa 1638 while (len-- > 0) {
500d42d2 1639#ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
3314a176 1640 int aint = SHIFT_BYTE(0, s, strend, datumtype);
500d42d2 1641#else
3314a176 1642 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
500d42d2 1643#endif
cc3ea0aa
NC
1644 if (aint >= 128) /* fake up signed chars */
1645 aint -= 256;
500d42d2 1646 if (!checksum)
cc3ea0aa 1647 PUSHs(sv_2mortal(newSViv((IV)aint)));
cc3ea0aa
NC
1648 else if (checksum > bits_in_uv)
1649 cdouble += (NV)aint;
1650 else
1651 cuv += aint;
a6ec74c1
JH
1652 }
1653 break;
1654 case 'C':
500d42d2
TH
1655#ifdef PERL_PACK_CAN_W
1656 case 'W':
1657#endif
1658 W_checksum:
35bcd338 1659 if (len == 0) {
773e5b22 1660 if (explicit_length && datumtype == 'C')
500d42d2
TH
1661#ifdef PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
1662 /* In maint, continue to switch from "character" mode */
1663 utf8 = 0;
1664#else
1665 /* Switch to "character" mode */
3314a176 1666 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
500d42d2 1667#endif
35bcd338
JH
1668 break;
1669 }
500d42d2 1670#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
773e5b22 1671 if (datumtype == 'C' ?
3314a176
TH
1672 (symptr->flags & FLAG_DO_UTF8) &&
1673 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
500d42d2 1674 while (len-- > 0 && s < strend) {
500d42d2 1675 STRLEN retlen;
e02cb1b1 1676 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
3314a176 1677 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
500d42d2
TH
1678 if (retlen == (STRLEN) -1 || retlen == 0)
1679 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1680 s += retlen;
1681 if (!checksum)
1682 PUSHs(sv_2mortal(newSVuv((UV) val)));
1683 else if (checksum > bits_in_uv)
1684 cdouble += (NV) val;
0bc88fab 1685 else
500d42d2 1686 cuv += val;
773e5b22 1687 }
500d42d2
TH
1688 } else
1689#endif
1690 if (!checksum)
a6ec74c1 1691 while (len-- > 0) {
e02cb1b1 1692 const U8 ch = *(U8 *) s++;
500d42d2 1693 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1694 }
500d42d2
TH
1695 else if (checksum > bits_in_uv)
1696 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1697 else
1698 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1699 break;
1700 case 'U':
35bcd338 1701 if (len == 0) {
500d42d2
TH
1702 if (explicit_length) {
1703#ifdef PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
1704 /* In maint, switch to "character" mode */
1705 utf8 = 1;
1706#else
1707 /* Switch to "bytes in UTF-8" mode */
3314a176 1708 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
500d42d2
TH
1709 else
1710 /* Should be impossible due to the need_utf8() test */
1711 Perl_croak(aTHX_ "U0 mode on a byte string");
1712#endif
1713 }
35bcd338
JH
1714 break;
1715 }
500d42d2
TH
1716#ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1717 if (!utf8)
1718 goto W_checksum;
1719#endif
1720 if (len > strend - s) len = strend - s;
773e5b22 1721 if (!checksum) {
500d42d2
TH
1722 if (len && unpack_only_one) len = 1;
1723 EXTEND(SP, len);
1724 EXTEND_MORTAL(len);
773e5b22 1725 }
500d42d2
TH
1726 while (len-- > 0 && s < strend) {
1727 STRLEN retlen;
1728 UV auv;
1729#ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1730 if (utf8) {
1731 U8 result[UTF8_MAXLEN];
e02cb1b1 1732 const char *ptr = s;
500d42d2 1733 STRLEN len;
500d42d2
TH
1734 /* Bug: warns about bad utf8 even if we are short on bytes
1735 and will break out of the loop */
a4de8f71
NC
1736 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1737 'U'))
500d42d2
TH
1738 break;
1739 len = UTF8SKIP(result);
773e5b22 1740 if (!uni_to_bytes(aTHX_ &ptr, strend,
a4de8f71 1741 (char *) &result[1], len-1, 'U')) break;
500d42d2
TH
1742 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1743 s = ptr;
1744 } else
1745#endif
1746 {
1747 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1748 if (retlen == (STRLEN) -1 || retlen == 0)
1749 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1750 s += retlen;
1751 }
1752 if (!checksum)
1753 PUSHs(sv_2mortal(newSVuv((UV) auv)));
cc3ea0aa 1754 else if (checksum > bits_in_uv)
500d42d2 1755 cdouble += (NV) auv;
cc3ea0aa 1756 else
500d42d2 1757 cuv += auv;
a6ec74c1
JH
1758 }
1759 break;
cf168e39
WL
1760 case 's' | TYPE_IS_SHRIEKING:
1761#if SHORTSIZE != SIZE16
cc3ea0aa 1762 while (len-- > 0) {
500d42d2 1763 short ashort;
3314a176
TH
1764 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1765 DO_BO_UNPACK(ashort, s);
500d42d2 1766 if (!checksum)
cc3ea0aa 1767 PUSHs(sv_2mortal(newSViv((IV)ashort)));
cc3ea0aa
NC
1768 else if (checksum > bits_in_uv)
1769 cdouble += (NV)ashort;
1770 else
1771 cuv += ashort;
cf168e39
WL
1772 }
1773 break;
1774#else
1775 /* Fallthrough! */
a6ec74c1 1776#endif
cf168e39 1777 case 's':
cc3ea0aa 1778 while (len-- > 0) {
500d42d2
TH
1779 I16 ai16;
1780
1781#if U16SIZE > SIZE16
1782 ai16 = 0;
1783#endif
3314a176 1784 SHIFT16(utf8, s, strend, &ai16, datumtype);
cc3ea0aa
NC
1785 DO_BO_UNPACK(ai16, 16);
1786#if U16SIZE > SIZE16
1787 if (ai16 > 32767)
1788 ai16 -= 65536;
1789#endif
500d42d2 1790 if (!checksum)
cc3ea0aa 1791 PUSHs(sv_2mortal(newSViv((IV)ai16)));
cc3ea0aa
NC
1792 else if (checksum > bits_in_uv)
1793 cdouble += (NV)ai16;
1794 else
1795 cuv += ai16;
a6ec74c1
JH
1796 }
1797 break;
cf168e39
WL
1798 case 'S' | TYPE_IS_SHRIEKING:
1799#if SHORTSIZE != SIZE16
cc3ea0aa 1800 while (len-- > 0) {
500d42d2 1801 unsigned short aushort;
3314a176
TH
1802 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1803 DO_BO_UNPACK(aushort, s);
500d42d2
TH
1804 if (!checksum)
1805 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
cc3ea0aa
NC
1806 else if (checksum > bits_in_uv)
1807 cdouble += (NV)aushort;
1808 else
1809 cuv += aushort;
cf168e39
WL
1810 }
1811 break;
1812#else
1813 /* Fallhrough! */
1814#endif
a6ec74c1
JH
1815 case 'v':
1816 case 'n':
1817 case 'S':
cc3ea0aa 1818 while (len-- > 0) {
500d42d2
TH
1819 U16 au16;
1820#if U16SIZE > SIZE16
1821 au16 = 0;
1822#endif
3314a176 1823 SHIFT16(utf8, s, strend, &au16, datumtype);
500d42d2 1824 DO_BO_UNPACK(au16, 16);
a6ec74c1 1825#ifdef HAS_NTOHS
cc3ea0aa
NC
1826 if (datumtype == 'n')
1827 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1828#endif
1829#ifdef HAS_VTOHS
cc3ea0aa
NC
1830 if (datumtype == 'v')
1831 au16 = vtohs(au16);
a6ec74c1 1832#endif
500d42d2
TH
1833 if (!checksum)
1834 PUSHs(sv_2mortal(newSVuv((UV)au16)));
cc3ea0aa 1835 else if (checksum > bits_in_uv)
3314a176 1836 cdouble += (NV) au16;
cc3ea0aa
NC
1837 else
1838 cuv += au16;
a6ec74c1 1839 }
cc3ea0aa
NC
1840 break;
1841#ifdef PERL_PACK_CAN_SHRIEKSIGN
1842 case 'v' | TYPE_IS_SHRIEKING:
1843 case 'n' | TYPE_IS_SHRIEKING:
1844 while (len-- > 0) {
500d42d2
TH
1845 I16 ai16;
1846# if U16SIZE > SIZE16
1847 ai16 = 0;
1848# endif
3314a176 1849 SHIFT16(utf8, s, strend, &ai16, datumtype);
500d42d2 1850# ifdef HAS_NTOHS
cc3ea0aa 1851 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
500d42d2
TH
1852 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1853# endif /* HAS_NTOHS */
1854# ifdef HAS_VTOHS
cc3ea0aa 1855 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
500d42d2
TH
1856 ai16 = (I16) vtohs((U16) ai16);
1857# endif /* HAS_VTOHS */
1858 if (!checksum)
cc3ea0aa 1859 PUSHs(sv_2mortal(newSViv((IV)ai16)));
cc3ea0aa 1860 else if (checksum > bits_in_uv)
500d42d2 1861 cdouble += (NV) ai16;
cc3ea0aa
NC
1862 else
1863 cuv += ai16;
a6ec74c1
JH
1864 }
1865 break;
500d42d2 1866#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1867 case 'i':
cf168e39 1868 case 'i' | TYPE_IS_SHRIEKING:
cc3ea0aa 1869 while (len-- > 0) {
500d42d2 1870 int aint;
3314a176
TH
1871 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1872 DO_BO_UNPACK(aint, i);
500d42d2 1873 if (!checksum)
cc3ea0aa 1874 PUSHs(sv_2mortal(newSViv((IV)aint)));
cc3ea0aa
NC
1875 else if (checksum > bits_in_uv)
1876 cdouble += (NV)aint;
1877 else
1878 cuv += aint;
a6ec74c1
JH
1879 }
1880 break;
1881 case 'I':
cf168e39 1882 case 'I' | TYPE_IS_SHRIEKING:
cc3ea0aa 1883 while (len-- > 0) {
500d42d2 1884 unsigned int auint;
3314a176
TH
1885 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1886 DO_BO_UNPACK(auint, i);
500d42d2 1887 if (!checksum)
cc3ea0aa 1888 PUSHs(sv_2mortal(newSVuv((UV)auint)));
cc3ea0aa
NC
1889 else if (checksum > bits_in_uv)
1890 cdouble += (NV)auint;
1891 else
1892 cuv += auint;
a6ec74c1
JH
1893 }
1894 break;
92d41999 1895 case 'j':
cc3ea0aa 1896 while (len-- > 0) {
500d42d2 1897 IV aiv;
3314a176 1898 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
cc3ea0aa 1899#if IVSIZE == INTSIZE
3314a176 1900 DO_BO_UNPACK(aiv, i);
cc3ea0aa 1901#elif IVSIZE == LONGSIZE
3314a176 1902 DO_BO_UNPACK(aiv, l);
cc3ea0aa 1903#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3314a176 1904 DO_BO_UNPACK(aiv, 64);
500d42d2
TH
1905#else
1906 Perl_croak(aTHX_ "'j' not supported on this platform");
cc3ea0aa 1907#endif
500d42d2 1908 if (!checksum)
cc3ea0aa 1909 PUSHs(sv_2mortal(newSViv(aiv)));
cc3ea0aa
NC
1910 else if (checksum > bits_in_uv)
1911 cdouble += (NV)aiv;
1912 else
1913 cuv += aiv;
92d41999
JH
1914 }
1915 break;
1916 case 'J':
cc3ea0aa 1917 while (len-- > 0) {
500d42d2 1918 UV auv;
3314a176 1919 SHIFT_VAR(utf8, s, strend, auv, datumtype);
500d42d2 1920#if IVSIZE == INTSIZE
3314a176 1921 DO_BO_UNPACK(auv, i);
500d42d2 1922#elif IVSIZE == LONGSIZE
3314a176 1923 DO_BO_UNPACK(auv, l);
500d42d2 1924#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3314a176 1925 DO_BO_UNPACK(auv, 64);
500d42d2
TH
1926#else
1927 Perl_croak(aTHX_ "'J' not supported on this platform");
cc3ea0aa 1928#endif
500d42d2 1929 if (!checksum)
cc3ea0aa 1930 PUSHs(sv_2mortal(newSVuv(auv)));
cc3ea0aa
NC
1931 else if (checksum > bits_in_uv)
1932 cdouble += (NV)auv;
1933 else
1934 cuv += auv;
92d41999
JH
1935 }
1936 break;
cf168e39
WL
1937 case 'l' | TYPE_IS_SHRIEKING:
1938#if LONGSIZE != SIZE32
cc3ea0aa 1939 while (len-- > 0) {
500d42d2 1940 long along;
3314a176
TH
1941 SHIFT_VAR(utf8, s, strend, along, datumtype);
1942 DO_BO_UNPACK(along, l);
500d42d2 1943 if (!checksum)
cc3ea0aa 1944 PUSHs(sv_2mortal(newSViv((IV)along)));
cc3ea0aa
NC
1945 else if (checksum > bits_in_uv)
1946 cdouble += (NV)along;
1947 else
1948 cuv += along;
cf168e39
WL
1949 }
1950 break;
1951#else
1952 /* Fallthrough! */
a6ec74c1 1953#endif
cf168e39 1954 case 'l':
cc3ea0aa 1955 while (len-- > 0) {
500d42d2
TH
1956 I32 ai32;
1957#if U32SIZE > SIZE32
1958 ai32 = 0;
1959#endif
3314a176 1960 SHIFT32(utf8, s, strend, &ai32, datumtype);
cc3ea0aa
NC
1961 DO_BO_UNPACK(ai32, 32);
1962#if U32SIZE > SIZE32
500d42d2 1963 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1964#endif
500d42d2 1965 if (!checksum)
cc3ea0aa 1966 PUSHs(sv_2mortal(newSViv((IV)ai32)));
cc3ea0aa
NC
1967 else if (checksum > bits_in_uv)
1968 cdouble += (NV)ai32;
1969 else
1970 cuv += ai32;
a6ec74c1
JH
1971 }
1972 break;
cf168e39
WL
1973 case 'L' | TYPE_IS_SHRIEKING:
1974#if LONGSIZE != SIZE32
cc3ea0aa 1975 while (len-- > 0) {
500d42d2 1976 unsigned long aulong;
3314a176
TH
1977 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1978 DO_BO_UNPACK(aulong, l);
500d42d2 1979 if (!checksum)
cc3ea0aa 1980 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
cc3ea0aa
NC
1981 else if (checksum > bits_in_uv)
1982 cdouble += (NV)aulong;
1983 else
1984 cuv += aulong;
cf168e39
WL
1985 }
1986 break;
1987#else
1988 /* Fall through! */
1989#endif
a6ec74c1
JH
1990 case 'V':
1991 case 'N':
1992 case 'L':
cc3ea0aa 1993 while (len-- > 0) {
500d42d2
TH
1994 U32 au32;
1995#if U32SIZE > SIZE32
1996 au32 = 0;
1997#endif
3314a176 1998 SHIFT32(utf8, s, strend, &au32, datumtype);
500d42d2 1999 DO_BO_UNPACK(au32, 32);
a6ec74c1 2000#ifdef HAS_NTOHL
cc3ea0aa
NC
2001 if (datumtype == 'N')
2002 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
2003#endif
2004#ifdef HAS_VTOHL
cc3ea0aa
NC
2005 if (datumtype == 'V')
2006 au32 = vtohl(au32);
a6ec74c1 2007#endif
500d42d2 2008 if (!checksum)
773e5b22
NC
2009 PUSHs(sv_2mortal(newSVuv((UV)au32)));
2010 else if (checksum > bits_in_uv)
2011 cdouble += (NV)au32;
2012 else
2013 cuv += au32;
cc3ea0aa
NC
2014 }
2015 break;
2016#ifdef PERL_PACK_CAN_SHRIEKSIGN
2017 case 'V' | TYPE_IS_SHRIEKING:
2018 case 'N' | TYPE_IS_SHRIEKING:
2019 while (len-- > 0) {
500d42d2
TH
2020 I32 ai32;
2021# if U32SIZE > SIZE32
2022 ai32 = 0;
2023# endif
3314a176 2024 SHIFT32(utf8, s, strend, &ai32, datumtype);
500d42d2 2025# ifdef HAS_NTOHL
cc3ea0aa
NC
2026 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
2027 ai32 = (I32)PerlSock_ntohl((U32)ai32);
500d42d2
TH
2028# endif
2029# ifdef HAS_VTOHL
cc3ea0aa
NC
2030 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
2031 ai32 = (I32)vtohl((U32)ai32);
500d42d2
TH
2032# endif
2033 if (!checksum)
cc3ea0aa 2034 PUSHs(sv_2mortal(newSViv((IV)ai32)));
cc3ea0aa
NC
2035 else if (checksum > bits_in_uv)
2036 cdouble += (NV)ai32;
2037 else
2038 cuv += ai32;
a6ec74c1
JH
2039 }
2040 break;
500d42d2 2041#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 2042 case 'p':
a6ec74c1 2043 while (len-- > 0) {
e02cb1b1 2044 const char *aptr;
3314a176 2045 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
206b424e 2046 DO_BO_UNPACK_PC(aptr);
cc3ea0aa
NC
2047 /* newSVpv generates undef if aptr is NULL */
2048 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
2049 }
2050 break;
2051 case 'w':
a6ec74c1
JH
2052 {
2053 UV auv = 0;
2054 U32 bytes = 0;
773e5b22 2055
500d42d2
TH
2056 while (len > 0 && s < strend) {
2057 U8 ch;
3314a176 2058 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
500d42d2 2059 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2060 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
500d42d2 2061 if (ch < 0x80) {
a6ec74c1 2062 bytes = 0;
cc3ea0aa 2063 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
2064 len--;
2065 auv = 0;
500d42d2 2066 continue;
a6ec74c1 2067 }
500d42d2 2068 if (++bytes >= sizeof(UV)) { /* promote to string */
c06c673c 2069 const char *t;
a6ec74c1
JH
2070
2071 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2072 while (s < strend) {
3314a176 2073 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
500d42d2
TH
2074 sv = mul128(sv, (U8)(ch & 0x7f));
2075 if (!(ch & 0x80)) {
a6ec74c1
JH
2076 bytes = 0;
2077 break;
2078 }
2079 }
c06c673c 2080 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2081 while (*t == '0')
2082 t++;
cacbe70b 2083 sv_chop(sv, (char *)t);
a6ec74c1
JH
2084 PUSHs(sv_2mortal(sv));
2085 len--;
2086 auv = 0;
2087 }
2088 }
2089 if ((s >= strend) && bytes)
cf168e39 2090 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2091 }
2092 break;
2093 case 'P':
cf168e39
WL
2094 if (symptr->howlen == e_star)
2095 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2096 EXTEND(SP, 1);
820b8518 2097 if (s + sizeof(char*) <= strend) {
500d42d2 2098 char *aptr;
3314a176 2099 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
500d42d2 2100 DO_BO_UNPACK_PC(aptr);
773e5b22
NC
2101 /* newSVpvn generates undef if aptr is NULL */
2102 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
500d42d2 2103 }
a6ec74c1
JH
2104 break;
2105#ifdef HAS_QUAD
2106 case 'q':
cc3ea0aa 2107 while (len-- > 0) {
500d42d2 2108 Quad_t aquad;
3314a176
TH
2109 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2110 DO_BO_UNPACK(aquad, 64);
500d42d2
TH
2111 if (!checksum)
2112 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
cc3ea0aa 2113 newSViv((IV)aquad) : newSVnv((NV)aquad)));
cc3ea0aa
NC
2114 else if (checksum > bits_in_uv)
2115 cdouble += (NV)aquad;
2116 else
2117 cuv += aquad;
2118 }
a6ec74c1
JH
2119 break;
2120 case 'Q':
cc3ea0aa 2121 while (len-- > 0) {
500d42d2 2122 Uquad_t auquad;
3314a176
TH
2123 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2124 DO_BO_UNPACK(auquad, 64);
500d42d2
TH
2125 if (!checksum)
2126 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2127 newSVuv((UV)auquad):newSVnv((NV)auquad)));
cc3ea0aa
NC
2128 else if (checksum > bits_in_uv)
2129 cdouble += (NV)auquad;
2130 else
2131 cuv += auquad;
a6ec74c1
JH
2132 }
2133 break;
500d42d2 2134#endif /* HAS_QUAD */
a6ec74c1
JH
2135 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2136 case 'f':
cc3ea0aa 2137 while (len-- > 0) {
500d42d2 2138 float afloat;
3314a176 2139 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
500d42d2
TH
2140 DO_BO_UNPACK_N(afloat, float);
2141 if (!checksum)
2142 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2143 else
cc3ea0aa 2144 cdouble += afloat;
773e5b22 2145 }
a6ec74c1
JH
2146 break;
2147 case 'd':
cc3ea0aa 2148 while (len-- > 0) {
500d42d2 2149 double adouble;
3314a176 2150 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
500d42d2
TH
2151 DO_BO_UNPACK_N(adouble, double);
2152 if (!checksum)
2153 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2154 else
cc3ea0aa 2155 cdouble += adouble;
773e5b22 2156 }
a6ec74c1 2157 break;
92d41999 2158 case 'F':
cc3ea0aa 2159 while (len-- > 0) {
500d42d2 2160 NV anv;
3314a176 2161 SHIFT_VAR(utf8, s, strend, anv, datumtype);
500d42d2
TH
2162 DO_BO_UNPACK_N(anv, NV);
2163 if (!checksum)
2164 PUSHs(sv_2mortal(newSVnv(anv)));
2165 else
cc3ea0aa 2166 cdouble += anv;
773e5b22 2167 }
92d41999
JH
2168 break;
2169#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2170 case 'D':
cc3ea0aa 2171 while (len-- > 0) {
500d42d2 2172 long double aldouble;
3314a176 2173 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
500d42d2
TH
2174 DO_BO_UNPACK_N(aldouble, long double);
2175 if (!checksum)
2176 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2177 else
2178 cdouble += aldouble;
92d41999
JH
2179 }
2180 break;
2181#endif
a6ec74c1
JH
2182 case 'u':
2183 /* MKS:
2184 * Initialise the decode mapping. By using a table driven
2185 * algorithm, the code will be character-set independent
2186 * (and just as fast as doing character arithmetic)
2187 */
2188 if (PL_uudmap['M'] == 0) {
820b8518 2189 size_t i;
a6ec74c1 2190
820b8518 2191 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2192 PL_uudmap[(U8)PL_uuemap[i]] = i;
2193 /*
2194 * Because ' ' and '`' map to the same value,
2195 * we need to decode them both the same.
2196 */
2197 PL_uudmap[' '] = 0;
2198 }
500d42d2 2199 {
e02cb1b1 2200 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
500d42d2
TH
2201 sv = sv_2mortal(NEWSV(42, l));
2202 if (l) SvPOK_on(sv);
2203 }
2204 if (utf8) {
2205 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2206 I32 a, b, c, d;
2207 char hunk[4];
2208
2209 hunk[3] = '\0';
2210 while (len > 0) {
2211 next_uni_uu(aTHX_ &s, strend, &a);
2212 next_uni_uu(aTHX_ &s, strend, &b);
2213 next_uni_uu(aTHX_ &s, strend, &c);
2214 next_uni_uu(aTHX_ &s, strend, &d);
2215 hunk[0] = (char)((a << 2) | (b >> 4));
2216 hunk[1] = (char)((b << 4) | (c >> 2));
2217 hunk[2] = (char)((c << 6) | d);
2218 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2219 len -= 3;
2220 }
2221 if (s < strend) {
e02cb1b1
NC
2222 if (*s == '\n') {
2223 s++;
2224 }
500d42d2
TH
2225 else {
2226 /* possible checksum byte */
e02cb1b1
NC
2227 const char *skip = s+UTF8SKIP(s);
2228 if (skip < strend && *skip == '\n')
2229 s = skip+1;
500d42d2
TH
2230 }
2231 }
2232 }
2233 } else {
773e5b22
NC
2234 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2235 I32 a, b, c, d;
2236 char hunk[4];
a6ec74c1 2237
773e5b22
NC
2238 hunk[3] = '\0';
2239 len = PL_uudmap[*(U8*)s++] & 077;
2240 while (len > 0) {
2241 if (s < strend && ISUUCHAR(*s))
2242 a = PL_uudmap[*(U8*)s++] & 077;
2243 else
2244 a = 0;
2245 if (s < strend && ISUUCHAR(*s))
2246 b = PL_uudmap[*(U8*)s++] & 077;
2247 else
2248 b = 0;
2249 if (s < strend && ISUUCHAR(*s))
2250 c = PL_uudmap[*(U8*)s++] & 077;
2251 else
2252 c = 0;
2253 if (s < strend && ISUUCHAR(*s))
2254 d = PL_uudmap[*(U8*)s++] & 077;
2255 else
2256 d = 0;
2257 hunk[0] = (char)((a << 2) | (b >> 4));
2258 hunk[1] = (char)((b << 4) | (c >> 2));
2259 hunk[2] = (char)((c << 6) | d);
2260 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2261 len -= 3;
2262 }
2263 if (*s == '\n')
2264 s++;
2265 else /* possible checksum byte */
2266 if (s + 1 < strend && s[1] == '\n')
2267 s += 2;
a6ec74c1 2268 }
500d42d2
TH
2269 }
2270 XPUSHs(sv);
a6ec74c1
JH
2271 break;
2272 }
cf168e39 2273
a6ec74c1 2274 if (checksum) {
cc3ea0aa 2275 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2276 (checksum > bits_in_uv &&
500d42d2
TH
2277 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2278 NV trouble, anv;
a6ec74c1 2279
500d42d2 2280 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2281 while (checksum >= 16) {
2282 checksum -= 16;
500d42d2 2283 anv *= 65536.0;
a6ec74c1 2284 }
a6ec74c1 2285 while (cdouble < 0.0)
500d42d2
TH
2286 cdouble += anv;
2287 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
cc3ea0aa 2288 sv = newSVnv(cdouble);
a6ec74c1
JH
2289 }
2290 else {
fa8ec7c1
NC
2291 if (checksum < bits_in_uv) {
2292 UV mask = ((UV)1 << checksum) - 1;
92d41999 2293 cuv &= mask;
a6ec74c1 2294 }
cc3ea0aa 2295 sv = newSVuv(cuv);
a6ec74c1
JH
2296 }
2297 XPUSHs(sv_2mortal(sv));
2298 checksum = 0;
2299 }
773e5b22 2300
cf168e39
WL
2301 if (symptr->flags & FLAG_SLASH){
2302 if (SP - PL_stack_base - start_sp_offset <= 0)
2303 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2304 if( next_symbol(symptr) ){
2305 if( symptr->howlen == e_number )
2306 Perl_croak(aTHX_ "Count after length/code in unpack" );
2307 if( beyond ){
2308 /* ...end of char buffer then no decent length available */
2309 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2310 } else {
2311 /* take top of stack (hope it's numeric) */
2312 len = POPi;
2313 if( len < 0 )
2314 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2315 }
2316 } else {
2317 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2318 }
2319 datumtype = symptr->code;
0bc88fab 2320 explicit_length = FALSE;
cf168e39
WL
2321 goto redo_switch;
2322 }
a6ec74c1 2323 }
cf168e39 2324
18529408
IZ
2325 if (new_s)
2326 *new_s = s;
2327 PUTBACK;
2328 return SP - PL_stack_base - start_sp_offset;
2329}
2330
2331PP(pp_unpack)
2332{
2333 dSP;
2334 dPOPPOPssrl;
2335 I32 gimme = GIMME_V;
2336 STRLEN llen;
2337 STRLEN rlen;
500d42d2
TH
2338 const char *pat = SvPV_const(left, llen);
2339 const char *s = SvPV_const(right, rlen);
cacbe70b
NC
2340 const char *strend = s + rlen;
2341 const char *patend = pat + llen;
500d42d2 2342 I32 cnt;
18529408
IZ
2343
2344 PUTBACK;
cacbe70b 2345 cnt = unpackstring((char *)pat, (char *)patend, (char *)s, (char *)strend,
cf168e39 2346 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
3314a176 2347 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
cf168e39 2348
18529408
IZ
2349 SPAGAIN;
2350 if ( !cnt && gimme == G_SCALAR )
2351 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2352 RETURN;
2353}
2354
3314a176 2355STATIC U8 *
e02cb1b1 2356doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2357{
3314a176 2358 *h++ = PL_uuemap[len];
a6ec74c1 2359 while (len > 2) {
3314a176
TH
2360 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2361 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2362 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2363 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2364 s += 3;
2365 len -= 3;
2366 }
2367 if (len > 0) {
e02cb1b1 2368 const char r = (len > 1 ? s[1] : '\0');
3314a176
TH
2369 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2370 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2371 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2372 *h++ = PL_uuemap[0];
a6ec74c1 2373 }
3314a176
TH
2374 *h++ = '\n';
2375 return h;
a6ec74c1
JH
2376}
2377
2378STATIC SV *
cacbe70b 2379S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2380{
e02cb1b1
NC
2381 SV *result = newSVpvn(s, l);
2382 char *const result_c = SvPV_nolen(result); /* convenience */
2383 char *out = result_c;
2384 bool skip = 1;
2385 bool ignore = 0;
a6ec74c1
JH
2386
2387 while (*s) {
2388 switch (*s) {
2389 case ' ':
2390 break;
2391 case '+':
2392 if (!skip) {
2393 SvREFCNT_dec(result);
2394 return (NULL);
2395 }
2396 break;
2397 case '0':
2398 case '1':
2399 case '2':
2400 case '3':
2401 case '4':
2402 case '5':
2403 case '6':
2404 case '7':
2405 case '8':
2406 case '9':
2407 skip = 0;
2408 if (!ignore) {
2409 *(out++) = *s;
2410 }
2411 break;
2412 case '.':
2413 ignore = 1;
2414 break;
2415 default:
2416 SvREFCNT_dec(result);
2417 return (NULL);
2418 }
2419 s++;
2420 }
2421 *(out++) = '\0';
2422 SvCUR_set(result, out - result_c);
2423 return (result);
2424}
2425
2426/* pnum must be '\0' terminated */
2427STATIC int
2428S_div128(pTHX_ SV *pnum, bool *done)
2429{
e02cb1b1
NC
2430 STRLEN len;
2431 char * const s = SvPV(pnum, len);
2432 char *t = s;
2433 int m = 0;
2434
2435 *done = 1;
2436 while (*t) {
2437 const int i = m * 10 + (*t - '0');
2438 const int r = (i >> 7); /* r < 10 */
2439 m = i & 0x7F;
2440 if (r) {
2441 *done = 0;
2442 }
2443 *(t++) = '0' + r;
a6ec74c1 2444 }
e02cb1b1
NC
2445 *(t++) = '\0';
2446 SvCUR_set(pnum, (STRLEN) (t - s));
2447 return (m);
a6ec74c1
JH
2448}
2449
18529408
IZ
2450/*
2451=for apidoc pack_cat
2452
f3479639
JH
2453The engine implementing pack() Perl function. Note: parameters next_in_list and
2454flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2455
2456=cut */
2457
cf168e39 2458
18529408 2459void
970ba7cf
NC
2460Perl_pack_cat(pTHX_ SV *cat, char *pat, char *patend, register SV **beglist,
2461 SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2462{
e02cb1b1 2463 tempsym_t sym;
bc743096
AL
2464 PERL_UNUSED_ARG(next_in_list);
2465 PERL_UNUSED_ARG(flags);
f3479639 2466
e02cb1b1
NC
2467 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2468
f3479639
JH
2469 (void)pack_rec( cat, &sym, beglist, endlist );
2470}
2471
2472
2473/*
2474=for apidoc packlist
2475
2476The engine implementing pack() Perl function.
2477
2478=cut */
2479
2480
2481void
970ba7cf
NC
2482Perl_packlist(pTHX_ SV *cat, char *pat, char *patend, register SV **beglist,
2483 SV **endlist)
f3479639 2484{
3314a176 2485 STRLEN no_len;
e02cb1b1
NC
2486 tempsym_t sym;
2487
2488 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
cf168e39 2489
3314a176
TH
2490 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2491 Also make sure any UTF8 flag is loaded */
2492 SvPV_force(cat, no_len);
2493 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2494
cf168e39
WL
2495 (void)pack_rec( cat, &sym, beglist, endlist );
2496}
2497
3314a176
TH
2498/* like sv_utf8_upgrade, but also repoint the group start markers */
2499STATIC void
2500marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2501 STRLEN len;
2502 tempsym_t *group;
e02cb1b1
NC
2503 const char *from_ptr, *from_start, *from_end, **marks, **m;
2504 char *to_start, *to_ptr;
3314a176
TH
2505
2506 if (SvUTF8(sv)) return;
2507
2508 from_start = SvPVX(sv);
2509 from_end = from_start + SvCUR(sv);
2510 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2511 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2512 if (from_ptr == from_end) {
2513 /* Simple case: no character needs to be changed */
2514 SvUTF8_on(sv);
2515 return;
2516 }
2517
773e5b22 2518 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
3314a176
TH
2519 New('U', to_start, len, char);
2520 Copy(from_start, to_start, from_ptr-from_start, char);
2521 to_ptr = to_start + (from_ptr-from_start);
2522
e02cb1b1 2523 New('U', marks, sym_ptr->level+2, const char *);
3314a176
TH
2524 for (group=sym_ptr; group; group = group->previous)
2525 marks[group->level] = from_start + group->strbeg;
2526 marks[sym_ptr->level+1] = from_end+1;
2527 for (m = marks; *m < from_ptr; m++)
2528 *m = to_start + (*m-from_start);
2529
2530 for (;from_ptr < from_end; from_ptr++) {
2531 while (*m == from_ptr) *m++ = to_ptr;
a4de8f71 2532 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
3314a176
TH
2533 }
2534 *to_ptr = 0;
2535
2536 while (*m == from_ptr) *m++ = to_ptr;
2537 if (m != marks + sym_ptr->level+1) {
2538 Safefree(marks);
2539 Safefree(to_start);
2540 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2541 }
2542 for (group=sym_ptr; group; group = group->previous)
2543 group->strbeg = marks[group->level] - to_start;
2544 Safefree(marks);
2545
2546 if (SvOOK(sv)) {
2547 if (SvIVX(sv)) {
2548 SvLEN(sv) += SvIVX(sv);
2549 from_start -= SvIVX(sv);
2550 SvIV_set(sv, 0);
2551 }
2552 SvFLAGS(sv) &= ~SVf_OOK;
2553 }
2554 if (SvLEN(sv) != 0)
2555 Safefree(from_start);
2556 SvPVX(sv) = to_start;
2557 SvCUR(sv) = to_ptr - to_start;
2558 SvLEN(sv) = len;
2559 SvUTF8_on(sv);
2560}
2561
2562/* Exponential string grower. Makes string extension effectively O(n)
2563 needed says how many extra bytes we need (not counting the final '\0')
2564 Only grows the string if there is an actual lack of space
2565*/
2566STATIC char *
8c89da26 2567S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
e02cb1b1
NC
2568 const STRLEN cur = SvCUR(sv);
2569 const STRLEN len = SvLEN(sv);
3314a176
TH
2570 STRLEN extend;
2571 if (len - cur > needed) return SvPVX(sv);
2572 extend = needed > len ? needed : len;
2573 return SvGROW(sv, len+extend+1);
2574}
cf168e39
WL
2575
2576STATIC
2577SV **
3314a176 2578S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
cf168e39 2579{
cf168e39 2580 tempsym_t lookahead;
3314a176
TH
2581 I32 items = endlist - beglist;
2582 bool found = next_symbol(symptr);
2583#ifdef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2584 const bool utf8 = 0;
2585#else
2586 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2587#endif
bc743096 2588 bool warn_utf8 = ckWARN(WARN_UTF8);
a6ec74c1 2589
3314a176
TH
2590 if (symptr->level == 0 && found && symptr->code == 'U') {
2591 marked_upgrade(aTHX_ cat, symptr);
2592 symptr->flags |= FLAG_DO_UTF8;
2593#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2594 utf8 = 0;
18529408 2595#endif
3314a176
TH
2596 }
2597 symptr->strbeg = SvCUR(cat);
cf168e39
WL
2598
2599 while (found) {
3314a176
TH
2600 SV *fromstr;
2601 STRLEN fromlen;
2602 I32 len;
0e2d6244 2603 SV *lengthcode = NULL;
cf168e39 2604 I32 datumtype = symptr->code;
3314a176
TH
2605 howlen_t howlen = symptr->howlen;
2606 char *start = SvPVX(cat);
2607 char *cur = start + SvCUR(cat);
cf168e39 2608
3314a176
TH
2609#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2610
2611 switch (howlen) {
773e5b22 2612 case e_star:
3314a176
TH
2613 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2614 0 : items;
2615 break;
2616 default:
2617 /* e_no_len and e_number */
2618 len = symptr->length;
cf168e39
WL
2619 break;
2620 }
2621
3314a176 2622 if (len) {
e5464422 2623 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
3314a176 2624
e5464422
TH
2625 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2626 /* We can process this letter. */
2627 STRLEN size = props & PACK_SIZE_MASK;
2628 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2629 }
3314a176
TH
2630 }
2631
cf168e39
WL
2632 /* Look ahead for next symbol. Do we have code/code? */
2633 lookahead = *symptr;
2634 found = next_symbol(&lookahead);
1928d712
NC
2635 if (symptr->flags & FLAG_SLASH) {
2636 IV count;
3314a176 2637 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
1928d712
NC
2638 if (strchr("aAZ", lookahead.code)) {
2639 if (lookahead.howlen == e_number) count = lookahead.length;
2640 else {
5466829a
TS
2641 if (items > 0) {
2642 if (SvGAMAGIC(*beglist)) {
2643 /* Avoid reading the active data more than once
2644 by copying it to a temporary. */
2645 STRLEN len;
2646 const char *const pv = SvPV_const(*beglist, len);
2647 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2648 if (SvUTF8(*beglist))
2649 SvUTF8_on(temp);
2650 *beglist = temp;
2651 }
1928d712
NC
2652 count = DO_UTF8(*beglist) ?
2653 sv_len_utf8(*beglist) : sv_len(*beglist);
5466829a 2654 }
1928d712
NC
2655 else count = 0;
2656 if (lookahead.code == 'Z') count++;
2657 }
2658 } else {
2659 if (lookahead.howlen == e_number && lookahead.length < items)
2660 count = lookahead.length;
2661 else count = items;
2662 }
2663 lookahead.howlen = e_number;
2664 lookahead.length = count;
2665 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2666 }
cf168e39 2667
773e5b22
NC
2668 /* Code inside the switch must take care to properly update
2669 cat (CUR length and '\0' termination) if it updated *cur and
3314a176 2670 doesn't simply leave using break */
cc3ea0aa 2671 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2672 default:
3314a176
TH
2673 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2674 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2675 case '%':
cf168e39 2676 Perl_croak(aTHX_ "'%%' may not be used in pack");
3040befa
TH
2677 {
2678 char *from;
2679#ifdef PERL_PACK_CAN_DOT
2680# ifdef PERL_PACK_CAN_SHRIEKSIGN
2681 case '.' | TYPE_IS_SHRIEKING:
2682# endif
2683 case '.':
2684 if (howlen == e_star) from = start;
2685 else if (len == 0) from = cur;
2686 else {
2687 tempsym_t *group = symptr;
2688
2689 while (--len && group) group = group->previous;
2690 from = group ? start + group->strbeg : start;
2691 }
2692 fromstr = NEXTFROM;
2693 len = SvIV(fromstr);
2694 goto resize;
2695#endif
2696#ifdef PERL_PACK_CAN_SHRIEKSIGN
2697 case '@' | TYPE_IS_SHRIEKING:
2698#endif
a6ec74c1 2699 case '@':
3040befa
TH
2700 from = start + symptr->strbeg;
2701#ifdef PERL_PACK_CAN_DOT
2702 resize:
2703#endif
2704#ifdef PERL_PACK_CAN_SHRIEKSIGN
2705 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2706#else /* PERL_PACK_CAN_SHRIEKSIGN */
2707 if (utf8)
2708#endif
2709 if (len >= 0) {
2710 while (len && from < cur) {
2711 from += UTF8SKIP(from);
2712 len--;
2713 }
2714 if (from > cur)
2715 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2716 if (len) {
2717 /* Here we know from == cur */
2718 grow:
2719 GROWING(0, cat, start, cur, len);
2720 Zero(cur, len, char);
2721 cur += len;
2722 } else if (from < cur) {
2723 len = cur - from;
2724 goto shrink;
2725 } else goto no_change;
2726 } else {
2727 cur = from;
2728 len = -len;
2729 goto utf8_shrink;
3314a176 2730 }
3040befa
TH
2731 else {
2732 len -= cur - from;
3314a176 2733 if (len > 0) goto grow;
3040befa 2734 if (len == 0) goto no_change;
773e5b22 2735 len = -len;
3040befa 2736 goto shrink;
3314a176 2737 }
a6ec74c1 2738 break;
3040befa 2739 }
773e5b22 2740 case '(': {
cf168e39 2741 tempsym_t savsym = *symptr;
cc3ea0aa
NC
2742 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2743 symptr->flags |= group_modifiers;
cf168e39
WL
2744 symptr->patend = savsym.grpend;
2745 symptr->level++;
3314a176 2746 symptr->previous = &lookahead;
18529408 2747 while (len--) {
3314a176
TH
2748 U32 was_utf8;
2749 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2750 else symptr->flags &= ~FLAG_PARSE_UTF8;
2751 was_utf8 = SvUTF8(cat);
cf168e39 2752 symptr->patptr = savsym.grpbeg;
3314a176
TH
2753 beglist = pack_rec(cat, symptr, beglist, endlist);
2754#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2755 if (SvUTF8(cat) != was_utf8)
2756 /* This had better be an upgrade while in utf8==0 mode */
2757 utf8 = 1;
2758#endif
2759
cf168e39 2760 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2761 break; /* No way to continue */
2762 }
3314a176
TH
2763 lookahead.flags = symptr->flags & ~group_modifiers;
2764 goto no_change;
18529408 2765 }
62f95557
IZ
2766 case 'X' | TYPE_IS_SHRIEKING:
2767 if (!len) /* Avoid division by 0 */
2768 len = 1;
3314a176
TH
2769 if (utf8) {
2770 char *hop, *last;
2771 I32 l = len;
2772 hop = last = start;
2773 while (hop < cur) {
2774 hop += UTF8SKIP(hop);
2775 if (--l == 0) {
2776 last = hop;
2777 l = len;
2778 }
2779 }
2780 if (last > cur)
2781 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2782 cur = last;
2783 break;
2784 }
2785 len = (cur-start) % len;
62f95557 2786 /* FALL THROUGH */
a6ec74c1 2787 case 'X':
3314a176
TH
2788 if (utf8) {
2789 if (len < 1) goto no_change;
3040befa 2790 utf8_shrink:
3314a176
TH
2791 while (len > 0) {
2792 if (cur <= start)
3040befa
TH
2793 Perl_croak(aTHX_ "'%c' outside of string in pack",
2794 (int) TYPE_NO_MODIFIERS(datumtype));
3314a176
TH
2795 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2796 if (cur <= start)
3040befa
TH
2797 Perl_croak(aTHX_ "'%c' outside of string in pack",
2798 (int) TYPE_NO_MODIFIERS(datumtype));
3314a176
TH
2799 }
2800 len--;
2801 }
2802 } else {
773e5b22 2803 shrink:
3314a176 2804 if (cur - start < len)
3040befa
TH
2805 Perl_croak(aTHX_ "'%c' outside of string in pack",
2806 (int) TYPE_NO_MODIFIERS(datumtype));
3314a176
TH
2807 cur -= len;
2808 }
2809 if (cur < start+symptr->strbeg) {
2810 /* Make sure group starts don't point into the void */
2811 tempsym_t *group;
a4de8f71 2812 const STRLEN length = cur-start;
3314a176
TH
2813 for (group = symptr;
2814 group && length < group->strbeg;
2815 group = group->previous) group->strbeg = length;
2816 lookahead.strbeg = length;
2817 }
a6ec74c1 2818 break;
773e5b22
NC
2819 case 'x' | TYPE_IS_SHRIEKING: {
2820 I32 ai32;
62f95557
IZ
2821 if (!len) /* Avoid division by 0 */
2822 len = 1;
a4de8f71 2823 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
773e5b22
NC
2824 else ai32 = (cur - start) % len;
2825 if (ai32 == 0) goto no_change;
2826 len -= ai32;
2827 }
2828 /* FALL THROUGH */
a6ec74c1 2829 case 'x':
3314a176 2830 goto grow;
a6ec74c1
JH
2831 case 'A':
2832 case 'Z':
3314a176 2833 case 'a': {
e02cb1b1 2834 const char *aptr;
3314a176 2835
a6ec74c1 2836 fromstr = NEXTFROM;
cacbe70b 2837 aptr = (char *) SvPV_const(fromstr, fromlen);
3314a176
TH
2838#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2839 if (DO_UTF8(fromstr)) {
e02cb1b1 2840 const char *end, *s;
3314a176
TH
2841
2842 if (!utf8 && !SvUTF8(cat)) {
2843 marked_upgrade(aTHX_ cat, symptr);
2844 lookahead.flags |= FLAG_DO_UTF8;
2845 lookahead.strbeg = symptr->strbeg;
2846 utf8 = 1;
2847 start = SvPVX(cat);
2848 cur = start + SvCUR(cat);
2849 }
773e5b22 2850 if (howlen == e_star) {
3314a176
TH
2851 if (utf8) goto string_copy;
2852 len = fromlen+1;
2853 }
2854 s = aptr;
2855 end = aptr + fromlen;
2856 fromlen = datumtype == 'Z' ? len-1 : len;
2857 while ((I32) fromlen > 0 && s < end) {
2858 s += UTF8SKIP(s);
2859 fromlen--;
2860 }
2861 if (s > end)
2862 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2863 if (utf8) {
773e5b22 2864 len = fromlen;
3314a176
TH
2865 if (datumtype == 'Z') len++;
2866 fromlen = s-aptr;
2867 len += fromlen;
773e5b22 2868
3314a176 2869 goto string_copy;
773e5b22 2870 }
3314a176
TH
2871 fromlen = len - fromlen;
2872 if (datumtype == 'Z') fromlen--;
2873 if (howlen == e_star) {
2874 len = fromlen;
2875 if (datumtype == 'Z') len++;
773e5b22 2876 }
3314a176 2877 GROWING(0, cat, start, cur, len);
773e5b22 2878 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
3314a176
TH
2879 datumtype | TYPE_IS_PACK))
2880 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2881 cur += fromlen;
a6ec74c1 2882 len -= fromlen;
3314a176
TH
2883 } else if (utf8) {
2884 if (howlen == e_star) {
2885 len = fromlen;
2886 if (datumtype == 'Z') len++;
a6ec74c1 2887 }
3314a176
TH
2888 if (len <= (I32) fromlen) {
2889 fromlen = len;
2890 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2891 }
773e5b22
NC
2892 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2893 upgrade, so:
2894 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2895 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
3314a176
TH
2896 len -= fromlen;
2897 while (fromlen > 0) {
a4de8f71 2898 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
3314a176
TH
2899 aptr++;
2900 fromlen--;
773e5b22 2901 }
3314a176
TH
2902 } else
2903#endif
2904 {
2905#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2906 string_copy:
2907#endif
2908 if (howlen == e_star) {
2909 len = fromlen;
2910 if (datumtype == 'Z') len++;
a6ec74c1 2911 }
3314a176
TH
2912 if (len <= (I32) fromlen) {
2913 fromlen = len;
2914 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2915 }
2916 GROWING(0, cat, start, cur, len);
2917 Copy(aptr, cur, fromlen, char);
2918 cur += fromlen;
2919 len -= fromlen;
a6ec74c1 2920 }
3314a176
TH
2921 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2922 cur += len;
a6ec74c1 2923 break;
3314a176 2924 }
a6ec74c1 2925 case 'B':
3314a176 2926 case 'b': {
e02cb1b1 2927 const char *str, *end;
3314a176
TH
2928 I32 l, field_len;
2929 U8 bits;
2930 bool utf8_source;
2931 U32 utf8_flags;
a6ec74c1 2932
773e5b22 2933 fromstr = NEXTFROM;
e02cb1b1 2934 str = SvPV_const(fromstr, fromlen);
3314a176
TH
2935 end = str + fromlen;
2936#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2937 if (DO_UTF8(fromstr)) {
2938 utf8_source = TRUE;
bc743096 2939 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
3314a176
TH
2940 } else
2941#endif
2942 {
2943 utf8_source = FALSE;
2944 utf8_flags = 0; /* Unused, but keep compilers happy */
2945 }
2946 if (howlen == e_star) len = fromlen;
2947 field_len = (len+7)/8;
2948 GROWING(utf8, cat, start, cur, field_len);
2949 if (len > (I32)fromlen) len = fromlen;
2950 bits = 0;
2951 l = 0;
2952 if (datumtype == 'B')
2953 while (l++ < len) {
2954 if (utf8_source) {
2955 UV val;
2956 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2957 bits |= val & 1;
2958 } else bits |= *str++ & 1;
2959 if (l & 7) bits <<= 1;
773e5b22 2960 else {
3314a176
TH
2961 PUSH_BYTE(utf8, cur, bits);
2962 bits = 0;
a6ec74c1
JH
2963 }
2964 }
3314a176
TH
2965 else
2966 /* datumtype == 'b' */
2967 while (l++ < len) {
2968 if (utf8_source) {
2969 UV val;
2970 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2971 if (val & 1) bits |= 0x80;
2972 } else if (*str++ & 1)
2973 bits |= 0x80;
2974 if (l & 7) bits >>= 1;
773e5b22 2975 else {
3314a176
TH
2976 PUSH_BYTE(utf8, cur, bits);
2977 bits = 0;
a6ec74c1
JH
2978 }
2979 }
3314a176
TH
2980 l--;
2981 if (l & 7) {
773e5b22 2982 if (datumtype == 'B')
3314a176 2983 bits <<= 7 - (l & 7);
773e5b22 2984 else
3314a176
TH
2985 bits >>= 7 - (l & 7);
2986 PUSH_BYTE(utf8, cur, bits);
2987 l += 7;
a6ec74c1 2988 }
3314a176
TH
2989 /* Determine how many chars are left in the requested field */
2990 l /= 8;
2991 if (howlen == e_star) field_len = 0;
2992 else field_len -= l;
2993 Zero(cur, field_len, char);
2994 cur += field_len;
a6ec74c1 2995 break;
3314a176 2996 }
a6ec74c1 2997 case 'H':
3314a176
TH
2998 case 'h': {
2999 char *str, *end;
3000 I32 l, field_len;
3001 U8 bits;
3002 bool utf8_source;
3003 U32 utf8_flags;
a6ec74c1 3004
773e5b22
NC
3005 fromstr = NEXTFROM;
3006 str = SvPV(fromstr, fromlen);
3314a176
TH
3007 end = str + fromlen;
3008#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3009 if (DO_UTF8(fromstr)) {
3010 utf8_source = TRUE;
bc743096 3011 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
3314a176
TH
3012 } else
3013#endif
3014 {
3015 utf8_source = FALSE;
3016 utf8_flags = 0; /* Unused, but keep compilers happy */
3017 }
3018 if (howlen == e_star) len = fromlen;
3019 field_len = (len+1)/2;
3020 GROWING(utf8, cat, start, cur, field_len);
3021 if (!utf8 && len > (I32)fromlen) len = fromlen;
3022 bits = 0;
3023 l = 0;
3024 if (datumtype == 'H')
3025 while (l++ < len) {
3026 if (utf8_source) {
3027 UV val;
3028 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
3029 if (val < 256 && isALPHA(val))
3030 bits |= (val + 9) & 0xf;
a6ec74c1 3031 else
3314a176
TH
3032 bits |= val & 0xf;
3033 } else if (isALPHA(*str))
3034 bits |= (*str++ + 9) & 0xf;
3035 else
3036 bits |= *str++ & 0xf;
3037 if (l & 1) bits <<= 4;
773e5b22 3038 else {
3314a176
TH
3039 PUSH_BYTE(utf8, cur, bits);
3040 bits = 0;
a6ec74c1
JH
3041 }
3042 }
3314a176
TH
3043 else
3044 while (l++ < len) {
3045 if (utf8_source) {
3046 UV val;
3047 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
3048 if (val < 256 && isALPHA(val))
3049 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 3050 else
3314a176
TH
3051 bits |= (val & 0xf) << 4;
3052 } else if (isALPHA(*str))
3053 bits |= ((*str++ + 9) & 0xf) << 4;
3054 else
3055 bits |= (*str++ & 0xf) << 4;
3056 if (l & 1) bits >>= 4;
773e5b22 3057 else {
3314a176
TH
3058 PUSH_BYTE(utf8, cur, bits);
3059 bits = 0;
a6ec74c1 3060 }
773e5b22 3061 }
3314a176
TH
3062 l--;
3063 if (l & 1) {
3064 PUSH_BYTE(utf8, cur, bits);
3065 l++;
3066 }
3067 /* Determine how many chars are left in the requested field */
3068 l /= 2;
3069 if (howlen == e_star) field_len = 0;
3070 else field_len -= l;
3071 Zero(cur, field_len, char);
3072 cur += field_len;
3073 break;
773e5b22
NC
3074 }
3075 case 'c':
3314a176
TH
3076 while (len-- > 0) {
3077 IV aiv;
3078 fromstr = NEXTFROM;
3079 aiv = SvIV(fromstr);
3080 if ((-128 > aiv || aiv > 127) &&
3081 ckWARN(WARN_PACK))
3082 Perl_warner(aTHX_ packWARN(WARN_PACK),
3083 "Character in 'c' format wrapped in pack");
820b8518 3084 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
3085 }
3086 break;
3087 case 'C':
3314a176
TH
3088 if (len == 0) {
3089#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3090 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
3091#endif
3092 break;
3093 }
3094 GROWING(0, cat, start, cur, len);
a6ec74c1 3095 while (len-- > 0) {
3314a176 3096 IV aiv;
a6ec74c1 3097 fromstr = NEXTFROM;
3314a176
TH
3098 aiv = SvIV(fromstr);
3099 if ((0 > aiv || aiv > 0xff) &&
773e5b22
NC
3100 ckWARN(WARN_PACK))
3101 Perl_warner(aTHX_ packWARN(WARN_PACK),
3102 "Character in 'C' format wrapped in pack");
820b8518 3103 *cur++ = (char)(aiv & 0xff);
3314a176 3104 }
773e5b22 3105 break;
3314a176 3106#ifdef PERL_PACK_CAN_W
773e5b22
NC
3107 case 'W': {
3108 char *end;
3109 U8 in_bytes = IN_BYTES;
3110
3111 end = start+SvLEN(cat)-1;
3112 if (utf8) end -= UTF8_MAXLEN-1;
3113 while (len-- > 0) {
3114 UV auv;
3115 fromstr = NEXTFROM;
3116 auv = SvUV(fromstr);
3117 if (in_bytes) auv = auv % 0x100;
3118 if (utf8) {
3119 W_utf8:
3120 if (cur > end) {
3121 *cur = '\0';
3122 SvCUR(cat) = cur - start;
3123
3124 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3125 end = start+SvLEN(cat)-UTF8_MAXLEN;
3126 }
a4de8f71
NC
3127 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3128 NATIVE_TO_UNI(auv),
bc743096 3129 warn_utf8 ?
a4de8f71 3130 0 : UNICODE_ALLOW_ANY);
773e5b22
NC
3131 } else {
3132 if (auv >= 0x100) {
3133 if (!SvUTF8(cat)) {
3134 *cur = '\0';
3135 SvCUR(cat) = cur - start;
3136 marked_upgrade(aTHX_ cat, symptr);
3137 lookahead.flags |= FLAG_DO_UTF8;
3138 lookahead.strbeg = symptr->strbeg;
3139 utf8 = 1;
3140 start = SvPVX(cat);
3141 cur = start + SvCUR(cat);
3142 end = start+SvLEN(cat)-UTF8_MAXLEN;
3143 goto W_utf8;
3144 }
3145 if (ckWARN(WARN_PACK))
3146 Perl_warner(aTHX_ packWARN(WARN_PACK),
3147 "Character in 'W' format wrapped in pack");
3148 auv &= 0xff;
3149 }
3150 if (cur >= end) {
3151 *cur = '\0';
3152 SvCUR(cat) = cur - start;
3153 GROWING(0, cat, start, cur, len+1);
3154 end = start+SvLEN(cat)-1;
3155 }
3156 *(U8 *) cur++ = auv;
a6ec74c1
JH
3157 }
3158 }
3159 break;
773e5b22 3160 }
3314a176 3161#endif
773e5b22
NC
3162 case 'U': {
3163 char *end;
3314a176 3164
773e5b22 3165 if (len == 0) {
3314a176 3166#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
773e5b22
NC
3167 if (!(symptr->flags & FLAG_DO_UTF8)) {
3168 marked_upgrade(aTHX_ cat, symptr);
3169 lookahead.flags |= FLAG_DO_UTF8;
3170 lookahead.strbeg = symptr->strbeg;
3171 }
3172 utf8 = 0;
3173#endif
3174 goto no_change;
3175 }
3176
3177 end = start+SvLEN(cat);
3178 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3179 while (len-- > 0) {
773e5b22 3180 UV auv;
a6ec74c1 3181 fromstr = NEXTFROM;
773e5b22
NC
3182 auv = SvUV(fromstr);
3183 if (utf8) {
a4de8f71 3184 U8 buffer[UTF8_MAXLEN], *endb;
773e5b22 3185 endb = uvuni_to_utf8_flags(buffer, auv,
bc743096 3186 warn_utf8 ?
773e5b22
NC
3187 0 : UNICODE_ALLOW_ANY);
3188 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3189 *cur = '\0';
3190 SvCUR(cat) = cur - start;
3191 GROWING(0, cat, start, cur,
3192 len+(endb-buffer)*UTF8_EXPAND);
3193 end = start+SvLEN(cat);
3194 }
3195 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3196 } else {
3197 if (cur >= end) {
3198 *cur = '\0';
3199 SvCUR(cat) = cur - start;
3200 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3201 end = start+SvLEN(cat)-UTF8_MAXLEN;
3202 }
a4de8f71 3203 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
bc743096 3204 warn_utf8 ?
a4de8f71 3205 0 : UNICODE_ALLOW_ANY);
773e5b22 3206 }
a6ec74c1 3207 }
a6ec74c1 3208 break;
773e5b22 3209 }
a6ec74c1
JH
3210 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3211 case 'f':
a6ec74c1 3212 while (len-- > 0) {
3314a176
TH
3213 float afloat;
3214 NV anv;
a6ec74c1 3215 fromstr = NEXTFROM;
3314a176 3216 anv = SvNV(fromstr);
5cdb9e01 3217#ifdef __VOS__
3314a176 3218 /* VOS does not automatically map a floating-point overflow
773e5b22
NC
3219 during conversion from double to float into infinity, so we
3220 do it by hand. This code should either be generalized for
3221 any OS that needs it, or removed if and when VOS implements
3222 posix-976 (suggestion to support mapping to infinity).
3223 Paul.Green@stratus.com 02-04-02. */
3314a176 3224 if (anv > FLT_MAX)
773e5b22 3225 afloat = _float_constants[0]; /* single prec. inf. */
3314a176 3226 else if (anv < -FLT_MAX)
773e5b22 3227 afloat = _float_constants[0]; /* single prec. inf. */
3314a176
TH
3228 else afloat = (float) anv;
3229#else /* __VOS__ */
baf3cf9c 3230# if defined(VMS) && !defined(__IEEE_FP)
3314a176 3231 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
773e5b22
NC
3232 * on Alpha; fake it if we don't have them.
3233 */
3314a176 3234 if (anv > FLT_MAX)
773e5b22 3235 afloat = FLT_MAX;
3314a176 3236 else if (anv < -FLT_MAX)
773e5b22 3237 afloat = -FLT_MAX;
3314a176 3238 else afloat = (float)anv;
baf3cf9c 3239# else
3314a176 3240 afloat = (float)anv;
baf3cf9c 3241# endif
3314a176 3242#endif /* __VOS__ */
cc3ea0aa 3243 DO_BO_PACK_N(afloat, float);
3314a176 3244 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3245 }
3246 break;
3247 case 'd':
a6ec74c1 3248 while (len-- > 0) {
3314a176
TH
3249 double adouble;
3250 NV anv;
a6ec74c1 3251 fromstr = NEXTFROM;
3314a176 3252 anv = SvNV(fromstr);
5cdb9e01 3253#ifdef __VOS__
3314a176 3254 /* VOS does not automatically map a floating-point overflow
773e5b22
NC
3255 during conversion from long double to double into infinity,
3256 so we do it by hand. This code should either be generalized
3257 for any OS that needs it, or removed if and when VOS
3258 implements posix-976 (suggestion to support mapping to
3259 infinity). Paul.Green@stratus.com 02-04-02. */
3314a176 3260 if (anv > DBL_MAX)
773e5b22 3261 adouble = _double_constants[0]; /* double prec. inf. */
3314a176 3262 else if (anv < -DBL_MAX)
773e5b22 3263 adouble = _double_constants[0]; /* double prec. inf. */
3314a176
TH
3264 else adouble = (double) anv;
3265#else /* __VOS__ */
baf3cf9c 3266# if defined(VMS) && !defined(__IEEE_FP)
3314a176 3267 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
773e5b22
NC
3268 * on Alpha; fake it if we don't have them.
3269 */
3314a176 3270 if (anv > DBL_MAX)
773e5b22 3271 adouble = DBL_MAX;
3314a176 3272 else if (anv < -DBL_MAX)
773e5b22 3273 adouble = -DBL_MAX;
3314a176 3274 else adouble = (double)anv;
baf3cf9c 3275# else
3314a176 3276 adouble = (double)anv;
baf3cf9c 3277# endif
3314a176 3278#endif /* __VOS__ */
cc3ea0aa 3279 DO_BO_PACK_N(adouble, double);
3314a176 3280 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3281 }
3282 break;
773e5b22
NC
3283 case 'F': {
3284 NV anv;
cc3ea0aa 3285 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3286 while (len-- > 0) {
3287 fromstr = NEXTFROM;
3288 anv = SvNV(fromstr);
cc3ea0aa 3289 DO_BO_PACK_N(anv, NV);
773e5b22 3290 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3291 }
3292 break;
773e5b22 3293 }
92d41999 3294#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
773e5b22
NC
3295 case 'D': {
3296 long double aldouble;
cc3ea0aa
NC
3297 /* long doubles can have unused bits, which may be nonzero */
3298 Zero(&aldouble, 1, long double);
92d41999
JH
3299 while (len-- > 0) {
3300 fromstr = NEXTFROM;
3301 aldouble = (long double)SvNV(fromstr);
cc3ea0aa 3302 DO_BO_PACK_N(aldouble, long double);
773e5b22 3303 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3304 }
3305 break;
773e5b22 3306 }
92d41999 3307#endif
cc3ea0aa
NC
3308#ifdef PERL_PACK_CAN_SHRIEKSIGN
3309 case 'n' | TYPE_IS_SHRIEKING:
3310#endif
a6ec74c1
JH
3311 case 'n':
3312 while (len-- > 0) {
3314a176 3313 I16 ai16;
a6ec74c1 3314 fromstr = NEXTFROM;
cc3ea0aa 3315 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3316#ifdef HAS_HTONS
cc3ea0aa 3317 ai16 = PerlSock_htons(ai16);
a6ec74c1 3318#endif
3314a176 3319 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3320 }
3321 break;
cc3ea0aa
NC
3322#ifdef PERL_PACK_CAN_SHRIEKSIGN
3323 case 'v' | TYPE_IS_SHRIEKING:
3324#endif
a6ec74c1
JH
3325 case 'v':
3326 while (len-- > 0) {
3314a176 3327 I16 ai16;
a6ec74c1 3328 fromstr = NEXTFROM;
cc3ea0aa 3329 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3330#ifdef HAS_HTOVS
cc3ea0aa 3331 ai16 = htovs(ai16);
a6ec74c1 3332#endif
3314a176 3333 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3334 }
3335 break;
cf168e39 3336 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3337#if SHORTSIZE != SIZE16
773e5b22 3338 while (len-- > 0) {
3314a176 3339 unsigned short aushort;
773e5b22
NC
3340 fromstr = NEXTFROM;
3341 aushort = SvUV(fromstr);
3342 DO_BO_PACK(aushort, s);
3314a176 3343 PUSH_VAR(utf8, cur, aushort);
773e5b22 3344 }
cf168e39
WL
3345 break;
3346#else
3347 /* Fall through! */
a6ec74c1 3348#endif
cf168e39 3349 case 'S':
773e5b22 3350 while (len-- > 0) {
3314a176 3351 U16 au16;
773e5b22
NC
3352 fromstr = NEXTFROM;
3353 au16 = (U16)SvUV(fromstr);
3354 DO_BO_PACK(au16, 16);
3314a176 3355 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3356 }
3357 break;
cf168e39 3358 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3359#if SHORTSIZE != SIZE16
773e5b22 3360 while (len-- > 0) {
3314a176 3361 short ashort;
773e5b22
NC
3362 fromstr = NEXTFROM;
3363 ashort = SvIV(fromstr);
3364 DO_BO_PACK(ashort, s);
3314a176 3365 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3366 }
cf168e39
WL
3367 break;
3368#else
3369 /* Fall through! */
a6ec74c1 3370#endif
cf168e39
WL
3371 case 's':
3372 while (len-- > 0) {
3314a176 3373 I16 ai16;
cf168e39 3374 fromstr = NEXTFROM;
cc3ea0aa
NC
3375 ai16 = (I16)SvIV(fromstr);
3376 DO_BO_PACK(ai16, 16);
3314a176 3377 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3378 }
3379 break;
3380 case 'I':
cf168e39 3381 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3382 while (len-- > 0) {
3314a176 3383 unsigned int auint;
a6ec74c1
JH
3384 fromstr = NEXTFROM;
3385 auint = SvUV(fromstr);
cc3ea0aa 3386 DO_BO_PACK(auint, i);
3314a176 3387 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3388 }
3389 break;
92d41999
JH
3390 case 'j':
3391 while (len-- > 0) {
3314a176 3392 IV aiv;
92d41999
JH
3393 fromstr = NEXTFROM;
3394 aiv = SvIV(fromstr);
cc3ea0aa
NC
3395#if IVSIZE == INTSIZE
3396 DO_BO_PACK(aiv, i);
3397#elif IVSIZE == LONGSIZE
3398 DO_BO_PACK(aiv, l);
3399#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3400 DO_BO_PACK(aiv, 64);
3314a176
TH
3401#else
3402 Perl_croak(aTHX_ "'j' not supported on this platform");
cc3ea0aa 3403#endif
3314a176 3404 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3405 }
3406 break;
3407 case 'J':
3408 while (len-- > 0) {
3314a176 3409 UV auv;
92d41999
JH
3410 fromstr = NEXTFROM;
3411 auv = SvUV(fromstr);
cc3ea0aa
NC
3412#if UVSIZE == INTSIZE
3413 DO_BO_PACK(auv, i);
3414#elif UVSIZE == LONGSIZE
3415 DO_BO_PACK(auv, l);
3416#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3417 DO_BO_PACK(auv, 64);
3314a176
TH
3418#else
3419 Perl_croak(aTHX_ "'J' not supported on this platform");
cc3ea0aa 3420#endif
3314a176 3421 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3422 }
3423 break;
a6ec74c1
JH
3424 case 'w':
3425 while (len-- > 0) {
3314a176 3426 NV anv;
a6ec74c1 3427 fromstr = NEXTFROM;
15e9f109 3428 anv = SvNV(fromstr);
a6ec74c1 3429
3314a176
TH
3430 if (anv < 0) {
3431 *cur = '\0';
3432 SvCUR(cat) = cur - start;
cf168e39 3433 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3314a176 3434 }
a6ec74c1 3435
196b62db
NC
3436 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3437 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3438 any negative IVs will have already been got by the croak()
3439 above. IOK is untrue for fractions, so we test them
3440 against UV_MAX_P1. */
3314a176
TH
3441 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3442 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3443 char *in = buf + sizeof(buf);
196b62db 3444 UV auv = SvUV(fromstr);
a6ec74c1
JH
3445
3446 do {
eb160463 3447 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3448 auv >>= 7;
3449 } while (auv);
3450 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3314a176
TH
3451 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3452 in, (buf + sizeof(buf)) - in);
3453 } else if (SvPOKp(fromstr))
3454 goto w_string;
a6ec74c1 3455 else if (SvNOKp(fromstr)) {
5b7ea690
JH
3456 /* 10**NV_MAX_10_EXP is the largest power of 10
3457 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3458 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3459 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3460 And with that many bytes only Inf can overflow.
c17baaa1
PG
3461 Some C compilers are strict about integral constant
3462 expressions so we conservatively divide by a slightly
3463 smaller integer instead of multiplying by the exact
3464 floating-point value.
5b7ea690
JH
3465 */
3466#ifdef NV_MAX_10_EXP
3314a176 3467 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
c17baaa1 3468 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
5b7ea690 3469#else
3314a176 3470 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
c17baaa1 3471 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
5b7ea690 3472#endif
a6ec74c1
JH
3473 char *in = buf + sizeof(buf);
3474
e02cb1b1 3475 anv = Perl_floor(anv);
a6ec74c1 3476 do {
e02cb1b1 3477 const NV next = Perl_floor(anv / 128);
a6ec74c1 3478 if (in <= buf) /* this cannot happen ;-) */
cf168e39 3479 Perl_croak(aTHX_ "Cannot compress integer in pack");
5b7ea690 3480 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3481 anv = next;
3482 } while (anv > 0);
a6ec74c1 3483 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3314a176
TH
3484 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3485 in, (buf + sizeof(buf)) - in);
3486 } else {
cacbe70b
NC
3487 const char *from;
3488 char *result, *in;
735b914b
JH
3489 SV *norm;
3490 STRLEN len;
3491 bool done;
3492
3314a176 3493 w_string:
735b914b 3494 /* Copy string and check for compliance */
23c2bb70 3495 from = SvPV_const(fromstr, len);
735b914b 3496 if ((norm = is_an_int(from, len)) == NULL)
cf168e39 3497 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3498
cd7a8267 3499 Newx(result, len, char);
735b914b
JH
3500 in = result + len;
3501 done = FALSE;
3314a176 3502 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3503 result[len - 1] &= 0x7F; /* clear continue bit */
3314a176
TH
3504 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3505 in, (result + len) - in);
735b914b
JH
3506 Safefree(result);
3507 SvREFCNT_dec(norm); /* free norm */
773e5b22 3508 }
a6ec74c1
JH
3509 }
3510 break;
3511 case 'i':
cf168e39 3512 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3513 while (len-- > 0) {
3314a176 3514 int aint;
a6ec74c1
JH
3515 fromstr = NEXTFROM;
3516 aint = SvIV(fromstr);
cc3ea0aa 3517 DO_BO_PACK(aint, i);
3314a176 3518 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3519 }
3520 break;
cc3ea0aa
NC
3521#ifdef PERL_PACK_CAN_SHRIEKSIGN
3522 case 'N' | TYPE_IS_SHRIEKING:
3523#endif
a6ec74c1
JH
3524 case 'N':
3525 while (len-- > 0) {
3314a176 3526 U32 au32;
a6ec74c1 3527 fromstr = NEXTFROM;
cc3ea0aa 3528 au32 = SvUV(fromstr);
a6ec74c1 3529#ifdef HAS_HTONL
cc3ea0aa 3530 au32 = PerlSock_htonl(au32);
a6ec74c1 3531#endif
3314a176 3532 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3533 }
3534 break;
cc3ea0aa
NC
3535#ifdef PERL_PACK_CAN_SHRIEKSIGN
3536 case 'V' | TYPE_IS_SHRIEKING:
3537#endif
a6ec74c1
JH
3538 case 'V':
3539 while (len-- > 0) {
3314a176 3540 U32 au32;
a6ec74c1 3541 fromstr = NEXTFROM;
cc3ea0aa 3542 au32 = SvUV(fromstr);
a6ec74c1 3543#ifdef HAS_HTOVL
cc3ea0aa 3544 au32 = htovl(au32);
a6ec74c1 3545#endif
3314a176 3546 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3547 }
3548 break;
cf168e39 3549 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3550#if LONGSIZE != SIZE32
773e5b22 3551 while (len-- > 0) {
3314a176 3552 unsigned long aulong;
773e5b22
NC
3553 fromstr = NEXTFROM;
3554 aulong = SvUV(fromstr);
3555 DO_BO_PACK(aulong, l);
3314a176 3556 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3557 }
cf168e39
WL
3558 break;
3559#else
3560 /* Fall though! */
a6ec74c1 3561#endif
cf168e39 3562 case 'L':
773e5b22 3563 while (len-- > 0) {
3314a176 3564 U32 au32;
773e5b22
NC
3565 fromstr = NEXTFROM;
3566 au32 = SvUV(fromstr);
3567 DO_BO_PACK(au32, 32);
3314a176 3568 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3569 }
3570 break;
cf168e39 3571 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3572#if LONGSIZE != SIZE32
773e5b22 3573 while (len-- > 0) {
3314a176 3574 long along;
773e5b22
NC
3575 fromstr = NEXTFROM;
3576 along = SvIV(fromstr);
3577 DO_BO_PACK(along, l);
3314a176 3578 PUSH_VAR(utf8, cur, along);
a6ec74c1 3579 }
cf168e39
WL
3580 break;
3581#else
3582 /* Fall though! */
a6ec74c1 3583#endif
cf168e39
WL
3584 case 'l':
3585 while (len-- > 0) {
3314a176 3586 I32 ai32;
cf168e39 3587 fromstr = NEXTFROM;
cc3ea0aa
NC
3588 ai32 = SvIV(fromstr);
3589 DO_BO_PACK(ai32, 32);
3314a176 3590 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3591 }
3592 break;
3593#ifdef HAS_QUAD
3594 case 'Q':
3595 while (len-- > 0) {
3314a176 3596 Uquad_t auquad;
a6ec74c1 3597 fromstr = NEXTFROM;
3314a176 3598 auquad = (Uquad_t) SvUV(fromstr);
cc3ea0aa 3599 DO_BO_PACK(auquad, 64);
3314a176 3600 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3601 }
3602 break;
3603 case 'q':
3604 while (len-- > 0) {
3314a176 3605 Quad_t aquad;
a6ec74c1
JH
3606 fromstr = NEXTFROM;
3607 aquad = (Quad_t)SvIV(fromstr);
cc3ea0aa 3608 DO_BO_PACK(aquad, 64);
3314a176 3609 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3610 }
3611 break;
3314a176 3612#endif /* HAS_QUAD */
a6ec74c1
JH
3613 case 'P':
3614 len = 1; /* assume SV is correct length */
3314a176 3615 GROWING(utf8, cat, start, cur, sizeof(char *));
cf168e39 3616 /* Fall through! */
a6ec74c1
JH
3617 case 'p':
3618 while (len-- > 0) {
3314a176
TH
3619 char *aptr;
3620
a6ec74c1 3621 fromstr = NEXTFROM;
0bc88fab
NC
3622 SvGETMAGIC(fromstr);
3623 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3624 else {
3625 STRLEN n_a;
3626 /* XXX better yet, could spirit away the string to
3627 * a safe spot and hang on to it until the result
3628 * of pack() (and all copies of the result) are
3629 * gone.
3630 */
bc743096
AL
3631 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3632 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3633 Perl_warner(aTHX_ packWARN(WARN_PACK),
773e5b22 3634 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3635 }
3636 if (SvPOK(fromstr) || SvNIOK(fromstr))
cacbe70b 3637 aptr = (char *) SvPV_nomg_const(fromstr, n_a);
a6ec74c1 3638 else
0bc88fab 3639 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3640 }
206b424e 3641 DO_BO_PACK_PC(aptr);
3314a176 3642 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3643 }
3644 break;
3314a176 3645 case 'u': {
e02cb1b1 3646 const char *aptr, *aend;
3314a176 3647#ifdef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
773e5b22 3648 const bool from_utf8 = FALSE;
3314a176 3649#else
773e5b22 3650 bool from_utf8;
3314a176
TH
3651#endif
3652
a6ec74c1 3653 fromstr = NEXTFROM;
773e5b22
NC
3654 if (len <= 2) len = 45;
3655 else len = len / 3 * 3;
3656 if (len >= 64) {
820b8518
NC
3657 if (ckWARN(WARN_PACK))
3658 Perl_warner(aTHX_ packWARN(WARN_PACK),
773e5b22
NC
3659 "Field too wide in 'u' format in pack");
3660 len = 63;
3661 }
cacbe70b 3662 aptr = (char *) SvPV_const(fromstr, fromlen);
3314a176 3663#ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
773e5b22
NC
3664 from_utf8 = DO_UTF8(fromstr);
3665 if (from_utf8) {
3666 aend = aptr + fromlen;
3667 fromlen = sv_len_utf8(fromstr);
3668 } else
3669#endif
3670 {
3671 aend = NULL; /* Unused, but keep compilers happy */
3672 }
3673 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3674
a6ec74c1 3675 while (fromlen > 0) {
773e5b22 3676 U8 *end;
a6ec74c1 3677 I32 todo;
773e5b22 3678 U8 hunk[1+63/3*4+1];
a6ec74c1 3679
eb160463 3680 if ((I32)fromlen > len)
a6ec74c1
JH
3681 todo = len;
3682 else
3683 todo = fromlen;
773e5b22
NC
3684 if (from_utf8) {
3685 char buffer[64];
3686 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3687 'u' | TYPE_IS_PACK)) {
3688 *cur = '\0';
3689 SvCUR(cat) = cur - start;
3690 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3691 }
3692 end = doencodes(hunk, buffer, todo);
3693 } else {
3694 end = doencodes(hunk, aptr, todo);
3695 aptr += todo;
3696 }
3697 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3698 fromlen -= todo;
3699 }
a6ec74c1
JH
3700 break;
3701 }
3314a176
TH
3702 }
3703 *cur = '\0';
3704 SvCUR(cat) = cur - start;
3705 no_change:
cf168e39 3706 *symptr = lookahead;
a6ec74c1 3707 }
cf168e39 3708 return beglist;
18529408
IZ
3709}
3710#undef NEXTFROM
3711
3712
3713PP(pp_pack)
3714{
3715 dSP; dMARK; dORIGMARK; dTARGET;
3716 register SV *cat = TARG;
3717 STRLEN fromlen;
23c2bb70
NC
3718 SV *pat_sv = *++MARK;
3719 register const char *pat = SvPV_const(pat_sv, fromlen);
8ec8dcb0 3720 register const char *patend = pat + fromlen;
18529408
IZ
3721
3722 MARK++;
3723 sv_setpvn(cat, "", 0);
3314a176 3724 SvUTF8_off(cat);
18529408 3725
8ec8dcb0 3726 packlist(cat, (char *) pat, (char *) patend, MARK, SP + 1);
18529408 3727
a6ec74c1
JH
3728 SvSETMAGIC(cat);
3729 SP = ORIGMARK;
3730 PUSHs(cat);
3731 RETURN;
3732}
a6ec74c1 3733
583439ab
NC
3734/*
3735 * Local variables:
3736 * c-indentation-style: bsd
3737 * c-basic-offset: 4
3738 * indent-tabs-mode: t
3739 * End:
3740 *
d8294a4d
NC
3741 * ex: set ts=8 sts=4 sw=4 noet:
3742 */