This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: unpack A strip patch
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e 34#if PERL_VERSION >= 9
f337b084
TH
35# define PERL_PACK_CAN_BYTEORDER
36# define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
39#ifndef CHAR_BIT
40# define CHAR_BIT 8
7212898e 41#endif
3473cf63
RGS
42/* Maximum number of bytes to which a byte can grow due to upgrade */
43#define UTF8_EXPAND 2
7212898e 44
a6ec74c1 45/*
a6ec74c1
JH
46 * Offset for integer pack/unpack.
47 *
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
50 */
51
52/*
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
58 */
59/*
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
62 */
63#define SIZE16 2
64#define SIZE32 4
65
66/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
67 --jhi Feb 1999 */
68
1109a392
MHM
69#if U16SIZE > SIZE16 || U32SIZE > SIZE32
70# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
71# define OFF16(p) ((char*)(p))
72# define OFF32(p) ((char*)(p))
a6ec74c1 73# else
1109a392 74# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
75# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
77# else
08ca2aa3 78 ++++ bad cray byte order
a6ec74c1
JH
79# endif
80# endif
a6ec74c1 81#else
08ca2aa3
TH
82# define OFF16(p) ((char *) (p))
83# define OFF32(p) ((char *) (p))
a6ec74c1
JH
84#endif
85
f337b084
TH
86/* Only to be used inside a loop (see the break) */
87#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
88 if (utf8) { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
90 } else { \
91 Copy(s, OFF16(p), SIZE16, char); \
92 (s) += SIZE16; \
93 } \
94} STMT_END
95
96/* Only to be used inside a loop (see the break) */
97#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
98 if (utf8) { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
100 } else { \
101 Copy(s, OFF32(p), SIZE32, char); \
102 (s) += SIZE32; \
103 } \
104} STMT_END
105
106#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
108
109/* Only to be used inside a loop (see the break) */
f337b084 110#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
111STMT_START { \
112 if (utf8) { \
f337b084
TH
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
115 } else { \
116 Copy(s, (char *) &var, sizeof(var), char); \
117 s += sizeof(var); \
118 } \
08ca2aa3
TH
119} STMT_END
120
f337b084
TH
121#define PUSH_VAR(utf8, aptr, var) \
122 PUSH_BYTES(utf8, aptr, (char *) &(var), sizeof(var))
123
49704364
LW
124/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125#define MAX_SUB_TEMPLATE_LEVEL 100
126
66c611c5 127/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
128#define FLAG_WAS_UTF8 0x40
129#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 130#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 131#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
132#define FLAG_SLASH 0x04
133#define FLAG_COMMA 0x02
134#define FLAG_PACK 0x01
135
a6ec74c1
JH
136STATIC SV *
137S_mul128(pTHX_ SV *sv, U8 m)
138{
139 STRLEN len;
140 char *s = SvPV(sv, len);
141 char *t;
142 U32 i = 0;
143
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
146
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
149 sv = tmpNew;
150 s = SvPV(sv, len);
151 }
152 t = s + len - 1;
153 while (!*t) /* trailing '\0'? */
154 t--;
155 while (t > s) {
156 i = ((*t - '0') << 7) + m;
eb160463
GS
157 *(t--) = '0' + (char)(i % 10);
158 m = (char)(i / 10);
a6ec74c1
JH
159 }
160 return (sv);
161}
162
163/* Explosives and implosives. */
164
165#if 'I' == 73 && 'J' == 74
166/* On an ASCII/ISO kind of system */
167#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
168#else
169/*
170 Some other sort of character set - use memchr() so we don't match
171 the null byte.
172 */
173#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
174#endif
175
66c611c5 176/* type modifiers */
62f95557 177#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
178#define TYPE_IS_BIG_ENDIAN 0x200
179#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 180#define TYPE_IS_PACK 0x800
1109a392 181#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 182#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
183#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
184
7212898e
NC
185#ifdef PERL_PACK_CAN_SHRIEKSIGN
186#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
187#else
188#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
189#endif
190
191#ifndef PERL_PACK_CAN_BYTEORDER
192/* Put "can't" first because it is shorter */
193# define TYPE_ENDIANNESS(t) 0
194# define TYPE_NO_ENDIANNESS(t) (t)
195
196# define ENDIANNESS_ALLOWED_TYPES ""
197
198# define DO_BO_UNPACK(var, type)
199# define DO_BO_PACK(var, type)
200# define DO_BO_UNPACK_PTR(var, type, pre_cast)
201# define DO_BO_PACK_PTR(var, type, pre_cast)
202# define DO_BO_UNPACK_N(var, type)
203# define DO_BO_PACK_N(var, type)
204# define DO_BO_UNPACK_P(var)
205# define DO_BO_PACK_P(var)
66c611c5 206
f337b084 207#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
208
209# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
211
212# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
213
214# define DO_BO_UNPACK(var, type) \
1109a392 215 STMT_START { \
66c611c5 216 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
219 default: break; \
220 } \
221 } STMT_END
222
7212898e 223# define DO_BO_PACK(var, type) \
1109a392 224 STMT_START { \
66c611c5 225 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
228 default: break; \
229 } \
230 } STMT_END
231
7212898e 232# define DO_BO_UNPACK_PTR(var, type, pre_cast) \
1109a392 233 STMT_START { \
66c611c5 234 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (void *) my_betoh ## type ((pre_cast) var); \
237 break; \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (void *) my_letoh ## type ((pre_cast) var); \
240 break; \
241 default: \
242 break; \
243 } \
244 } STMT_END
245
7212898e 246# define DO_BO_PACK_PTR(var, type, pre_cast) \
1109a392 247 STMT_START { \
66c611c5 248 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (void *) my_htobe ## type ((pre_cast) var); \
251 break; \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (void *) my_htole ## type ((pre_cast) var); \
254 break; \
255 default: \
256 break; \
257 } \
258 } STMT_END
259
7212898e 260# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
266 break; \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
270 break; \
271 default: \
272 break; \
273 } \
274 } STMT_END
275
7212898e
NC
276# if PTRSIZE == INTSIZE
277# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
278# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
279# elif PTRSIZE == LONGSIZE
280# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
281# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
282# else
283# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
284# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
285# endif
1109a392 286
7212898e 287# if defined(my_htolen) && defined(my_letohn) && \
1109a392 288 defined(my_htoben) && defined(my_betohn)
7212898e 289# define DO_BO_UNPACK_N(var, type) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
292 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
293 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
294 default: break; \
295 } \
296 } STMT_END
297
7212898e 298# define DO_BO_PACK_N(var, type) \
1109a392 299 STMT_START { \
66c611c5 300 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
301 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
302 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
303 default: break; \
304 } \
305 } STMT_END
7212898e
NC
306# else
307# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
308# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
309# endif
310
f337b084 311#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 312
78d46eaa 313#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 314#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
315#define PACK_SIZE_MASK 0x3F
316
78d46eaa
NC
317/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
318 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
319
320#if TYPE_IS_SHRIEKING != 0x100
321 ++++shriek offset should be 256
322#endif
323
324typedef U8 packprops_t;
78d46eaa
NC
325#if 'J'-'I' == 1
326/* ASCII */
a7a3cfaa
TH
327const packprops_t packprops[512] = {
328 /* normal */
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
333 0, 0, 0,
334 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 335#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 336 /* D */ LONG_DOUBLESIZE,
80a13697 337#else
a7a3cfaa 338 0,
80a13697 339#endif
a7a3cfaa
TH
340 0,
341 /* F */ NVSIZE,
342 0, 0,
343 /* I */ sizeof(unsigned int),
344 /* J */ UVSIZE,
345 0,
346 /* L */ SIZE32,
347 0,
348 /* N */ SIZE32,
349 0, 0,
80a13697 350#if defined(HAS_QUAD)
a7a3cfaa 351 /* Q */ sizeof(Uquad_t),
80a13697 352#else
a7a3cfaa 353 0,
80a13697 354#endif
a7a3cfaa
TH
355 0,
356 /* S */ SIZE16,
357 0,
358 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
359 /* V */ SIZE32,
360 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 /* c */ sizeof(char),
363 /* d */ sizeof(double),
364 0,
365 /* f */ sizeof(float),
366 0, 0,
367 /* i */ sizeof(int),
368 /* j */ IVSIZE,
369 0,
370 /* l */ SIZE32,
371 0,
372 /* n */ SIZE16,
373 0,
374 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 375#if defined(HAS_QUAD)
a7a3cfaa 376 /* q */ sizeof(Quad_t),
80a13697 377#else
a7a3cfaa 378 0,
80a13697 379#endif
a7a3cfaa
TH
380 0,
381 /* s */ SIZE16,
382 0, 0,
383 /* v */ SIZE16,
384 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0,
394 /* shrieking */
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
398 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
399 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 /* I */ sizeof(unsigned int),
401 0, 0,
402 /* L */ sizeof(unsigned long),
403 0,
7212898e 404#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 405 /* N */ SIZE32,
7212898e 406#else
a7a3cfaa 407 0,
7212898e 408#endif
a7a3cfaa
TH
409 0, 0, 0, 0,
410 /* S */ sizeof(unsigned short),
411 0, 0,
7212898e 412#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 413 /* V */ SIZE32,
7212898e 414#else
a7a3cfaa 415 0,
7212898e 416#endif
a7a3cfaa
TH
417 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
418 0, 0,
419 /* i */ sizeof(int),
420 0, 0,
421 /* l */ sizeof(long),
422 0,
7212898e 423#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 424 /* n */ SIZE16,
7212898e 425#else
a7a3cfaa 426 0,
7212898e 427#endif
a7a3cfaa
TH
428 0, 0, 0, 0,
429 /* s */ sizeof(short),
430 0, 0,
7212898e 431#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 432 /* v */ SIZE16,
7212898e 433#else
a7a3cfaa 434 0,
7212898e 435#endif
a7a3cfaa
TH
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
445};
446#else
447/* EBCDIC (or bust) */
a7a3cfaa
TH
448const packprops_t packprops[512] = {
449 /* normal */
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0,
459 /* c */ sizeof(char),
460 /* d */ sizeof(double),
461 0,
462 /* f */ sizeof(float),
463 0, 0,
464 /* i */ sizeof(int),
465 0, 0, 0, 0, 0, 0, 0,
466 /* j */ IVSIZE,
467 0,
468 /* l */ SIZE32,
469 0,
470 /* n */ SIZE16,
471 0,
472 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 473#if defined(HAS_QUAD)
a7a3cfaa 474 /* q */ sizeof(Quad_t),
80a13697 475#else
a7a3cfaa 476 0,
80a13697 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 /* s */ SIZE16,
480 0, 0,
481 /* v */ SIZE16,
482 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 486#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 487 /* D */ LONG_DOUBLESIZE,
80a13697 488#else
a7a3cfaa 489 0,
80a13697 490#endif
a7a3cfaa
TH
491 0,
492 /* F */ NVSIZE,
493 0, 0,
494 /* I */ sizeof(unsigned int),
495 0, 0, 0, 0, 0, 0, 0,
496 /* J */ UVSIZE,
497 0,
498 /* L */ SIZE32,
499 0,
500 /* N */ SIZE32,
501 0, 0,
80a13697 502#if defined(HAS_QUAD)
a7a3cfaa 503 /* Q */ sizeof(Uquad_t),
80a13697 504#else
a7a3cfaa 505 0,
80a13697 506#endif
a7a3cfaa
TH
507 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 /* S */ SIZE16,
509 0,
510 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
511 /* V */ SIZE32,
512 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 /* shrieking */
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 /* i */ sizeof(int),
526 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 /* l */ sizeof(long),
528 0,
7212898e 529#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 530 /* n */ SIZE16,
7212898e 531#else
a7a3cfaa 532 0,
7212898e 533#endif
a7a3cfaa
TH
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* s */ sizeof(short),
536 0, 0,
7212898e 537#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 538 /* v */ SIZE16,
7212898e 539#else
a7a3cfaa 540 0,
7212898e 541#endif
a7a3cfaa
TH
542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544 0, 0, 0,
545 /* I */ sizeof(unsigned int),
546 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 /* L */ sizeof(unsigned long),
548 0,
7212898e 549#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 550 /* N */ SIZE32,
7212898e 551#else
a7a3cfaa 552 0,
7212898e 553#endif
a7a3cfaa
TH
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 /* S */ sizeof(unsigned short),
556 0, 0,
7212898e 557#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 558 /* V */ SIZE32,
7212898e 559#else
a7a3cfaa 560 0,
7212898e 561#endif
a7a3cfaa
TH
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
564};
565#endif
566
08ca2aa3 567STATIC U8
f337b084 568uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
08ca2aa3
TH
569{
570 UV val;
571 STRLEN retlen;
f337b084
TH
572 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
573 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
574 /* We try to process malformed UTF-8 as much as possible (preferrably with
575 warnings), but these two mean we make no progress in the string and
576 might enter an infinite loop */
577 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
578 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
579 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 580 if (val >= 0x100) {
f337b084 581 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
582 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
583 "Character in '%c' format wrapped in unpack",
f337b084 584 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
585 val &= 0xff;
586 }
587 *s += retlen;
588 return val;
589}
590
f337b084
TH
591#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
592 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
593 *(U8 *)(s)++)
594
595STATIC bool
f337b084 596uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
597{
598 UV val;
599 STRLEN retlen;
600 char *from = *s;
601 int bad = 0;
602 U32 flags = ckWARN(WARN_UTF8) ?
603 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
604 for (;buf_len > 0; buf_len--) {
605 if (from >= end) return FALSE;
f337b084 606 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
607 if (retlen == (STRLEN) -1 || retlen == 0) {
608 from += UTF8SKIP(from);
609 bad |= 1;
610 } else from += retlen;
611 if (val >= 0x100) {
612 bad |= 2;
613 val &= 0xff;
614 }
615 *(U8 *)buf++ = val;
616 }
617 /* We have enough characters for the buffer. Did we have problems ? */
618 if (bad) {
619 if (bad & 1) {
620 /* Rewalk the string fragment while warning */
621 char *ptr;
622 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
623 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
624 if (ptr >= end) break;
f337b084 625 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
626 }
627 if (from > end) from = end;
628 }
629 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 630 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
631 WARN_PACK : WARN_UNPACK),
632 "Character(s) in '%c' format wrapped in %s",
fc241834 633 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 634 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
635 }
636 *s = from;
637 return TRUE;
638}
639
640STATIC bool
641next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
642{
643 UV val;
644 STRLEN retlen;
f337b084 645 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
646 if (val >= 0x100 || !ISUUCHAR(val) ||
647 retlen == (STRLEN) -1 || retlen == 0) {
648 *out = 0;
649 return FALSE;
650 }
651 *out = PL_uudmap[val] & 077;
f337b084 652 *s += retlen;
08ca2aa3
TH
653 return TRUE;
654}
78d46eaa 655
f337b084
TH
656STATIC void
657bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
658 U8 buffer[UTF8_MAXLEN];
659 U8 *end = start + len;
660 char *d = *dest;
661 while (start < end) {
662 int length =
663 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
664 switch(length) {
665 case 1:
666 *d++ = buffer[0];
667 break;
668 case 2:
669 *d++ = buffer[0];
670 *d++ = buffer[1];
671 break;
672 default:
673 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
674 *start, length);
675 }
676 start++;
677 }
678 *dest = d;
679}
680
681#define PUSH_BYTES(utf8, cur, buf, len) \
682STMT_START { \
683 if (utf8) bytes_to_uni(aTHX_ buf, len, &(cur)); \
684 else { \
685 Copy(buf, cur, len, char); \
686 (cur) += (len); \
687 } \
688} STMT_END
689
690#define GROWING(utf8, cat, start, cur, in_len) \
691STMT_START { \
692 STRLEN glen = (in_len); \
3473cf63 693 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
694 if ((cur) + glen >= (start) + SvLEN(cat)) { \
695 (start) = sv_exp_grow(aTHX_ cat, glen); \
696 (cur) = (start) + SvCUR(cat); \
697 } \
698} STMT_END
699
700#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
701STMT_START { \
702 STRLEN glen = (in_len); \
703 STRLEN gl = glen; \
3473cf63 704 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
705 if ((cur) + gl >= (start) + SvLEN(cat)) { \
706 *cur = '\0'; \
707 SvCUR(cat) = (cur) - (start); \
708 (start) = sv_exp_grow(aTHX_ cat, gl); \
709 (cur) = (start) + SvCUR(cat); \
710 } \
711 PUSH_BYTES(utf8, cur, buf, glen); \
712} STMT_END
713
714#define PUSH_BYTE(utf8, s, byte) \
715STMT_START { \
716 if (utf8) { \
717 U8 au8 = (byte); \
718 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
719 } else *(U8 *)(s)++ = (byte); \
720} STMT_END
721
722/* Only to be used inside a loop (see the break) */
723#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
724STMT_START { \
725 STRLEN retlen; \
726 if (str >= end) break; \
727 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
728 if (retlen == (STRLEN) -1 || retlen == 0) { \
729 *cur = '\0'; \
730 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
731 } \
732 str += retlen; \
733} STMT_END
734
206947d2 735/* Returns the sizeof() struct described by pat */
028d1f6d 736STATIC I32
f337b084 737S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 738{
f337b084 739 I32 total = 0;
206947d2 740
49704364 741 while (next_symbol(symptr)) {
f337b084
TH
742 I32 len;
743 int star, size;
f337b084
TH
744
745 switch (symptr->howlen) {
fc241834 746 case e_star:
49704364
LW
747 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
748 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
749 break;
f337b084
TH
750 default:
751 /* e_no_len and e_number */
752 len = symptr->length;
753 break;
49704364
LW
754 }
755
a7a3cfaa 756 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697
NC
757 if (!size) {
758 /* endianness doesn't influence the size of a type */
759 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
760 default:
761 Perl_croak(aTHX_ "Invalid type '%c' in %s",
762 (int)TYPE_NO_MODIFIERS(symptr->code),
763 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
764 case '@':
765 case '/':
766 case 'U': /* XXXX Is it correct? */
767 case 'w':
768 case 'u':
769 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
770 (int)symptr->code,
771 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
772 case '%':
773 size = 0;
774 break;
775 case '(':
fc241834
RGS
776 {
777 tempsym_t savsym = *symptr;
778 symptr->patptr = savsym.grpbeg;
779 symptr->patend = savsym.grpend;
780 /* XXXX Theoretically, we need to measure many times at
781 different positions, since the subexpression may contain
782 alignment commands, but be not of aligned length.
783 Need to detect this and croak(). */
784 size = measure_struct(symptr);
785 *symptr = savsym;
786 break;
787 }
80a13697
NC
788 case 'X' | TYPE_IS_SHRIEKING:
789 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
790 */
791 if (!len) /* Avoid division by 0 */
792 len = 1;
793 len = total % len; /* Assumed: the start is aligned. */
794 /* FALL THROUGH */
795 case 'X':
796 size = -1;
797 if (total < len)
798 Perl_croak(aTHX_ "'X' outside of string in %s",
799 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
800 break;
801 case 'x' | TYPE_IS_SHRIEKING:
802 if (!len) /* Avoid division by 0 */
803 len = 1;
804 star = total % len; /* Assumed: the start is aligned. */
805 if (star) /* Other portable ways? */
806 len = len - star;
807 else
808 len = 0;
809 /* FALL THROUGH */
810 case 'x':
811 case 'A':
812 case 'Z':
813 case 'a':
80a13697
NC
814 size = 1;
815 break;
816 case 'B':
817 case 'b':
818 len = (len + 7)/8;
819 size = 1;
820 break;
821 case 'H':
822 case 'h':
823 len = (len + 1)/2;
824 size = 1;
825 break;
78d46eaa 826
80a13697
NC
827 case 'P':
828 len = 1;
829 size = sizeof(char*);
78d46eaa
NC
830 break;
831 }
206947d2
IZ
832 }
833 total += len * size;
834 }
835 return total;
836}
837
49704364
LW
838
839/* locate matching closing parenthesis or bracket
840 * returns char pointer to char after match, or NULL
841 */
842STATIC char *
843S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 844{
49704364
LW
845 while (patptr < patend) {
846 char c = *patptr++;
847
848 if (isSPACE(c))
849 continue;
850 else if (c == ender)
851 return patptr-1;
852 else if (c == '#') {
853 while (patptr < patend && *patptr != '\n')
854 patptr++;
855 continue;
856 } else if (c == '(')
857 patptr = group_end(patptr, patend, ')') + 1;
858 else if (c == '[')
859 patptr = group_end(patptr, patend, ']') + 1;
18529408 860 }
49704364
LW
861 Perl_croak(aTHX_ "No group ending character '%c' found in template",
862 ender);
863 return 0;
18529408
IZ
864}
865
49704364
LW
866
867/* Convert unsigned decimal number to binary.
868 * Expects a pointer to the first digit and address of length variable
869 * Advances char pointer to 1st non-digit char and returns number
fc241834 870 */
18529408 871STATIC char *
49704364
LW
872S_get_num(pTHX_ register char *patptr, I32 *lenptr )
873{
874 I32 len = *patptr++ - '0';
875 while (isDIGIT(*patptr)) {
876 if (len >= 0x7FFFFFFF/10)
877 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
878 len = (len * 10) + (*patptr++ - '0');
879 }
880 *lenptr = len;
881 return patptr;
882}
883
884/* The marvellous template parsing routine: Using state stored in *symptr,
885 * locates next template code and count
886 */
887STATIC bool
f337b084 888S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 889{
fc241834
RGS
890 char* patptr = symptr->patptr;
891 char* patend = symptr->patend;
f337b084 892 const char *allowed = "";
49704364
LW
893
894 symptr->flags &= ~FLAG_SLASH;
895
896 while (patptr < patend) {
897 if (isSPACE(*patptr))
898 patptr++;
899 else if (*patptr == '#') {
900 patptr++;
901 while (patptr < patend && *patptr != '\n')
902 patptr++;
903 if (patptr < patend)
904 patptr++;
905 } else {
fc241834 906 /* We should have found a template code */
49704364 907 I32 code = *patptr++ & 0xFF;
66c611c5 908 U32 inherited_modifiers = 0;
49704364
LW
909
910 if (code == ','){ /* grandfather in commas but with a warning */
911 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
912 symptr->flags |= FLAG_COMMA;
913 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
914 "Invalid type ',' in %s",
915 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
916 }
917 continue;
918 }
fc241834 919
49704364 920 /* for '(', skip to ')' */
fc241834 921 if (code == '(') {
49704364
LW
922 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
923 Perl_croak(aTHX_ "()-group starts with a count in %s",
924 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
925 symptr->grpbeg = patptr;
926 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
927 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
928 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
929 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
930 }
931
66c611c5
MHM
932 /* look for group modifiers to inherit */
933 if (TYPE_ENDIANNESS(symptr->flags)) {
934 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
935 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
936 }
937
1109a392
MHM
938 /* look for modifiers */
939 while (patptr < patend) {
1109a392
MHM
940 I32 modifier = 0;
941 switch (*patptr) {
942 case '!':
943 modifier = TYPE_IS_SHRIEKING;
7212898e 944 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 945 break;
7212898e 946#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
947 case '>':
948 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 949 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
950 break;
951 case '<':
952 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 953 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 954 break;
f337b084 955#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392
MHM
956 default:
957 break;
958 }
66c611c5 959
1109a392
MHM
960 if (modifier == 0)
961 break;
66c611c5 962
1109a392
MHM
963 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
964 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
965 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
966
967 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392
MHM
968 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
969 (int) TYPE_NO_MODIFIERS(code),
970 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
971 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
972 TYPE_ENDIANNESS_MASK)
973 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
974 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
975
1109a392
MHM
976 if (ckWARN(WARN_UNPACK)) {
977 if (code & modifier)
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Duplicate modifier '%c' after '%c' in %s",
980 *patptr, (int) TYPE_NO_MODIFIERS(code),
981 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
982 }
66c611c5 983
1109a392
MHM
984 code |= modifier;
985 patptr++;
49704364
LW
986 }
987
66c611c5
MHM
988 /* inherit modifiers */
989 code |= inherited_modifiers;
990
fc241834 991 /* look for count and/or / */
49704364
LW
992 if (patptr < patend) {
993 if (isDIGIT(*patptr)) {
994 patptr = get_num( patptr, &symptr->length );
995 symptr->howlen = e_number;
996
997 } else if (*patptr == '*') {
998 patptr++;
999 symptr->howlen = e_star;
1000
1001 } else if (*patptr == '[') {
fc241834 1002 char* lenptr = ++patptr;
49704364
LW
1003 symptr->howlen = e_number;
1004 patptr = group_end( patptr, patend, ']' ) + 1;
1005 /* what kind of [] is it? */
1006 if (isDIGIT(*lenptr)) {
1007 lenptr = get_num( lenptr, &symptr->length );
1008 if( *lenptr != ']' )
1009 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1010 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1011 } else {
1012 tempsym_t savsym = *symptr;
1013 symptr->patend = patptr-1;
1014 symptr->patptr = lenptr;
1015 savsym.length = measure_struct(symptr);
1016 *symptr = savsym;
1017 }
1018 } else {
1019 symptr->howlen = e_no_len;
1020 symptr->length = 1;
1021 }
1022
1023 /* try to find / */
1024 while (patptr < patend) {
1025 if (isSPACE(*patptr))
1026 patptr++;
1027 else if (*patptr == '#') {
1028 patptr++;
1029 while (patptr < patend && *patptr != '\n')
1030 patptr++;
1031 if (patptr < patend)
1032 patptr++;
1033 } else {
66c611c5 1034 if (*patptr == '/') {
49704364
LW
1035 symptr->flags |= FLAG_SLASH;
1036 patptr++;
66c611c5
MHM
1037 if (patptr < patend &&
1038 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364
LW
1039 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1040 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1041 }
1042 break;
1043 }
18529408 1044 }
49704364
LW
1045 } else {
1046 /* at end - no count, no / */
1047 symptr->howlen = e_no_len;
1048 symptr->length = 1;
1049 }
1050
1051 symptr->code = code;
fc241834 1052 symptr->patptr = patptr;
49704364 1053 return TRUE;
18529408 1054 }
49704364 1055 }
fc241834 1056 symptr->patptr = patptr;
49704364 1057 return FALSE;
18529408
IZ
1058}
1059
18529408 1060/*
fc241834 1061 There is no way to cleanly handle the case where we should process the
08ca2aa3 1062 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1063 (e.g. estimates like strend-s as an upper bound for the number of
1064 characters left wouldn't work). So if we foresee the need of this
1065 (pattern starts with U or contains U0), we want to work on the encoded
1066 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1067 themselves if they need to do a lot of unpacks like this on it
1068*/
fc241834 1069STATIC bool
08ca2aa3
TH
1070need_utf8(const char *pat, const char *patend)
1071{
1072 bool first = TRUE;
1073 while (pat < patend) {
1074 if (pat[0] == '#') {
1075 pat++;
1076 pat = memchr(pat, '\n', patend-pat);
1077 if (!pat) return FALSE;
1078 } else if (pat[0] == 'U') {
1079 if (first || pat[1] == '0') return TRUE;
1080 } else first = FALSE;
1081 pat++;
1082 }
1083 return FALSE;
1084}
1085
1086STATIC char
1087first_symbol(const char *pat, const char *patend) {
1088 while (pat < patend) {
1089 if (pat[0] != '#') return pat[0];
1090 pat++;
1091 pat = memchr(pat, '\n', patend-pat);
1092 if (!pat) return 0;
1093 pat++;
1094 }
1095 return 0;
1096}
1097
1098/*
18529408
IZ
1099=for apidoc unpack_str
1100
7accc089
JH
1101The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1102and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1103
1104=cut */
1105
1106I32
f337b084 1107Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1108{
49704364 1109 tempsym_t sym = { 0 };
08ca2aa3 1110
f337b084 1111 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1112 else if (need_utf8(pat, patend)) {
1113 /* We probably should try to avoid this in case a scalar context call
1114 wouldn't get to the "U0" */
1115 STRLEN len = strend - s;
f337b084 1116 s = (char *) bytes_to_utf8(s, &len);
08ca2aa3
TH
1117 SAVEFREEPV(s);
1118 strend = s + len;
f337b084 1119 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1120 }
1121
f337b084
TH
1122 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1123 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1124
49704364
LW
1125 sym.patptr = pat;
1126 sym.patend = patend;
1127 sym.flags = flags;
1128
1129 return unpack_rec(&sym, s, s, strend, NULL );
1130}
1131
7accc089
JH
1132/*
1133=for apidoc unpackstring
1134
608d3aed
LW
1135The engine implementing unpack() Perl function. C<unpackstring> puts the
1136extracted list items on the stack and returns the number of elements.
1137Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1138
1139=cut */
1140
1141I32
f337b084 1142Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
7accc089
JH
1143{
1144 tempsym_t sym = { 0 };
08ca2aa3 1145
f337b084 1146 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1147 else if (need_utf8(pat, patend)) {
1148 /* We probably should try to avoid this in case a scalar context call
1149 wouldn't get to the "U0" */
1150 STRLEN len = strend - s;
f337b084 1151 s = (char *) bytes_to_utf8(s, &len);
08ca2aa3
TH
1152 SAVEFREEPV(s);
1153 strend = s + len;
f337b084 1154 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1155 }
1156
f337b084
TH
1157 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1158 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1159
7accc089
JH
1160 sym.patptr = pat;
1161 sym.patend = patend;
1162 sym.flags = flags;
1163
1164 return unpack_rec(&sym, s, s, strend, NULL );
1165}
1166
49704364
LW
1167STATIC
1168I32
08ca2aa3 1169S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
49704364 1170{
a6ec74c1 1171 dSP;
18529408
IZ
1172 SV *sv;
1173 I32 start_sp_offset = SP - PL_stack_base;
49704364 1174 howlen_t howlen;
a6ec74c1 1175
a6ec74c1 1176 I32 checksum = 0;
92d41999 1177 UV cuv = 0;
a6ec74c1 1178 NV cdouble = 0.0;
f337b084 1179 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364
LW
1180 char* strrelbeg = s;
1181 bool beyond = FALSE;
21c16052 1182 bool explicit_length;
49704364 1183 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1184 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
49704364 1185
49704364 1186 while (next_symbol(symptr)) {
a7a3cfaa 1187 packprops_t props;
f337b084
TH
1188 I32 len, ai32;
1189 I32 datumtype = symptr->code;
206947d2 1190 /* do first one only unless in list context
08ca2aa3 1191 / is implemented by unpacking the count, then popping it from the
206947d2 1192 stack, so must check that we're not in the middle of a / */
49704364 1193 if ( unpack_only_one
206947d2 1194 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1195 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1196 break;
49704364 1197
f337b084 1198 switch (howlen = symptr->howlen) {
fc241834
RGS
1199 case e_star:
1200 len = strend - strbeg; /* long enough */
49704364 1201 break;
f337b084
TH
1202 default:
1203 /* e_no_len and e_number */
1204 len = symptr->length;
1205 break;
49704364 1206 }
18529408 1207
21c16052 1208 explicit_length = TRUE;
a6ec74c1 1209 redo_switch:
49704364 1210 beyond = s >= strend;
a7a3cfaa
TH
1211
1212 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1213 if (props) {
1214 /* props nonzero means we can process this letter. */
1215 long size = props & PACK_SIZE_MASK;
1216 long howmany = (strend - s) / size;
1217 if (len > howmany)
1218 len = howmany;
1219
1220 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1221 if (len && unpack_only_one) len = 1;
1222 EXTEND(SP, len);
1223 EXTEND_MORTAL(len);
78d46eaa
NC
1224 }
1225 }
a7a3cfaa 1226
1109a392 1227 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1228 default:
1109a392 1229 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1230
a6ec74c1 1231 case '%':
49704364 1232 if (howlen == e_no_len)
18529408 1233 len = 16; /* len is not specified */
a6ec74c1 1234 checksum = len;
92d41999 1235 cuv = 0;
a6ec74c1 1236 cdouble = 0;
18529408 1237 continue;
a6ec74c1 1238 break;
18529408
IZ
1239 case '(':
1240 {
49704364 1241 tempsym_t savsym = *symptr;
66c611c5
MHM
1242 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1243 symptr->flags |= group_modifiers;
49704364
LW
1244 symptr->patend = savsym.grpend;
1245 symptr->level++;
18529408
IZ
1246 PUTBACK;
1247 while (len--) {
49704364 1248 symptr->patptr = savsym.grpbeg;
f337b084
TH
1249 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1250 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1251 unpack_rec(symptr, s, strbeg, strend, &s);
1252 if (s == strend && savsym.howlen == e_star)
49704364 1253 break; /* No way to continue */
18529408
IZ
1254 }
1255 SPAGAIN;
66c611c5 1256 symptr->flags &= ~group_modifiers;
49704364
LW
1257 savsym.flags = symptr->flags;
1258 *symptr = savsym;
18529408
IZ
1259 break;
1260 }
a6ec74c1 1261 case '@':
08ca2aa3
TH
1262 if (utf8) {
1263 s = strrelbeg;
1264 while (len > 0) {
1265 if (s >= strend)
1266 Perl_croak(aTHX_ "'@' outside of string in unpack");
1267 s += UTF8SKIP(s);
1268 len--;
1269 }
1270 if (s > strend)
1271 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1272 } else {
fc241834
RGS
1273 if (len > strend - strrelbeg)
1274 Perl_croak(aTHX_ "'@' outside of string in unpack");
1275 s = strrelbeg + len;
08ca2aa3 1276 }
a6ec74c1 1277 break;
62f95557
IZ
1278 case 'X' | TYPE_IS_SHRIEKING:
1279 if (!len) /* Avoid division by 0 */
1280 len = 1;
08ca2aa3
TH
1281 if (utf8) {
1282 char *hop, *last;
f337b084
TH
1283 I32 l = len;
1284 hop = last = strbeg;
1285 while (hop < s) {
1286 hop += UTF8SKIP(hop);
1287 if (--l == 0) {
08ca2aa3 1288 last = hop;
f337b084
TH
1289 l = len;
1290 }
fc241834 1291 }
f337b084
TH
1292 if (last > s)
1293 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1294 s = last;
1295 break;
f337b084
TH
1296 }
1297 len = (s - strbeg) % len;
62f95557 1298 /* FALL THROUGH */
a6ec74c1 1299 case 'X':
08ca2aa3
TH
1300 if (utf8) {
1301 while (len > 0) {
1302 if (s <= strbeg)
1303 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1304 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1305 if (s <= strbeg)
1306 Perl_croak(aTHX_ "'X' outside of string in unpack");
1307 }
1308 len--;
1309 }
1310 } else {
fc241834
RGS
1311 if (len > s - strbeg)
1312 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1313 s -= len;
08ca2aa3 1314 }
a6ec74c1 1315 break;
62f95557
IZ
1316 case 'x' | TYPE_IS_SHRIEKING:
1317 if (!len) /* Avoid division by 0 */
1318 len = 1;
f337b084
TH
1319 if (utf8) ai32 = utf8_length(strbeg, s) % len;
1320 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1321 if (ai32 == 0) break;
1322 len -= ai32;
62f95557 1323 /* FALL THROUGH */
a6ec74c1 1324 case 'x':
08ca2aa3
TH
1325 if (utf8) {
1326 while (len>0) {
1327 if (s >= strend)
1328 Perl_croak(aTHX_ "'x' outside of string in unpack");
1329 s += UTF8SKIP(s);
1330 len--;
1331 }
1332 } else {
fc241834
RGS
1333 if (len > strend - s)
1334 Perl_croak(aTHX_ "'x' outside of string in unpack");
1335 s += len;
f337b084 1336 }
a6ec74c1
JH
1337 break;
1338 case '/':
49704364
LW
1339 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1340 break;
a6ec74c1
JH
1341 case 'A':
1342 case 'Z':
1343 case 'a':
08ca2aa3
TH
1344 if (checksum) {
1345 /* Preliminary length estimate is assumed done in 'W' */
1346 if (len > strend - s) len = strend - s;
1347 goto W_checksum;
1348 }
1349 if (utf8) {
1350 I32 l;
1351 char *hop;
1352 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1353 if (hop >= strend) {
1354 if (hop > strend)
1355 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1356 break;
fc241834 1357 }
a6ec74c1 1358 }
08ca2aa3
TH
1359 if (hop > strend)
1360 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1361 len = hop - s;
1362 } else if (len > strend - s)
1363 len = strend - s;
1364
1365 if (datumtype == 'Z') {
1366 /* 'Z' strips stuff after first null */
f337b084
TH
1367 char *ptr, *end;
1368 end = s + len;
1369 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1370 sv = newSVpvn(s, ptr-s);
1371 if (howlen == e_star) /* exact for 'Z*' */
1372 len = ptr-s + (ptr != strend ? 1 : 0);
1373 } else if (datumtype == 'A') {
1374 /* 'A' strips both nulls and spaces */
1375 char *ptr;
18bdf90a
TH
1376 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1377 for (ptr = s+len-1; ptr >= s; ptr--)
1378 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1379 !is_utf8_space(ptr)) break;
1380 if (ptr >= s) ptr += UTF8SKIP(ptr);
1381 else ptr++;
1382 if (ptr > s+len)
1383 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1384 } else {
1385 for (ptr = s+len-1; ptr >= s; ptr--)
1386 if (*ptr != 0 && !isSPACE(*ptr)) break;
1387 ptr++;
1388 }
08ca2aa3
TH
1389 sv = newSVpvn(s, ptr-s);
1390 } else sv = newSVpvn(s, len);
1391
1392 if (utf8) {
1393 SvUTF8_on(sv);
1394 /* Undo any upgrade done due to need_utf8() */
f337b084 1395 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1396 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1397 }
1398 XPUSHs(sv_2mortal(sv));
08ca2aa3 1399 s += len;
a6ec74c1
JH
1400 break;
1401 case 'B':
08ca2aa3
TH
1402 case 'b': {
1403 char *str;
49704364 1404 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1405 len = (strend - s) * 8;
1406 if (checksum) {
1407 if (!PL_bitcount) {
08ca2aa3 1408 int bits;
a6ec74c1
JH
1409 Newz(601, PL_bitcount, 256, char);
1410 for (bits = 1; bits < 256; bits++) {
1411 if (bits & 1) PL_bitcount[bits]++;
1412 if (bits & 2) PL_bitcount[bits]++;
1413 if (bits & 4) PL_bitcount[bits]++;
1414 if (bits & 8) PL_bitcount[bits]++;
1415 if (bits & 16) PL_bitcount[bits]++;
1416 if (bits & 32) PL_bitcount[bits]++;
1417 if (bits & 64) PL_bitcount[bits]++;
1418 if (bits & 128) PL_bitcount[bits]++;
1419 }
1420 }
f337b084 1421 if (utf8)
08ca2aa3 1422 while (len >= 8 && s < strend) {
f337b084 1423 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1424 len -= 8;
1425 }
f337b084 1426 else
fc241834 1427 while (len >= 8) {
08ca2aa3 1428 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1429 len -= 8;
1430 }
08ca2aa3
TH
1431 if (len && s < strend) {
1432 U8 bits;
f337b084
TH
1433 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1434 if (datumtype == 'b')
a6ec74c1 1435 while (len-- > 0) {
92d41999 1436 if (bits & 1) cuv++;
a6ec74c1
JH
1437 bits >>= 1;
1438 }
f337b084 1439 else
a6ec74c1 1440 while (len-- > 0) {
08ca2aa3 1441 if (bits & 0x80) cuv++;
a6ec74c1
JH
1442 bits <<= 1;
1443 }
fc241834 1444 }
a6ec74c1
JH
1445 break;
1446 }
08ca2aa3
TH
1447
1448 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1449 SvPOK_on(sv);
1450 str = SvPVX(sv);
1451 if (datumtype == 'b') {
f337b084 1452 U8 bits = 0;
08ca2aa3
TH
1453 ai32 = len;
1454 for (len = 0; len < ai32; len++) {
1455 if (len & 7) bits >>= 1;
1456 else if (utf8) {
1457 if (s >= strend) break;
f337b084 1458 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1459 } else bits = *(U8 *) s++;
1460 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1461 }
08ca2aa3 1462 } else {
f337b084 1463 U8 bits = 0;
08ca2aa3
TH
1464 ai32 = len;
1465 for (len = 0; len < ai32; len++) {
1466 if (len & 7) bits <<= 1;
1467 else if (utf8) {
1468 if (s >= strend) break;
f337b084 1469 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1470 } else bits = *(U8 *) s++;
1471 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1472 }
1473 }
1474 *str = '\0';
08ca2aa3
TH
1475 SvCUR_set(sv, str - SvPVX(sv));
1476 XPUSHs(sv);
a6ec74c1 1477 break;
08ca2aa3 1478 }
a6ec74c1 1479 case 'H':
08ca2aa3
TH
1480 case 'h': {
1481 char *str;
fc241834 1482 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1483 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1484 len = (strend - s) * 2;
fc241834 1485 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1486 SvPOK_on(sv);
1487 str = SvPVX(sv);
1488 if (datumtype == 'h') {
f337b084 1489 U8 bits = 0;
fc241834
RGS
1490 ai32 = len;
1491 for (len = 0; len < ai32; len++) {
1492 if (len & 1) bits >>= 4;
1493 else if (utf8) {
1494 if (s >= strend) break;
f337b084 1495 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1496 } else bits = * (U8 *) s++;
a6ec74c1
JH
1497 *str++ = PL_hexdigit[bits & 15];
1498 }
08ca2aa3 1499 } else {
f337b084 1500 U8 bits = 0;
08ca2aa3
TH
1501 ai32 = len;
1502 for (len = 0; len < ai32; len++) {
1503 if (len & 1) bits <<= 4;
1504 else if (utf8) {
1505 if (s >= strend) break;
f337b084 1506 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1507 } else bits = *(U8 *) s++;
a6ec74c1
JH
1508 *str++ = PL_hexdigit[(bits >> 4) & 15];
1509 }
1510 }
1511 *str = '\0';
08ca2aa3
TH
1512 SvCUR_set(sv, str - SvPVX(sv));
1513 XPUSHs(sv);
a6ec74c1 1514 break;
08ca2aa3 1515 }
a6ec74c1 1516 case 'c':
73cb7263 1517 while (len-- > 0) {
f337b084 1518 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1519 if (aint >= 128) /* fake up signed chars */
1520 aint -= 256;
08ca2aa3 1521 if (!checksum)
ac7f3b1b 1522 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1523 else if (checksum > bits_in_uv)
1524 cdouble += (NV)aint;
1525 else
1526 cuv += aint;
a6ec74c1
JH
1527 }
1528 break;
1529 case 'C':
08ca2aa3
TH
1530 case 'W':
1531 W_checksum:
35bcd338 1532 if (len == 0) {
fc241834 1533 if (explicit_length && datumtype == 'C')
08ca2aa3 1534 /* Switch to "character" mode */
f337b084 1535 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1536 break;
1537 }
fc241834 1538 if (datumtype == 'C' ?
f337b084
TH
1539 (symptr->flags & FLAG_DO_UTF8) &&
1540 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3
TH
1541 while (len-- > 0 && s < strend) {
1542 UV val;
1543 STRLEN retlen;
f337b084
TH
1544 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1545 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1546 if (retlen == (STRLEN) -1 || retlen == 0)
1547 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1548 s += retlen;
1549 if (!checksum)
1550 PUSHs(sv_2mortal(newSVuv((UV) val)));
1551 else if (checksum > bits_in_uv)
1552 cdouble += (NV) val;
d6d3e8bd 1553 else
08ca2aa3 1554 cuv += val;
fc241834 1555 }
08ca2aa3 1556 } else if (!checksum)
a6ec74c1 1557 while (len-- > 0) {
08ca2aa3
TH
1558 U8 ch = *(U8 *) s++;
1559 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1560 }
08ca2aa3
TH
1561 else if (checksum > bits_in_uv)
1562 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1563 else
1564 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1565 break;
1566 case 'U':
35bcd338 1567 if (len == 0) {
08ca2aa3
TH
1568 if (explicit_length) {
1569 /* Switch to "bytes in UTF-8" mode */
f337b084 1570 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1571 else
1572 /* Should be impossible due to the need_utf8() test */
1573 Perl_croak(aTHX_ "U0 mode on a byte string");
1574 }
35bcd338
JH
1575 break;
1576 }
08ca2aa3 1577 if (len > strend - s) len = strend - s;
fc241834 1578 if (!checksum) {
08ca2aa3
TH
1579 if (len && unpack_only_one) len = 1;
1580 EXTEND(SP, len);
1581 EXTEND_MORTAL(len);
fc241834 1582 }
08ca2aa3
TH
1583 while (len-- > 0 && s < strend) {
1584 STRLEN retlen;
1585 UV auv;
1586 if (utf8) {
1587 U8 result[UTF8_MAXLEN];
1588 char *ptr;
1589 STRLEN len;
1590 ptr = s;
1591 /* Bug: warns about bad utf8 even if we are short on bytes
1592 and will break out of the loop */
f337b084 1593 if (!uni_to_bytes(aTHX_ &ptr, strend, result, 1, 'U'))
08ca2aa3
TH
1594 break;
1595 len = UTF8SKIP(result);
fc241834 1596 if (!uni_to_bytes(aTHX_ &ptr, strend,
f337b084 1597 &result[1], len-1, 'U')) break;
08ca2aa3
TH
1598 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1599 s = ptr;
1600 } else {
1601 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1602 if (retlen == (STRLEN) -1 || retlen == 0)
1603 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1604 s += retlen;
1605 }
1606 if (!checksum)
1607 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1608 else if (checksum > bits_in_uv)
08ca2aa3 1609 cdouble += (NV) auv;
73cb7263 1610 else
08ca2aa3 1611 cuv += auv;
a6ec74c1
JH
1612 }
1613 break;
49704364
LW
1614 case 's' | TYPE_IS_SHRIEKING:
1615#if SHORTSIZE != SIZE16
73cb7263 1616 while (len-- > 0) {
08ca2aa3 1617 short ashort;
f337b084
TH
1618 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1619 DO_BO_UNPACK(ashort, s);
08ca2aa3 1620 if (!checksum)
ac7f3b1b 1621 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1622 else if (checksum > bits_in_uv)
1623 cdouble += (NV)ashort;
1624 else
1625 cuv += ashort;
49704364
LW
1626 }
1627 break;
1628#else
1629 /* Fallthrough! */
a6ec74c1 1630#endif
49704364 1631 case 's':
73cb7263 1632 while (len-- > 0) {
08ca2aa3
TH
1633 I16 ai16;
1634
1635#if U16SIZE > SIZE16
1636 ai16 = 0;
1637#endif
f337b084 1638 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1639 DO_BO_UNPACK(ai16, 16);
1109a392 1640#if U16SIZE > SIZE16
73cb7263
NC
1641 if (ai16 > 32767)
1642 ai16 -= 65536;
a6ec74c1 1643#endif
08ca2aa3 1644 if (!checksum)
ac7f3b1b 1645 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1646 else if (checksum > bits_in_uv)
1647 cdouble += (NV)ai16;
1648 else
1649 cuv += ai16;
a6ec74c1
JH
1650 }
1651 break;
49704364
LW
1652 case 'S' | TYPE_IS_SHRIEKING:
1653#if SHORTSIZE != SIZE16
73cb7263 1654 while (len-- > 0) {
08ca2aa3 1655 unsigned short aushort;
f337b084
TH
1656 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1657 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1658 if (!checksum)
1659 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1660 else if (checksum > bits_in_uv)
1661 cdouble += (NV)aushort;
1662 else
1663 cuv += aushort;
49704364
LW
1664 }
1665 break;
1666#else
1667 /* Fallhrough! */
1668#endif
a6ec74c1
JH
1669 case 'v':
1670 case 'n':
1671 case 'S':
73cb7263 1672 while (len-- > 0) {
08ca2aa3
TH
1673 U16 au16;
1674#if U16SIZE > SIZE16
1675 au16 = 0;
1676#endif
f337b084 1677 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1678 DO_BO_UNPACK(au16, 16);
a6ec74c1 1679#ifdef HAS_NTOHS
73cb7263
NC
1680 if (datumtype == 'n')
1681 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1682#endif
1683#ifdef HAS_VTOHS
73cb7263
NC
1684 if (datumtype == 'v')
1685 au16 = vtohs(au16);
a6ec74c1 1686#endif
08ca2aa3
TH
1687 if (!checksum)
1688 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1689 else if (checksum > bits_in_uv)
f337b084 1690 cdouble += (NV) au16;
73cb7263
NC
1691 else
1692 cuv += au16;
a6ec74c1
JH
1693 }
1694 break;
7212898e 1695#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1696 case 'v' | TYPE_IS_SHRIEKING:
1697 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1698 while (len-- > 0) {
08ca2aa3
TH
1699 I16 ai16;
1700# if U16SIZE > SIZE16
1701 ai16 = 0;
1702# endif
f337b084 1703 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1704# ifdef HAS_NTOHS
73cb7263 1705 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1706 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1707# endif /* HAS_NTOHS */
1708# ifdef HAS_VTOHS
73cb7263 1709 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1710 ai16 = (I16) vtohs((U16) ai16);
1711# endif /* HAS_VTOHS */
1712 if (!checksum)
ac7f3b1b 1713 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1714 else if (checksum > bits_in_uv)
08ca2aa3 1715 cdouble += (NV) ai16;
73cb7263
NC
1716 else
1717 cuv += ai16;
068bd2e7
MHM
1718 }
1719 break;
08ca2aa3 1720#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1721 case 'i':
49704364 1722 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1723 while (len-- > 0) {
08ca2aa3 1724 int aint;
f337b084
TH
1725 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1726 DO_BO_UNPACK(aint, i);
08ca2aa3 1727 if (!checksum)
ac7f3b1b 1728 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1729 else if (checksum > bits_in_uv)
1730 cdouble += (NV)aint;
1731 else
1732 cuv += aint;
a6ec74c1
JH
1733 }
1734 break;
1735 case 'I':
49704364 1736 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1737 while (len-- > 0) {
08ca2aa3 1738 unsigned int auint;
f337b084
TH
1739 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1740 DO_BO_UNPACK(auint, i);
08ca2aa3 1741 if (!checksum)
ac7f3b1b 1742 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1743 else if (checksum > bits_in_uv)
1744 cdouble += (NV)auint;
1745 else
1746 cuv += auint;
a6ec74c1
JH
1747 }
1748 break;
92d41999 1749 case 'j':
73cb7263 1750 while (len-- > 0) {
08ca2aa3 1751 IV aiv;
f337b084 1752 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1753#if IVSIZE == INTSIZE
f337b084 1754 DO_BO_UNPACK(aiv, i);
1109a392 1755#elif IVSIZE == LONGSIZE
f337b084 1756 DO_BO_UNPACK(aiv, l);
1109a392 1757#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1758 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1759#else
1760 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1761#endif
08ca2aa3 1762 if (!checksum)
ac7f3b1b 1763 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1764 else if (checksum > bits_in_uv)
1765 cdouble += (NV)aiv;
1766 else
1767 cuv += aiv;
92d41999
JH
1768 }
1769 break;
1770 case 'J':
73cb7263 1771 while (len-- > 0) {
08ca2aa3 1772 UV auv;
f337b084 1773 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1774#if IVSIZE == INTSIZE
f337b084 1775 DO_BO_UNPACK(auv, i);
08ca2aa3 1776#elif IVSIZE == LONGSIZE
f337b084 1777 DO_BO_UNPACK(auv, l);
08ca2aa3 1778#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1779 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1780#else
1781 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1782#endif
08ca2aa3 1783 if (!checksum)
ac7f3b1b 1784 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1785 else if (checksum > bits_in_uv)
1786 cdouble += (NV)auv;
1787 else
1788 cuv += auv;
92d41999
JH
1789 }
1790 break;
49704364
LW
1791 case 'l' | TYPE_IS_SHRIEKING:
1792#if LONGSIZE != SIZE32
73cb7263 1793 while (len-- > 0) {
08ca2aa3 1794 long along;
f337b084
TH
1795 SHIFT_VAR(utf8, s, strend, along, datumtype);
1796 DO_BO_UNPACK(along, l);
08ca2aa3 1797 if (!checksum)
ac7f3b1b 1798 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV)along;
1801 else
1802 cuv += along;
49704364
LW
1803 }
1804 break;
1805#else
1806 /* Fallthrough! */
a6ec74c1 1807#endif
49704364 1808 case 'l':
73cb7263 1809 while (len-- > 0) {
08ca2aa3
TH
1810 I32 ai32;
1811#if U32SIZE > SIZE32
1812 ai32 = 0;
1813#endif
f337b084 1814 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1815 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1816#if U32SIZE > SIZE32
08ca2aa3 1817 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1818#endif
08ca2aa3 1819 if (!checksum)
ac7f3b1b 1820 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1821 else if (checksum > bits_in_uv)
1822 cdouble += (NV)ai32;
1823 else
1824 cuv += ai32;
a6ec74c1
JH
1825 }
1826 break;
49704364
LW
1827 case 'L' | TYPE_IS_SHRIEKING:
1828#if LONGSIZE != SIZE32
73cb7263 1829 while (len-- > 0) {
08ca2aa3 1830 unsigned long aulong;
f337b084
TH
1831 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1832 DO_BO_UNPACK(aulong, l);
08ca2aa3 1833 if (!checksum)
ac7f3b1b 1834 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1835 else if (checksum > bits_in_uv)
1836 cdouble += (NV)aulong;
1837 else
1838 cuv += aulong;
49704364
LW
1839 }
1840 break;
1841#else
1842 /* Fall through! */
1843#endif
a6ec74c1
JH
1844 case 'V':
1845 case 'N':
1846 case 'L':
73cb7263 1847 while (len-- > 0) {
08ca2aa3
TH
1848 U32 au32;
1849#if U32SIZE > SIZE32
1850 au32 = 0;
1851#endif
f337b084 1852 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1853 DO_BO_UNPACK(au32, 32);
a6ec74c1 1854#ifdef HAS_NTOHL
73cb7263
NC
1855 if (datumtype == 'N')
1856 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1857#endif
1858#ifdef HAS_VTOHL
73cb7263
NC
1859 if (datumtype == 'V')
1860 au32 = vtohl(au32);
a6ec74c1 1861#endif
08ca2aa3 1862 if (!checksum)
fc241834
RGS
1863 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1864 else if (checksum > bits_in_uv)
1865 cdouble += (NV)au32;
1866 else
1867 cuv += au32;
a6ec74c1
JH
1868 }
1869 break;
7212898e 1870#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1871 case 'V' | TYPE_IS_SHRIEKING:
1872 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1873 while (len-- > 0) {
08ca2aa3
TH
1874 I32 ai32;
1875# if U32SIZE > SIZE32
1876 ai32 = 0;
1877# endif
f337b084 1878 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1879# ifdef HAS_NTOHL
73cb7263
NC
1880 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1881 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1882# endif
1883# ifdef HAS_VTOHL
73cb7263
NC
1884 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1885 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1886# endif
1887 if (!checksum)
ac7f3b1b 1888 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1889 else if (checksum > bits_in_uv)
1890 cdouble += (NV)ai32;
1891 else
1892 cuv += ai32;
068bd2e7
MHM
1893 }
1894 break;
08ca2aa3 1895#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1896 case 'p':
a6ec74c1 1897 while (len-- > 0) {
08ca2aa3 1898 char *aptr;
f337b084 1899 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
5512a2f9 1900 DO_BO_UNPACK_P(aptr);
c4c5f44a
NC
1901 /* newSVpv generates undef if aptr is NULL */
1902 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1903 }
1904 break;
1905 case 'w':
a6ec74c1
JH
1906 {
1907 UV auv = 0;
1908 U32 bytes = 0;
fc241834 1909
08ca2aa3
TH
1910 while (len > 0 && s < strend) {
1911 U8 ch;
f337b084 1912 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1913 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1914 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1915 if (ch < 0x80) {
a6ec74c1 1916 bytes = 0;
ac7f3b1b 1917 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1918 len--;
1919 auv = 0;
08ca2aa3 1920 continue;
a6ec74c1 1921 }
08ca2aa3 1922 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
1923 char *t;
1924 STRLEN n_a;
1925
1926 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1927 while (s < strend) {
f337b084 1928 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1929 sv = mul128(sv, (U8)(ch & 0x7f));
1930 if (!(ch & 0x80)) {
a6ec74c1
JH
1931 bytes = 0;
1932 break;
1933 }
1934 }
1935 t = SvPV(sv, n_a);
1936 while (*t == '0')
1937 t++;
1938 sv_chop(sv, t);
1939 PUSHs(sv_2mortal(sv));
1940 len--;
1941 auv = 0;
1942 }
1943 }
1944 if ((s >= strend) && bytes)
49704364 1945 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1946 }
1947 break;
1948 case 'P':
49704364
LW
1949 if (symptr->howlen == e_star)
1950 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1951 EXTEND(SP, 1);
08ca2aa3
TH
1952 if (sizeof(char*) <= strend - s) {
1953 char *aptr;
f337b084 1954 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
08ca2aa3 1955 DO_BO_UNPACK_P(aptr);
fc241834
RGS
1956 /* newSVpvn generates undef if aptr is NULL */
1957 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 1958 }
a6ec74c1
JH
1959 break;
1960#ifdef HAS_QUAD
1961 case 'q':
73cb7263 1962 while (len-- > 0) {
08ca2aa3 1963 Quad_t aquad;
f337b084
TH
1964 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1965 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
1966 if (!checksum)
1967 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 1968 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
1969 else if (checksum > bits_in_uv)
1970 cdouble += (NV)aquad;
1971 else
1972 cuv += aquad;
1973 }
a6ec74c1
JH
1974 break;
1975 case 'Q':
73cb7263 1976 while (len-- > 0) {
08ca2aa3 1977 Uquad_t auquad;
f337b084
TH
1978 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1979 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
1980 if (!checksum)
1981 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1982 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
1983 else if (checksum > bits_in_uv)
1984 cdouble += (NV)auquad;
1985 else
1986 cuv += auquad;
a6ec74c1
JH
1987 }
1988 break;
08ca2aa3 1989#endif /* HAS_QUAD */
a6ec74c1
JH
1990 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1991 case 'f':
73cb7263 1992 while (len-- > 0) {
08ca2aa3 1993 float afloat;
f337b084 1994 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1995 DO_BO_UNPACK_N(afloat, float);
1996 if (!checksum)
1997 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
1998 else
73cb7263 1999 cdouble += afloat;
fc241834 2000 }
a6ec74c1
JH
2001 break;
2002 case 'd':
73cb7263 2003 while (len-- > 0) {
08ca2aa3 2004 double adouble;
f337b084 2005 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2006 DO_BO_UNPACK_N(adouble, double);
2007 if (!checksum)
2008 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2009 else
73cb7263 2010 cdouble += adouble;
fc241834 2011 }
a6ec74c1 2012 break;
92d41999 2013 case 'F':
73cb7263 2014 while (len-- > 0) {
08ca2aa3 2015 NV anv;
f337b084 2016 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2017 DO_BO_UNPACK_N(anv, NV);
2018 if (!checksum)
2019 PUSHs(sv_2mortal(newSVnv(anv)));
2020 else
73cb7263 2021 cdouble += anv;
fc241834 2022 }
92d41999
JH
2023 break;
2024#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2025 case 'D':
73cb7263 2026 while (len-- > 0) {
08ca2aa3 2027 long double aldouble;
f337b084 2028 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2029 DO_BO_UNPACK_N(aldouble, long double);
2030 if (!checksum)
2031 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2032 else
2033 cdouble += aldouble;
92d41999
JH
2034 }
2035 break;
2036#endif
a6ec74c1
JH
2037 case 'u':
2038 /* MKS:
2039 * Initialise the decode mapping. By using a table driven
2040 * algorithm, the code will be character-set independent
2041 * (and just as fast as doing character arithmetic)
2042 */
2043 if (PL_uudmap['M'] == 0) {
2044 int i;
2045
2046 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2047 PL_uudmap[(U8)PL_uuemap[i]] = i;
2048 /*
2049 * Because ' ' and '`' map to the same value,
2050 * we need to decode them both the same.
2051 */
2052 PL_uudmap[' '] = 0;
2053 }
08ca2aa3
TH
2054 {
2055 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2056 sv = sv_2mortal(NEWSV(42, l));
2057 if (l) SvPOK_on(sv);
2058 }
2059 if (utf8) {
2060 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2061 I32 a, b, c, d;
2062 char hunk[4];
2063
2064 hunk[3] = '\0';
2065 while (len > 0) {
2066 next_uni_uu(aTHX_ &s, strend, &a);
2067 next_uni_uu(aTHX_ &s, strend, &b);
2068 next_uni_uu(aTHX_ &s, strend, &c);
2069 next_uni_uu(aTHX_ &s, strend, &d);
2070 hunk[0] = (char)((a << 2) | (b >> 4));
2071 hunk[1] = (char)((b << 4) | (c >> 2));
2072 hunk[2] = (char)((c << 6) | d);
2073 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2074 len -= 3;
2075 }
2076 if (s < strend) {
2077 if (*s == '\n') s++;
2078 else {
2079 /* possible checksum byte */
2080 char *skip = s+UTF8SKIP(s);
2081 if (skip < strend && *skip == '\n') s = skip+1;
2082 }
2083 }
2084 }
2085 } else {
fc241834
RGS
2086 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2087 I32 a, b, c, d;
2088 char hunk[4];
a6ec74c1 2089
fc241834
RGS
2090 hunk[3] = '\0';
2091 len = PL_uudmap[*(U8*)s++] & 077;
2092 while (len > 0) {
2093 if (s < strend && ISUUCHAR(*s))
2094 a = PL_uudmap[*(U8*)s++] & 077;
2095 else
2096 a = 0;
2097 if (s < strend && ISUUCHAR(*s))
2098 b = PL_uudmap[*(U8*)s++] & 077;
2099 else
2100 b = 0;
2101 if (s < strend && ISUUCHAR(*s))
2102 c = PL_uudmap[*(U8*)s++] & 077;
2103 else
2104 c = 0;
2105 if (s < strend && ISUUCHAR(*s))
2106 d = PL_uudmap[*(U8*)s++] & 077;
2107 else
2108 d = 0;
2109 hunk[0] = (char)((a << 2) | (b >> 4));
2110 hunk[1] = (char)((b << 4) | (c >> 2));
2111 hunk[2] = (char)((c << 6) | d);
2112 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2113 len -= 3;
2114 }
2115 if (*s == '\n')
2116 s++;
2117 else /* possible checksum byte */
2118 if (s + 1 < strend && s[1] == '\n')
2119 s += 2;
a6ec74c1 2120 }
08ca2aa3
TH
2121 }
2122 XPUSHs(sv);
a6ec74c1
JH
2123 break;
2124 }
49704364 2125
a6ec74c1 2126 if (checksum) {
1109a392 2127 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2128 (checksum > bits_in_uv &&
08ca2aa3
TH
2129 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2130 NV trouble, anv;
a6ec74c1 2131
08ca2aa3 2132 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2133 while (checksum >= 16) {
2134 checksum -= 16;
08ca2aa3 2135 anv *= 65536.0;
a6ec74c1 2136 }
a6ec74c1 2137 while (cdouble < 0.0)
08ca2aa3
TH
2138 cdouble += anv;
2139 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2140 sv = newSVnv(cdouble);
a6ec74c1
JH
2141 }
2142 else {
fa8ec7c1
NC
2143 if (checksum < bits_in_uv) {
2144 UV mask = ((UV)1 << checksum) - 1;
92d41999 2145 cuv &= mask;
a6ec74c1 2146 }
c4c5f44a 2147 sv = newSVuv(cuv);
a6ec74c1
JH
2148 }
2149 XPUSHs(sv_2mortal(sv));
2150 checksum = 0;
2151 }
fc241834 2152
49704364
LW
2153 if (symptr->flags & FLAG_SLASH){
2154 if (SP - PL_stack_base - start_sp_offset <= 0)
2155 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2156 if( next_symbol(symptr) ){
2157 if( symptr->howlen == e_number )
2158 Perl_croak(aTHX_ "Count after length/code in unpack" );
2159 if( beyond ){
2160 /* ...end of char buffer then no decent length available */
2161 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2162 } else {
2163 /* take top of stack (hope it's numeric) */
2164 len = POPi;
2165 if( len < 0 )
2166 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2167 }
2168 } else {
2169 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2170 }
2171 datumtype = symptr->code;
21c16052 2172 explicit_length = FALSE;
49704364
LW
2173 goto redo_switch;
2174 }
a6ec74c1 2175 }
49704364 2176
18529408
IZ
2177 if (new_s)
2178 *new_s = s;
2179 PUTBACK;
2180 return SP - PL_stack_base - start_sp_offset;
2181}
2182
2183PP(pp_unpack)
2184{
2185 dSP;
bab9c0ac 2186 dPOPPOPssrl;
18529408
IZ
2187 I32 gimme = GIMME_V;
2188 STRLEN llen;
2189 STRLEN rlen;
08ca2aa3
TH
2190 char *pat = SvPV(left, llen);
2191 char *s = SvPV(right, rlen);
18529408 2192 char *strend = s + rlen;
08ca2aa3
TH
2193 char *patend = pat + llen;
2194 I32 cnt;
18529408
IZ
2195
2196 PUTBACK;
7accc089 2197 cnt = unpackstring(pat, patend, s, strend,
49704364 2198 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2199 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2200
18529408
IZ
2201 SPAGAIN;
2202 if ( !cnt && gimme == G_SCALAR )
2203 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2204 RETURN;
2205}
2206
f337b084
TH
2207STATIC U8 *
2208doencodes(U8 *h, char *s, I32 len)
a6ec74c1 2209{
f337b084 2210 *h++ = PL_uuemap[len];
a6ec74c1 2211 while (len > 2) {
f337b084
TH
2212 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2213 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2214 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2215 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2216 s += 3;
2217 len -= 3;
2218 }
2219 if (len > 0) {
2220 char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2221 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2222 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2223 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2224 *h++ = PL_uuemap[0];
a6ec74c1 2225 }
f337b084
TH
2226 *h++ = '\n';
2227 return h;
a6ec74c1
JH
2228}
2229
2230STATIC SV *
2231S_is_an_int(pTHX_ char *s, STRLEN l)
2232{
2233 STRLEN n_a;
2234 SV *result = newSVpvn(s, l);
2235 char *result_c = SvPV(result, n_a); /* convenience */
2236 char *out = result_c;
2237 bool skip = 1;
2238 bool ignore = 0;
2239
2240 while (*s) {
2241 switch (*s) {
2242 case ' ':
2243 break;
2244 case '+':
2245 if (!skip) {
2246 SvREFCNT_dec(result);
2247 return (NULL);
2248 }
2249 break;
2250 case '0':
2251 case '1':
2252 case '2':
2253 case '3':
2254 case '4':
2255 case '5':
2256 case '6':
2257 case '7':
2258 case '8':
2259 case '9':
2260 skip = 0;
2261 if (!ignore) {
2262 *(out++) = *s;
2263 }
2264 break;
2265 case '.':
2266 ignore = 1;
2267 break;
2268 default:
2269 SvREFCNT_dec(result);
2270 return (NULL);
2271 }
2272 s++;
2273 }
2274 *(out++) = '\0';
2275 SvCUR_set(result, out - result_c);
2276 return (result);
2277}
2278
2279/* pnum must be '\0' terminated */
2280STATIC int
2281S_div128(pTHX_ SV *pnum, bool *done)
2282{
2283 STRLEN len;
2284 char *s = SvPV(pnum, len);
2285 int m = 0;
2286 int r = 0;
2287 char *t = s;
2288
2289 *done = 1;
2290 while (*t) {
2291 int i;
2292
2293 i = m * 10 + (*t - '0');
2294 m = i & 0x7F;
2295 r = (i >> 7); /* r < 10 */
2296 if (r) {
2297 *done = 0;
2298 }
2299 *(t++) = '0' + r;
2300 }
2301 *(t++) = '\0';
2302 SvCUR_set(pnum, (STRLEN) (t - s));
2303 return (m);
2304}
2305
49704364 2306
a6ec74c1 2307
18529408
IZ
2308/*
2309=for apidoc pack_cat
2310
7accc089
JH
2311The engine implementing pack() Perl function. Note: parameters next_in_list and
2312flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2313
2314=cut */
2315
49704364 2316
18529408
IZ
2317void
2318Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2319{
49704364
LW
2320 tempsym_t sym = { 0 };
2321 sym.patptr = pat;
2322 sym.patend = patend;
7accc089
JH
2323 sym.flags = FLAG_PACK;
2324
2325 (void)pack_rec( cat, &sym, beglist, endlist );
2326}
2327
2328
2329/*
2330=for apidoc packlist
2331
2332The engine implementing pack() Perl function.
2333
2334=cut */
2335
2336
2337void
2338Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2339{
f337b084 2340 STRLEN no_len;
7accc089 2341 tempsym_t sym = { 0 };
f337b084 2342
7accc089
JH
2343 sym.patptr = pat;
2344 sym.patend = patend;
2345 sym.flags = FLAG_PACK;
49704364 2346
f337b084
TH
2347 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2348 Also make sure any UTF8 flag is loaded */
2349 SvPV_force(cat, no_len);
2350 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2351
49704364
LW
2352 (void)pack_rec( cat, &sym, beglist, endlist );
2353}
2354
f337b084
TH
2355/* like sv_utf8_upgrade, but also repoint the group start markers */
2356STATIC void
2357marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2358 STRLEN len;
2359 tempsym_t *group;
2360 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2361
2362 if (SvUTF8(sv)) return;
2363
2364 from_start = SvPVX(sv);
2365 from_end = from_start + SvCUR(sv);
2366 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2367 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2368 if (from_ptr == from_end) {
2369 /* Simple case: no character needs to be changed */
2370 SvUTF8_on(sv);
2371 return;
2372 }
2373
3473cf63 2374 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
f337b084
TH
2375 New('U', to_start, len, char);
2376 Copy(from_start, to_start, from_ptr-from_start, char);
2377 to_ptr = to_start + (from_ptr-from_start);
2378
2379 New('U', marks, sym_ptr->level+2, char *);
2380 for (group=sym_ptr; group; group = group->previous)
2381 marks[group->level] = from_start + group->strbeg;
2382 marks[sym_ptr->level+1] = from_end+1;
2383 for (m = marks; *m < from_ptr; m++)
2384 *m = to_start + (*m-from_start);
2385
2386 for (;from_ptr < from_end; from_ptr++) {
2387 while (*m == from_ptr) *m++ = to_ptr;
2388 to_ptr = uvchr_to_utf8(to_ptr, *(U8 *) from_ptr);
2389 }
2390 *to_ptr = 0;
2391
2392 while (*m == from_ptr) *m++ = to_ptr;
2393 if (m != marks + sym_ptr->level+1) {
2394 Safefree(marks);
2395 Safefree(to_start);
2396 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2397 }
2398 for (group=sym_ptr; group; group = group->previous)
2399 group->strbeg = marks[group->level] - to_start;
2400 Safefree(marks);
2401
2402 if (SvOOK(sv)) {
2403 if (SvIVX(sv)) {
2404 SvLEN(sv) += SvIVX(sv);
2405 from_start -= SvIVX(sv);
2406 SvIV_set(sv, 0);
2407 }
2408 SvFLAGS(sv) &= ~SVf_OOK;
2409 }
2410 if (SvLEN(sv) != 0)
2411 Safefree(from_start);
2412 SvPVX(sv) = to_start;
2413 SvCUR(sv) = to_ptr - to_start;
2414 SvLEN(sv) = len;
2415 SvUTF8_on(sv);
2416}
2417
2418/* Exponential string grower. Makes string extension effectively O(n)
2419 needed says how many extra bytes we need (not counting the final '\0')
2420 Only grows the string if there is an actual lack of space
2421*/
2422STATIC char *
2423sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2424 STRLEN cur = SvCUR(sv);
2425 STRLEN len = SvLEN(sv);
2426 STRLEN extend;
2427 if (len - cur > needed) return SvPVX(sv);
2428 extend = needed > len ? needed : len;
2429 return SvGROW(sv, len+extend+1);
2430}
49704364
LW
2431
2432STATIC
2433SV **
f337b084 2434S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2435{
49704364 2436 tempsym_t lookahead;
f337b084
TH
2437 I32 items = endlist - beglist;
2438 bool found = next_symbol(symptr);
2439 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2440
2441 if (symptr->level == 0 && found && symptr->code == 'U') {
2442 marked_upgrade(aTHX_ cat, symptr);
2443 symptr->flags |= FLAG_DO_UTF8;
2444 utf8 = 0;
49704364 2445 }
f337b084 2446 symptr->strbeg = SvCUR(cat);
49704364
LW
2447
2448 while (found) {
f337b084
TH
2449 SV *fromstr;
2450 STRLEN fromlen;
2451 I32 len;
a6ec74c1 2452 SV *lengthcode = Nullsv;
49704364 2453 I32 datumtype = symptr->code;
f337b084
TH
2454 howlen_t howlen = symptr->howlen;
2455 char *start = SvPVX(cat);
2456 char *cur = start + SvCUR(cat);
49704364 2457
f337b084
TH
2458#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2459
2460 switch (howlen) {
fc241834 2461 case e_star:
f337b084
TH
2462 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2463 0 : items;
2464 break;
2465 default:
2466 /* e_no_len and e_number */
2467 len = symptr->length;
49704364
LW
2468 break;
2469 }
2470
f337b084 2471 if (len) {
a7a3cfaa 2472 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2473
a7a3cfaa
TH
2474 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2475 /* We can process this letter. */
2476 STRLEN size = props & PACK_SIZE_MASK;
2477 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2478 }
f337b084
TH
2479 }
2480
49704364
LW
2481 /* Look ahead for next symbol. Do we have code/code? */
2482 lookahead = *symptr;
2483 found = next_symbol(&lookahead);
246f24af
TH
2484 if (symptr->flags & FLAG_SLASH) {
2485 IV count;
f337b084 2486 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2487 if (strchr("aAZ", lookahead.code)) {
2488 if (lookahead.howlen == e_number) count = lookahead.length;
2489 else {
2490 if (items > 0)
2491 count = DO_UTF8(*beglist) ?
2492 sv_len_utf8(*beglist) : sv_len(*beglist);
2493 else count = 0;
2494 if (lookahead.code == 'Z') count++;
2495 }
2496 } else {
2497 if (lookahead.howlen == e_number && lookahead.length < items)
2498 count = lookahead.length;
2499 else count = items;
2500 }
2501 lookahead.howlen = e_number;
2502 lookahead.length = count;
2503 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2504 }
49704364 2505
fc241834
RGS
2506 /* Code inside the switch must take care to properly update
2507 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2508 doesn't simply leave using break */
1109a392 2509 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2510 default:
f337b084
TH
2511 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2512 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2513 case '%':
49704364 2514 Perl_croak(aTHX_ "'%%' may not be used in pack");
a6ec74c1 2515 case '@':
f337b084
TH
2516 if (utf8) {
2517 char *s = start + symptr->strbeg;
2518 while (len > 0 && s < cur) {
2519 s += UTF8SKIP(s);
2520 len--;
2521 }
2522 if (s > cur)
2523 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2524 if (len > 0) {
2525 grow:
2526 GROWING(0, cat, start, cur, len);
2527 Zero(cur, len, char);
2528 cur += len;
2529 } else if (s < cur) cur = s;
2530 else goto no_change;
2531 } else {
2532 len -= cur - (start+symptr->strbeg);
2533 if (len > 0) goto grow;
fc241834 2534 len = -len;
f337b084
TH
2535 if (len > 0) goto shrink;
2536 else goto no_change;
2537 }
a6ec74c1 2538 break;
fc241834 2539 case '(': {
49704364 2540 tempsym_t savsym = *symptr;
66c611c5
MHM
2541 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2542 symptr->flags |= group_modifiers;
49704364
LW
2543 symptr->patend = savsym.grpend;
2544 symptr->level++;
f337b084 2545 symptr->previous = &lookahead;
18529408 2546 while (len--) {
f337b084
TH
2547 U32 was_utf8;
2548 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2549 else symptr->flags &= ~FLAG_PARSE_UTF8;
2550 was_utf8 = SvUTF8(cat);
49704364 2551 symptr->patptr = savsym.grpbeg;
f337b084
TH
2552 beglist = pack_rec(cat, symptr, beglist, endlist);
2553 if (SvUTF8(cat) != was_utf8)
2554 /* This had better be an upgrade while in utf8==0 mode */
2555 utf8 = 1;
2556
49704364 2557 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2558 break; /* No way to continue */
2559 }
f337b084
TH
2560 lookahead.flags = symptr->flags & ~group_modifiers;
2561 goto no_change;
18529408 2562 }
62f95557
IZ
2563 case 'X' | TYPE_IS_SHRIEKING:
2564 if (!len) /* Avoid division by 0 */
2565 len = 1;
f337b084
TH
2566 if (utf8) {
2567 char *hop, *last;
2568 I32 l = len;
2569 hop = last = start;
2570 while (hop < cur) {
2571 hop += UTF8SKIP(hop);
2572 if (--l == 0) {
2573 last = hop;
2574 l = len;
2575 }
2576 }
2577 if (last > cur)
2578 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2579 cur = last;
2580 break;
2581 }
2582 len = (cur-start) % len;
62f95557 2583 /* FALL THROUGH */
a6ec74c1 2584 case 'X':
f337b084
TH
2585 if (utf8) {
2586 if (len < 1) goto no_change;
2587 while (len > 0) {
2588 if (cur <= start)
2589 Perl_croak(aTHX_ "'X' outside of string in pack");
2590 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2591 if (cur <= start)
2592 Perl_croak(aTHX_ "'X' outside of string in pack");
2593 }
2594 len--;
2595 }
2596 } else {
fc241834 2597 shrink:
f337b084 2598 if (cur - start < len)
fc241834 2599 Perl_croak(aTHX_ "'X' outside of string in pack");
f337b084
TH
2600 cur -= len;
2601 }
2602 if (cur < start+symptr->strbeg) {
2603 /* Make sure group starts don't point into the void */
2604 tempsym_t *group;
2605 STRLEN length = cur-start;
2606 for (group = symptr;
2607 group && length < group->strbeg;
2608 group = group->previous) group->strbeg = length;
2609 lookahead.strbeg = length;
2610 }
a6ec74c1 2611 break;
fc241834
RGS
2612 case 'x' | TYPE_IS_SHRIEKING: {
2613 I32 ai32;
62f95557
IZ
2614 if (!len) /* Avoid division by 0 */
2615 len = 1;
fc241834
RGS
2616 if (utf8) ai32 = utf8_length(start, cur) % len;
2617 else ai32 = (cur - start) % len;
2618 if (ai32 == 0) goto no_change;
2619 len -= ai32;
2620 }
2621 /* FALL THROUGH */
a6ec74c1 2622 case 'x':
f337b084 2623 goto grow;
a6ec74c1
JH
2624 case 'A':
2625 case 'Z':
f337b084
TH
2626 case 'a': {
2627 char *aptr;
2628
a6ec74c1
JH
2629 fromstr = NEXTFROM;
2630 aptr = SvPV(fromstr, fromlen);
f337b084
TH
2631 if (DO_UTF8(fromstr)) {
2632 char *end, *s;
2633
2634 if (!utf8 && !SvUTF8(cat)) {
2635 marked_upgrade(aTHX_ cat, symptr);
2636 lookahead.flags |= FLAG_DO_UTF8;
2637 lookahead.strbeg = symptr->strbeg;
2638 utf8 = 1;
2639 start = SvPVX(cat);
2640 cur = start + SvCUR(cat);
2641 }
fc241834 2642 if (howlen == e_star) {
f337b084
TH
2643 if (utf8) goto string_copy;
2644 len = fromlen+1;
2645 }
2646 s = aptr;
2647 end = aptr + fromlen;
2648 fromlen = datumtype == 'Z' ? len-1 : len;
2649 while ((I32) fromlen > 0 && s < end) {
2650 s += UTF8SKIP(s);
2651 fromlen--;
2652 }
2653 if (s > end)
2654 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2655 if (utf8) {
fc241834 2656 len = fromlen;
f337b084
TH
2657 if (datumtype == 'Z') len++;
2658 fromlen = s-aptr;
2659 len += fromlen;
fc241834 2660
f337b084 2661 goto string_copy;
fc241834 2662 }
f337b084
TH
2663 fromlen = len - fromlen;
2664 if (datumtype == 'Z') fromlen--;
2665 if (howlen == e_star) {
2666 len = fromlen;
2667 if (datumtype == 'Z') len++;
fc241834 2668 }
f337b084 2669 GROWING(0, cat, start, cur, len);
fc241834 2670 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2671 datumtype | TYPE_IS_PACK))
2672 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2673 cur += fromlen;
a6ec74c1 2674 len -= fromlen;
f337b084
TH
2675 } else if (utf8) {
2676 if (howlen == e_star) {
2677 len = fromlen;
2678 if (datumtype == 'Z') len++;
a6ec74c1 2679 }
f337b084
TH
2680 if (len <= (I32) fromlen) {
2681 fromlen = len;
2682 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2683 }
fc241834 2684 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2685 upgrade, so:
2686 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2687 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2688 len -= fromlen;
2689 while (fromlen > 0) {
2690 cur = uvchr_to_utf8(cur, * (U8 *) aptr);
2691 aptr++;
2692 fromlen--;
fc241834 2693 }
f337b084
TH
2694 } else {
2695 string_copy:
2696 if (howlen == e_star) {
2697 len = fromlen;
2698 if (datumtype == 'Z') len++;
2699 }
2700 if (len <= (I32) fromlen) {
2701 fromlen = len;
2702 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2703 }
f337b084
TH
2704 GROWING(0, cat, start, cur, len);
2705 Copy(aptr, cur, fromlen, char);
2706 cur += fromlen;
2707 len -= fromlen;
a6ec74c1 2708 }
f337b084
TH
2709 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2710 cur += len;
a6ec74c1 2711 break;
f337b084 2712 }
a6ec74c1 2713 case 'B':
f337b084
TH
2714 case 'b': {
2715 char *str, *end;
2716 I32 l, field_len;
2717 U8 bits;
2718 bool utf8_source;
2719 U32 utf8_flags;
a6ec74c1 2720
fc241834
RGS
2721 fromstr = NEXTFROM;
2722 str = SvPV(fromstr, fromlen);
f337b084
TH
2723 end = str + fromlen;
2724 if (DO_UTF8(fromstr)) {
2725 utf8_source = TRUE;
2726 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2727 } else {
2728 utf8_source = FALSE;
2729 utf8_flags = 0; /* Unused, but keep compilers happy */
2730 }
2731 if (howlen == e_star) len = fromlen;
2732 field_len = (len+7)/8;
2733 GROWING(utf8, cat, start, cur, field_len);
2734 if (len > (I32)fromlen) len = fromlen;
2735 bits = 0;
2736 l = 0;
2737 if (datumtype == 'B')
2738 while (l++ < len) {
2739 if (utf8_source) {
2740 UV val;
2741 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2742 bits |= val & 1;
2743 } else bits |= *str++ & 1;
2744 if (l & 7) bits <<= 1;
fc241834 2745 else {
f337b084
TH
2746 PUSH_BYTE(utf8, cur, bits);
2747 bits = 0;
a6ec74c1
JH
2748 }
2749 }
f337b084
TH
2750 else
2751 /* datumtype == 'b' */
2752 while (l++ < len) {
2753 if (utf8_source) {
2754 UV val;
2755 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2756 if (val & 1) bits |= 0x80;
2757 } else if (*str++ & 1)
2758 bits |= 0x80;
2759 if (l & 7) bits >>= 1;
fc241834 2760 else {
f337b084
TH
2761 PUSH_BYTE(utf8, cur, bits);
2762 bits = 0;
a6ec74c1
JH
2763 }
2764 }
f337b084
TH
2765 l--;
2766 if (l & 7) {
fc241834 2767 if (datumtype == 'B')
f337b084 2768 bits <<= 7 - (l & 7);
fc241834 2769 else
f337b084
TH
2770 bits >>= 7 - (l & 7);
2771 PUSH_BYTE(utf8, cur, bits);
2772 l += 7;
a6ec74c1 2773 }
f337b084
TH
2774 /* Determine how many chars are left in the requested field */
2775 l /= 8;
2776 if (howlen == e_star) field_len = 0;
2777 else field_len -= l;
2778 Zero(cur, field_len, char);
2779 cur += field_len;
a6ec74c1 2780 break;
f337b084 2781 }
a6ec74c1 2782 case 'H':
f337b084
TH
2783 case 'h': {
2784 char *str, *end;
2785 I32 l, field_len;
2786 U8 bits;
2787 bool utf8_source;
2788 U32 utf8_flags;
a6ec74c1 2789
fc241834
RGS
2790 fromstr = NEXTFROM;
2791 str = SvPV(fromstr, fromlen);
f337b084
TH
2792 end = str + fromlen;
2793 if (DO_UTF8(fromstr)) {
2794 utf8_source = TRUE;
2795 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2796 } else {
2797 utf8_source = FALSE;
2798 utf8_flags = 0; /* Unused, but keep compilers happy */
2799 }
2800 if (howlen == e_star) len = fromlen;
2801 field_len = (len+1)/2;
2802 GROWING(utf8, cat, start, cur, field_len);
2803 if (!utf8 && len > (I32)fromlen) len = fromlen;
2804 bits = 0;
2805 l = 0;
2806 if (datumtype == 'H')
2807 while (l++ < len) {
2808 if (utf8_source) {
2809 UV val;
2810 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2811 if (val < 256 && isALPHA(val))
2812 bits |= (val + 9) & 0xf;
a6ec74c1 2813 else
f337b084
TH
2814 bits |= val & 0xf;
2815 } else if (isALPHA(*str))
2816 bits |= (*str++ + 9) & 0xf;
2817 else
2818 bits |= *str++ & 0xf;
2819 if (l & 1) bits <<= 4;
fc241834 2820 else {
f337b084
TH
2821 PUSH_BYTE(utf8, cur, bits);
2822 bits = 0;
a6ec74c1
JH
2823 }
2824 }
f337b084
TH
2825 else
2826 while (l++ < len) {
2827 if (utf8_source) {
2828 UV val;
2829 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2830 if (val < 256 && isALPHA(val))
2831 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2832 else
f337b084
TH
2833 bits |= (val & 0xf) << 4;
2834 } else if (isALPHA(*str))
2835 bits |= ((*str++ + 9) & 0xf) << 4;
2836 else
2837 bits |= (*str++ & 0xf) << 4;
2838 if (l & 1) bits >>= 4;
fc241834 2839 else {
f337b084
TH
2840 PUSH_BYTE(utf8, cur, bits);
2841 bits = 0;
a6ec74c1 2842 }
fc241834 2843 }
f337b084
TH
2844 l--;
2845 if (l & 1) {
2846 PUSH_BYTE(utf8, cur, bits);
2847 l++;
2848 }
2849 /* Determine how many chars are left in the requested field */
2850 l /= 2;
2851 if (howlen == e_star) field_len = 0;
2852 else field_len -= l;
2853 Zero(cur, field_len, char);
2854 cur += field_len;
2855 break;
fc241834
RGS
2856 }
2857 case 'c':
f337b084
TH
2858 while (len-- > 0) {
2859 IV aiv;
2860 fromstr = NEXTFROM;
2861 aiv = SvIV(fromstr);
2862 if ((-128 > aiv || aiv > 127) &&
2863 ckWARN(WARN_PACK))
2864 Perl_warner(aTHX_ packWARN(WARN_PACK),
2865 "Character in 'c' format wrapped in pack");
2866 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2867 }
2868 break;
2869 case 'C':
f337b084
TH
2870 if (len == 0) {
2871 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2872 break;
2873 }
2874 GROWING(0, cat, start, cur, len);
a6ec74c1 2875 while (len-- > 0) {
f337b084 2876 IV aiv;
a6ec74c1 2877 fromstr = NEXTFROM;
f337b084
TH
2878 aiv = SvIV(fromstr);
2879 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2880 ckWARN(WARN_PACK))
2881 Perl_warner(aTHX_ packWARN(WARN_PACK),
2882 "Character in 'C' format wrapped in pack");
f337b084
TH
2883 *cur++ = aiv & 0xff;
2884 }
fc241834
RGS
2885 break;
2886 case 'W': {
2887 char *end;
2888 U8 in_bytes = IN_BYTES;
2889
2890 end = start+SvLEN(cat)-1;
2891 if (utf8) end -= UTF8_MAXLEN-1;
2892 while (len-- > 0) {
2893 UV auv;
2894 fromstr = NEXTFROM;
2895 auv = SvUV(fromstr);
2896 if (in_bytes) auv = auv % 0x100;
2897 if (utf8) {
2898 W_utf8:
2899 if (cur > end) {
2900 *cur = '\0';
2901 SvCUR(cat) = cur - start;
2902
2903 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2904 end = start+SvLEN(cat)-UTF8_MAXLEN;
2905 }
2906 cur = uvuni_to_utf8_flags(cur, NATIVE_TO_UNI(auv),
2907 ckWARN(WARN_UTF8) ?
2908 0 : UNICODE_ALLOW_ANY);
2909 } else {
2910 if (auv >= 0x100) {
2911 if (!SvUTF8(cat)) {
2912 *cur = '\0';
2913 SvCUR(cat) = cur - start;
2914 marked_upgrade(aTHX_ cat, symptr);
2915 lookahead.flags |= FLAG_DO_UTF8;
2916 lookahead.strbeg = symptr->strbeg;
2917 utf8 = 1;
2918 start = SvPVX(cat);
2919 cur = start + SvCUR(cat);
2920 end = start+SvLEN(cat)-UTF8_MAXLEN;
2921 goto W_utf8;
2922 }
2923 if (ckWARN(WARN_PACK))
2924 Perl_warner(aTHX_ packWARN(WARN_PACK),
2925 "Character in 'W' format wrapped in pack");
2926 auv &= 0xff;
2927 }
2928 if (cur >= end) {
2929 *cur = '\0';
2930 SvCUR(cat) = cur - start;
2931 GROWING(0, cat, start, cur, len+1);
2932 end = start+SvLEN(cat)-1;
2933 }
2934 *(U8 *) cur++ = auv;
a6ec74c1
JH
2935 }
2936 }
2937 break;
fc241834
RGS
2938 }
2939 case 'U': {
2940 char *end;
2941
2942 if (len == 0) {
2943 if (!(symptr->flags & FLAG_DO_UTF8)) {
2944 marked_upgrade(aTHX_ cat, symptr);
2945 lookahead.flags |= FLAG_DO_UTF8;
2946 lookahead.strbeg = symptr->strbeg;
2947 }
2948 utf8 = 0;
2949 goto no_change;
2950 }
2951
2952 end = start+SvLEN(cat);
2953 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2954 while (len-- > 0) {
fc241834 2955 UV auv;
a6ec74c1 2956 fromstr = NEXTFROM;
fc241834
RGS
2957 auv = SvUV(fromstr);
2958 if (utf8) {
2959 char buffer[UTF8_MAXLEN], *endb;
2960 endb = uvuni_to_utf8_flags(buffer, auv,
2961 ckWARN(WARN_UTF8) ?
2962 0 : UNICODE_ALLOW_ANY);
2963 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2964 *cur = '\0';
2965 SvCUR(cat) = cur - start;
2966 GROWING(0, cat, start, cur,
2967 len+(endb-buffer)*UTF8_EXPAND);
2968 end = start+SvLEN(cat);
2969 }
2970 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
2971 } else {
2972 if (cur >= end) {
2973 *cur = '\0';
2974 SvCUR(cat) = cur - start;
2975 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2976 end = start+SvLEN(cat)-UTF8_MAXLEN;
2977 }
2978 cur = uvuni_to_utf8_flags(cur, auv,
2979 ckWARN(WARN_UTF8) ?
2980 0 : UNICODE_ALLOW_ANY);
2981 }
a6ec74c1 2982 }
a6ec74c1 2983 break;
fc241834 2984 }
a6ec74c1
JH
2985 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2986 case 'f':
a6ec74c1 2987 while (len-- > 0) {
f337b084
TH
2988 float afloat;
2989 NV anv;
a6ec74c1 2990 fromstr = NEXTFROM;
f337b084 2991 anv = SvNV(fromstr);
5cdb9e01 2992#ifdef __VOS__
f337b084 2993 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
2994 during conversion from double to float into infinity, so we
2995 do it by hand. This code should either be generalized for
2996 any OS that needs it, or removed if and when VOS implements
2997 posix-976 (suggestion to support mapping to infinity).
2998 Paul.Green@stratus.com 02-04-02. */
f337b084 2999 if (anv > FLT_MAX)
fc241834 3000 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3001 else if (anv < -FLT_MAX)
fc241834 3002 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3003 else afloat = (float) anv;
3004#else /* __VOS__ */
baf3cf9c 3005# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3006 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3007 * on Alpha; fake it if we don't have them.
3008 */
f337b084 3009 if (anv > FLT_MAX)
fc241834 3010 afloat = FLT_MAX;
f337b084 3011 else if (anv < -FLT_MAX)
fc241834 3012 afloat = -FLT_MAX;
f337b084 3013 else afloat = (float)anv;
baf3cf9c 3014# else
f337b084 3015 afloat = (float)anv;
baf3cf9c 3016# endif
f337b084 3017#endif /* __VOS__ */
1109a392 3018 DO_BO_PACK_N(afloat, float);
f337b084 3019 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3020 }
3021 break;
3022 case 'd':
a6ec74c1 3023 while (len-- > 0) {
f337b084
TH
3024 double adouble;
3025 NV anv;
a6ec74c1 3026 fromstr = NEXTFROM;
f337b084 3027 anv = SvNV(fromstr);
5cdb9e01 3028#ifdef __VOS__
f337b084 3029 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3030 during conversion from long double to double into infinity,
3031 so we do it by hand. This code should either be generalized
3032 for any OS that needs it, or removed if and when VOS
3033 implements posix-976 (suggestion to support mapping to
3034 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3035 if (anv > DBL_MAX)
fc241834 3036 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3037 else if (anv < -DBL_MAX)
fc241834 3038 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3039 else adouble = (double) anv;
3040#else /* __VOS__ */
baf3cf9c 3041# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3042 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3043 * on Alpha; fake it if we don't have them.
3044 */
f337b084 3045 if (anv > DBL_MAX)
fc241834 3046 adouble = DBL_MAX;
f337b084 3047 else if (anv < -DBL_MAX)
fc241834 3048 adouble = -DBL_MAX;
f337b084 3049 else adouble = (double)anv;
baf3cf9c 3050# else
f337b084 3051 adouble = (double)anv;
baf3cf9c 3052# endif
f337b084 3053#endif /* __VOS__ */
1109a392 3054 DO_BO_PACK_N(adouble, double);
f337b084 3055 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3056 }
3057 break;
fc241834
RGS
3058 case 'F': {
3059 NV anv;
1109a392 3060 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3061 while (len-- > 0) {
3062 fromstr = NEXTFROM;
3063 anv = SvNV(fromstr);
1109a392 3064 DO_BO_PACK_N(anv, NV);
fc241834 3065 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3066 }
3067 break;
fc241834 3068 }
92d41999 3069#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3070 case 'D': {
3071 long double aldouble;
1109a392
MHM
3072 /* long doubles can have unused bits, which may be nonzero */
3073 Zero(&aldouble, 1, long double);
92d41999
JH
3074 while (len-- > 0) {
3075 fromstr = NEXTFROM;
3076 aldouble = (long double)SvNV(fromstr);
1109a392 3077 DO_BO_PACK_N(aldouble, long double);
fc241834 3078 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3079 }
3080 break;
fc241834 3081 }
92d41999 3082#endif
7212898e 3083#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3084 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3085#endif
a6ec74c1
JH
3086 case 'n':
3087 while (len-- > 0) {
f337b084 3088 I16 ai16;
a6ec74c1 3089 fromstr = NEXTFROM;
ef108786 3090 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3091#ifdef HAS_HTONS
ef108786 3092 ai16 = PerlSock_htons(ai16);
a6ec74c1 3093#endif
f337b084 3094 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3095 }
3096 break;
7212898e 3097#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3098 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3099#endif
a6ec74c1
JH
3100 case 'v':
3101 while (len-- > 0) {
f337b084 3102 I16 ai16;
a6ec74c1 3103 fromstr = NEXTFROM;
ef108786 3104 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3105#ifdef HAS_HTOVS
ef108786 3106 ai16 = htovs(ai16);
a6ec74c1 3107#endif
f337b084 3108 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3109 }
3110 break;
49704364 3111 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3112#if SHORTSIZE != SIZE16
fc241834 3113 while (len-- > 0) {
f337b084 3114 unsigned short aushort;
fc241834
RGS
3115 fromstr = NEXTFROM;
3116 aushort = SvUV(fromstr);
3117 DO_BO_PACK(aushort, s);
f337b084 3118 PUSH_VAR(utf8, cur, aushort);
fc241834 3119 }
49704364
LW
3120 break;
3121#else
3122 /* Fall through! */
a6ec74c1 3123#endif
49704364 3124 case 'S':
fc241834 3125 while (len-- > 0) {
f337b084 3126 U16 au16;
fc241834
RGS
3127 fromstr = NEXTFROM;
3128 au16 = (U16)SvUV(fromstr);
3129 DO_BO_PACK(au16, 16);
f337b084 3130 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3131 }
3132 break;
49704364 3133 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3134#if SHORTSIZE != SIZE16
fc241834 3135 while (len-- > 0) {
f337b084 3136 short ashort;
fc241834
RGS
3137 fromstr = NEXTFROM;
3138 ashort = SvIV(fromstr);
3139 DO_BO_PACK(ashort, s);
f337b084 3140 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3141 }
49704364
LW
3142 break;
3143#else
3144 /* Fall through! */
a6ec74c1 3145#endif
49704364
LW
3146 case 's':
3147 while (len-- > 0) {
f337b084 3148 I16 ai16;
49704364 3149 fromstr = NEXTFROM;
ef108786
MHM
3150 ai16 = (I16)SvIV(fromstr);
3151 DO_BO_PACK(ai16, 16);
f337b084 3152 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3153 }
3154 break;
3155 case 'I':
49704364 3156 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3157 while (len-- > 0) {
f337b084 3158 unsigned int auint;
a6ec74c1
JH
3159 fromstr = NEXTFROM;
3160 auint = SvUV(fromstr);
1109a392 3161 DO_BO_PACK(auint, i);
f337b084 3162 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3163 }
3164 break;
92d41999
JH
3165 case 'j':
3166 while (len-- > 0) {
f337b084 3167 IV aiv;
92d41999
JH
3168 fromstr = NEXTFROM;
3169 aiv = SvIV(fromstr);
1109a392
MHM
3170#if IVSIZE == INTSIZE
3171 DO_BO_PACK(aiv, i);
3172#elif IVSIZE == LONGSIZE
3173 DO_BO_PACK(aiv, l);
3174#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3175 DO_BO_PACK(aiv, 64);
f337b084
TH
3176#else
3177 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3178#endif
f337b084 3179 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3180 }
3181 break;
3182 case 'J':
3183 while (len-- > 0) {
f337b084 3184 UV auv;
92d41999
JH
3185 fromstr = NEXTFROM;
3186 auv = SvUV(fromstr);
1109a392
MHM
3187#if UVSIZE == INTSIZE
3188 DO_BO_PACK(auv, i);
3189#elif UVSIZE == LONGSIZE
3190 DO_BO_PACK(auv, l);
3191#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3192 DO_BO_PACK(auv, 64);
f337b084
TH
3193#else
3194 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3195#endif
f337b084 3196 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3197 }
3198 break;
a6ec74c1
JH
3199 case 'w':
3200 while (len-- > 0) {
f337b084 3201 NV anv;
a6ec74c1 3202 fromstr = NEXTFROM;
15e9f109 3203 anv = SvNV(fromstr);
a6ec74c1 3204
f337b084
TH
3205 if (anv < 0) {
3206 *cur = '\0';
3207 SvCUR(cat) = cur - start;
49704364 3208 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3209 }
a6ec74c1 3210
196b62db
NC
3211 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3212 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3213 any negative IVs will have already been got by the croak()
3214 above. IOK is untrue for fractions, so we test them
3215 against UV_MAX_P1. */
f337b084
TH
3216 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3217 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3218 char *in = buf + sizeof(buf);
196b62db 3219 UV auv = SvUV(fromstr);
a6ec74c1
JH
3220
3221 do {
eb160463 3222 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3223 auv >>= 7;
3224 } while (auv);
3225 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3226 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3227 in, (buf + sizeof(buf)) - in);
3228 } else if (SvPOKp(fromstr))
3229 goto w_string;
a6ec74c1 3230 else if (SvNOKp(fromstr)) {
0258719b
NC
3231 /* 10**NV_MAX_10_EXP is the largest power of 10
3232 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3233 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3234 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3235 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3236 Some C compilers are strict about integral constant
3237 expressions so we conservatively divide by a slightly
3238 smaller integer instead of multiplying by the exact
3239 floating-point value.
0258719b
NC
3240 */
3241#ifdef NV_MAX_10_EXP
f337b084 3242 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3243 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3244#else
f337b084 3245 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3246 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3247#endif
a6ec74c1
JH
3248 char *in = buf + sizeof(buf);
3249
15e9f109 3250 anv = Perl_floor(anv);
a6ec74c1 3251 do {
15e9f109 3252 NV next = Perl_floor(anv / 128);
a6ec74c1 3253 if (in <= buf) /* this cannot happen ;-) */
49704364 3254 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3255 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3256 anv = next;
3257 } while (anv > 0);
a6ec74c1 3258 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3259 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3260 in, (buf + sizeof(buf)) - in);
3261 } else {
735b914b
JH
3262 char *from, *result, *in;
3263 SV *norm;
3264 STRLEN len;
3265 bool done;
3266
f337b084 3267 w_string:
735b914b
JH
3268 /* Copy string and check for compliance */
3269 from = SvPV(fromstr, len);
3270 if ((norm = is_an_int(from, len)) == NULL)
49704364 3271 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
3272
3273 New('w', result, len, char);
3274 in = result + len;
3275 done = FALSE;
f337b084 3276 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3277 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3278 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3279 in, (result + len) - in);
735b914b
JH
3280 Safefree(result);
3281 SvREFCNT_dec(norm); /* free norm */
fc241834 3282 }
a6ec74c1
JH
3283 }
3284 break;
3285 case 'i':
49704364 3286 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3287 while (len-- > 0) {
f337b084 3288 int aint;
a6ec74c1
JH
3289 fromstr = NEXTFROM;
3290 aint = SvIV(fromstr);
1109a392 3291 DO_BO_PACK(aint, i);
f337b084 3292 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3293 }
3294 break;
7212898e 3295#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3296 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3297#endif
a6ec74c1
JH
3298 case 'N':
3299 while (len-- > 0) {
f337b084 3300 U32 au32;
a6ec74c1 3301 fromstr = NEXTFROM;
ef108786 3302 au32 = SvUV(fromstr);
a6ec74c1 3303#ifdef HAS_HTONL
ef108786 3304 au32 = PerlSock_htonl(au32);
a6ec74c1 3305#endif
f337b084 3306 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3307 }
3308 break;
7212898e 3309#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3310 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3311#endif
a6ec74c1
JH
3312 case 'V':
3313 while (len-- > 0) {
f337b084 3314 U32 au32;
a6ec74c1 3315 fromstr = NEXTFROM;
ef108786 3316 au32 = SvUV(fromstr);
a6ec74c1 3317#ifdef HAS_HTOVL
ef108786 3318 au32 = htovl(au32);
a6ec74c1 3319#endif
f337b084 3320 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3321 }
3322 break;
49704364 3323 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3324#if LONGSIZE != SIZE32
fc241834 3325 while (len-- > 0) {
f337b084 3326 unsigned long aulong;
fc241834
RGS
3327 fromstr = NEXTFROM;
3328 aulong = SvUV(fromstr);
3329 DO_BO_PACK(aulong, l);
f337b084 3330 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3331 }
49704364
LW
3332 break;
3333#else
3334 /* Fall though! */
a6ec74c1 3335#endif
49704364 3336 case 'L':
fc241834 3337 while (len-- > 0) {
f337b084 3338 U32 au32;
fc241834
RGS
3339 fromstr = NEXTFROM;
3340 au32 = SvUV(fromstr);
3341 DO_BO_PACK(au32, 32);
f337b084 3342 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3343 }
3344 break;
49704364 3345 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3346#if LONGSIZE != SIZE32
fc241834 3347 while (len-- > 0) {
f337b084 3348 long along;
fc241834
RGS
3349 fromstr = NEXTFROM;
3350 along = SvIV(fromstr);
3351 DO_BO_PACK(along, l);
f337b084 3352 PUSH_VAR(utf8, cur, along);
a6ec74c1 3353 }
49704364
LW
3354 break;
3355#else
3356 /* Fall though! */
a6ec74c1 3357#endif
49704364
LW
3358 case 'l':
3359 while (len-- > 0) {
f337b084 3360 I32 ai32;
49704364 3361 fromstr = NEXTFROM;
ef108786
MHM
3362 ai32 = SvIV(fromstr);
3363 DO_BO_PACK(ai32, 32);
f337b084 3364 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3365 }
3366 break;
3367#ifdef HAS_QUAD
3368 case 'Q':
3369 while (len-- > 0) {
f337b084 3370 Uquad_t auquad;
a6ec74c1 3371 fromstr = NEXTFROM;
f337b084 3372 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3373 DO_BO_PACK(auquad, 64);
f337b084 3374 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3375 }
3376 break;
3377 case 'q':
3378 while (len-- > 0) {
f337b084 3379 Quad_t aquad;
a6ec74c1
JH
3380 fromstr = NEXTFROM;
3381 aquad = (Quad_t)SvIV(fromstr);
1109a392 3382 DO_BO_PACK(aquad, 64);
f337b084 3383 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3384 }
3385 break;
f337b084 3386#endif /* HAS_QUAD */
a6ec74c1
JH
3387 case 'P':
3388 len = 1; /* assume SV is correct length */
f337b084 3389 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3390 /* Fall through! */
a6ec74c1
JH
3391 case 'p':
3392 while (len-- > 0) {
f337b084
TH
3393 char *aptr;
3394
a6ec74c1 3395 fromstr = NEXTFROM;
28a4f200 3396 SvGETMAGIC(fromstr);
3397 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3398 else {
3399 STRLEN n_a;
3400 /* XXX better yet, could spirit away the string to
3401 * a safe spot and hang on to it until the result
3402 * of pack() (and all copies of the result) are
3403 * gone.
3404 */
f337b084
TH
3405 if (ckWARN(WARN_PACK) &&
3406 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3407 !SvREADONLY(fromstr)))) {
9014280d 3408 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3409 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3410 }
3411 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3412 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3413 else
28a4f200 3414 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3415 }
1109a392 3416 DO_BO_PACK_P(aptr);
f337b084 3417 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3418 }
3419 break;
fc241834
RGS
3420 case 'u': {
3421 char *aptr, *aend;
3422 bool from_utf8;
f337b084 3423
a6ec74c1 3424 fromstr = NEXTFROM;
fc241834
RGS
3425 if (len <= 2) len = 45;
3426 else len = len / 3 * 3;
3427 if (len >= 64) {
3428 Perl_warner(aTHX_ packWARN(WARN_PACK),
3429 "Field too wide in 'u' format in pack");
3430 len = 63;
3431 }
a6ec74c1 3432 aptr = SvPV(fromstr, fromlen);
fc241834
RGS
3433 from_utf8 = DO_UTF8(fromstr);
3434 if (from_utf8) {
3435 aend = aptr + fromlen;
3436 fromlen = sv_len_utf8(fromstr);
3437 } else aend = NULL; /* Unused, but keep compilers happy */
3438 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3439 while (fromlen > 0) {
fc241834 3440 U8 *end;
a6ec74c1 3441 I32 todo;
fc241834 3442 U8 hunk[1+63/3*4+1];
a6ec74c1 3443
eb160463 3444 if ((I32)fromlen > len)
a6ec74c1
JH
3445 todo = len;
3446 else
3447 todo = fromlen;
fc241834
RGS
3448 if (from_utf8) {
3449 char buffer[64];
3450 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3451 'u' | TYPE_IS_PACK)) {
3452 *cur = '\0';
3453 SvCUR(cat) = cur - start;
3454 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3455 }
3456 end = doencodes(hunk, buffer, todo);
3457 } else {
3458 end = doencodes(hunk, aptr, todo);
3459 aptr += todo;
3460 }
3461 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3462 fromlen -= todo;
3463 }
a6ec74c1
JH
3464 break;
3465 }
f337b084
TH
3466 }
3467 *cur = '\0';
3468 SvCUR(cat) = cur - start;
3469 no_change:
49704364 3470 *symptr = lookahead;
a6ec74c1 3471 }
49704364 3472 return beglist;
18529408
IZ
3473}
3474#undef NEXTFROM
3475
3476
3477PP(pp_pack)
3478{
3479 dSP; dMARK; dORIGMARK; dTARGET;
3480 register SV *cat = TARG;
3481 STRLEN fromlen;
3482 register char *pat = SvPVx(*++MARK, fromlen);
3483 register char *patend = pat + fromlen;
3484
3485 MARK++;
3486 sv_setpvn(cat, "", 0);
f337b084 3487 SvUTF8_off(cat);
18529408 3488
7accc089 3489 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3490
a6ec74c1
JH
3491 SvSETMAGIC(cat);
3492 SP = ORIGMARK;
3493 PUSHs(cat);
3494 RETURN;
3495}
a6ec74c1 3496
73cb7263
NC
3497/*
3498 * Local variables:
3499 * c-indentation-style: bsd
3500 * c-basic-offset: 4
3501 * indent-tabs-mode: t
3502 * End:
3503 *
edf815fd 3504 * vim: shiftwidth=4:
73cb7263 3505*/