This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Extract the generated packprops array into packsizetables.c.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
7212898e 85#if PERL_VERSION >= 9
f337b084
TH
86# define PERL_PACK_CAN_BYTEORDER
87# define PERL_PACK_CAN_SHRIEKSIGN
88#endif
89
90#ifndef CHAR_BIT
91# define CHAR_BIT 8
7212898e 92#endif
3473cf63
RGS
93/* Maximum number of bytes to which a byte can grow due to upgrade */
94#define UTF8_EXPAND 2
7212898e 95
a6ec74c1 96/*
a6ec74c1
JH
97 * Offset for integer pack/unpack.
98 *
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
101 */
102
103/*
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
109 */
110/*
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
113 */
114#define SIZE16 2
115#define SIZE32 4
116
117/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
118 --jhi Feb 1999 */
119
1109a392
MHM
120#if U16SIZE > SIZE16 || U32SIZE > SIZE32
121# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
122# define OFF16(p) ((char*)(p))
123# define OFF32(p) ((char*)(p))
a6ec74c1 124# else
1109a392 125# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
126# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
128# else
08ca2aa3 129 ++++ bad cray byte order
a6ec74c1
JH
130# endif
131# endif
a6ec74c1 132#else
08ca2aa3
TH
133# define OFF16(p) ((char *) (p))
134# define OFF32(p) ((char *) (p))
a6ec74c1
JH
135#endif
136
f337b084
TH
137/* Only to be used inside a loop (see the break) */
138#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
139 if (utf8) { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
141 } else { \
142 Copy(s, OFF16(p), SIZE16, char); \
143 (s) += SIZE16; \
144 } \
145} STMT_END
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
149 if (utf8) { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
151 } else { \
152 Copy(s, OFF32(p), SIZE32, char); \
153 (s) += SIZE32; \
154 } \
155} STMT_END
156
157#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
159
160/* Only to be used inside a loop (see the break) */
275663fa 161#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
162STMT_START { \
163 if (utf8) { \
f337b084 164 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 165 (char *) (buf), len, datumtype)) break; \
08ca2aa3 166 } else { \
275663fa
TC
167 Copy(s, (char *) (buf), len, char); \
168 s += len; \
08ca2aa3 169 } \
08ca2aa3
TH
170} STMT_END
171
275663fa
TC
172#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174
f337b084 175#define PUSH_VAR(utf8, aptr, var) \
230e1fce 176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 177
49704364
WL
178/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179#define MAX_SUB_TEMPLATE_LEVEL 100
180
66c611c5 181/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
182#define FLAG_WAS_UTF8 0x40
183#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 184#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 185#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
186#define FLAG_SLASH 0x04
187#define FLAG_COMMA 0x02
188#define FLAG_PACK 0x01
189
a6ec74c1
JH
190STATIC SV *
191S_mul128(pTHX_ SV *sv, U8 m)
192{
193 STRLEN len;
194 char *s = SvPV(sv, len);
195 char *t;
a6ec74c1 196
7918f24d
NC
197 PERL_ARGS_ASSERT_MUL128;
198
a6ec74c1 199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 200 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
201
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
204 sv = tmpNew;
205 s = SvPV(sv, len);
206 }
207 t = s + len - 1;
208 while (!*t) /* trailing '\0'? */
209 t--;
210 while (t > s) {
f7fe979e 211 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
212 *(t--) = '0' + (char)(i % 10);
213 m = (char)(i / 10);
a6ec74c1
JH
214 }
215 return (sv);
216}
217
218/* Explosives and implosives. */
219
220#if 'I' == 73 && 'J' == 74
221/* On an ASCII/ISO kind of system */
222#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
223#else
224/*
225 Some other sort of character set - use memchr() so we don't match
226 the null byte.
227 */
228#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229#endif
230
66c611c5 231/* type modifiers */
62f95557 232#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
233#define TYPE_IS_BIG_ENDIAN 0x200
234#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 235#define TYPE_IS_PACK 0x800
1109a392 236#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 237#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
238#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239
7212898e 240#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 241# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 242#else
28be1210 243# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
244#endif
245
246#ifndef PERL_PACK_CAN_BYTEORDER
247/* Put "can't" first because it is shorter */
248# define TYPE_ENDIANNESS(t) 0
249# define TYPE_NO_ENDIANNESS(t) (t)
250
251# define ENDIANNESS_ALLOWED_TYPES ""
252
253# define DO_BO_UNPACK(var, type)
254# define DO_BO_PACK(var, type)
07409e01
NC
255# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
257# define DO_BO_UNPACK_N(var, type)
258# define DO_BO_PACK_N(var, type)
259# define DO_BO_UNPACK_P(var)
260# define DO_BO_PACK_P(var)
45f723e1
NC
261# define DO_BO_UNPACK_PC(var)
262# define DO_BO_PACK_PC(var)
66c611c5 263
f337b084 264#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
265
266# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
268
269# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
270
271# define DO_BO_UNPACK(var, type) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
276 default: break; \
277 } \
278 } STMT_END
279
7212898e 280# define DO_BO_PACK(var, type) \
1109a392 281 STMT_START { \
66c611c5 282 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
285 default: break; \
286 } \
287 } STMT_END
288
07409e01 289# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 292 case TYPE_IS_BIG_ENDIAN: \
07409e01 293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
294 break; \
295 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
297 break; \
298 default: \
299 break; \
300 } \
301 } STMT_END
302
07409e01 303# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 304 STMT_START { \
66c611c5 305 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 306 case TYPE_IS_BIG_ENDIAN: \
07409e01 307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
311 break; \
312 default: \
313 break; \
314 } \
315 } STMT_END
316
7212898e 317# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
318 STMT_START { \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
323 break; \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
327 break; \
328 default: \
329 break; \
330 } \
331 } STMT_END
332
7212898e 333# if PTRSIZE == INTSIZE
07409e01
NC
334# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 338# elif PTRSIZE == LONGSIZE
fa58a56f
S
339# if LONGSIZE < IVSIZE && IVSIZE == 8
340# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
344# else
345# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
349# endif
2b00a750
JD
350# elif PTRSIZE == IVSIZE
351# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
355# else
356# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
358# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 360# endif
1109a392 361
7212898e 362# if defined(my_htolen) && defined(my_letohn) && \
1109a392 363 defined(my_htoben) && defined(my_betohn)
7212898e 364# define DO_BO_UNPACK_N(var, type) \
1109a392 365 STMT_START { \
66c611c5 366 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
369 default: break; \
370 } \
371 } STMT_END
372
7212898e 373# define DO_BO_PACK_N(var, type) \
1109a392 374 STMT_START { \
66c611c5 375 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
378 default: break; \
379 } \
380 } STMT_END
7212898e
NC
381# else
382# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
384# endif
385
f337b084 386#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 387
78d46eaa 388#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 389#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
390#define PACK_SIZE_MASK 0x3F
391
298bc19c 392#include "packsizetables.c"
78d46eaa 393
08ca2aa3 394STATIC U8
f7fe979e 395uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 396{
08ca2aa3 397 STRLEN retlen;
0bcc34c2 398 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 399 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 400 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
401 warnings), but these two mean we make no progress in the string and
402 might enter an infinite loop */
403 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
404 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
405 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 406 if (val >= 0x100) {
a2a5de95
NC
407 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
408 "Character in '%c' format wrapped in unpack",
409 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
410 val &= 0xff;
411 }
412 *s += retlen;
fe2774ed 413 return (U8)val;
08ca2aa3
TH
414}
415
f337b084
TH
416#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
417 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
418 *(U8 *)(s)++)
419
420STATIC bool
f7fe979e 421uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
422{
423 UV val;
424 STRLEN retlen;
f7fe979e 425 const char *from = *s;
08ca2aa3 426 int bad = 0;
f7fe979e 427 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
428 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
429 for (;buf_len > 0; buf_len--) {
430 if (from >= end) return FALSE;
f337b084 431 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
432 if (retlen == (STRLEN) -1 || retlen == 0) {
433 from += UTF8SKIP(from);
434 bad |= 1;
435 } else from += retlen;
436 if (val >= 0x100) {
437 bad |= 2;
438 val &= 0xff;
439 }
fe2774ed 440 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
441 }
442 /* We have enough characters for the buffer. Did we have problems ? */
443 if (bad) {
444 if (bad & 1) {
445 /* Rewalk the string fragment while warning */
f7fe979e 446 const char *ptr;
9e27e96a 447 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
448 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
449 if (ptr >= end) break;
f337b084 450 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
451 }
452 if (from > end) from = end;
453 }
a2a5de95
NC
454 if ((bad & 2))
455 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 456 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
457 "Character(s) in '%c' format wrapped in %s",
458 (int) TYPE_NO_MODIFIERS(datumtype),
459 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
460 }
461 *s = from;
462 return TRUE;
463}
464
465STATIC bool
f7fe979e 466next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 467{
97aff369 468 dVAR;
08ca2aa3 469 STRLEN retlen;
0bcc34c2 470 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
471 if (val >= 0x100 || !ISUUCHAR(val) ||
472 retlen == (STRLEN) -1 || retlen == 0) {
473 *out = 0;
474 return FALSE;
475 }
476 *out = PL_uudmap[val] & 077;
f337b084 477 *s += retlen;
08ca2aa3
TH
478 return TRUE;
479}
78d46eaa 480
64844641 481STATIC char *
14333449 482S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 483 const U8 * const end = start + len;
64844641 484
7918f24d
NC
485 PERL_ARGS_ASSERT_BYTES_TO_UNI;
486
f337b084 487 while (start < end) {
48fa4626
TS
488 const UV uv = NATIVE_TO_ASCII(*start);
489 if (UNI_IS_INVARIANT(uv))
490 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
491 else {
492 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
493 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
494 }
495 start++;
496 }
64844641 497 return dest;
f337b084
TH
498}
499
230e1fce
NC
500#define PUSH_BYTES(utf8, cur, buf, len) \
501STMT_START { \
64844641
AL
502 if (utf8) \
503 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
504 else { \
505 Copy(buf, cur, len, char); \
506 (cur) += (len); \
507 } \
f337b084
TH
508} STMT_END
509
510#define GROWING(utf8, cat, start, cur, in_len) \
511STMT_START { \
512 STRLEN glen = (in_len); \
3473cf63 513 if (utf8) glen *= UTF8_EXPAND; \
f337b084 514 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 515 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
516 (cur) = (start) + SvCUR(cat); \
517 } \
518} STMT_END
519
520#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
521STMT_START { \
f7fe979e 522 const STRLEN glen = (in_len); \
f337b084 523 STRLEN gl = glen; \
3473cf63 524 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
525 if ((cur) + gl >= (start) + SvLEN(cat)) { \
526 *cur = '\0'; \
b162af07 527 SvCUR_set((cat), (cur) - (start)); \
0bd48802 528 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
529 (cur) = (start) + SvCUR(cat); \
530 } \
531 PUSH_BYTES(utf8, cur, buf, glen); \
532} STMT_END
533
534#define PUSH_BYTE(utf8, s, byte) \
535STMT_START { \
536 if (utf8) { \
f7fe979e 537 const U8 au8 = (byte); \
64844641 538 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
539 } else *(U8 *)(s)++ = (byte); \
540} STMT_END
541
542/* Only to be used inside a loop (see the break) */
543#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
544STMT_START { \
545 STRLEN retlen; \
546 if (str >= end) break; \
547 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
548 if (retlen == (STRLEN) -1 || retlen == 0) { \
549 *cur = '\0'; \
550 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
551 } \
552 str += retlen; \
553} STMT_END
554
f7fe979e
AL
555static const char *_action( const tempsym_t* symptr )
556{
10edeb5d 557 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
558}
559
206947d2 560/* Returns the sizeof() struct described by pat */
028d1f6d 561STATIC I32
f337b084 562S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 563{
f337b084 564 I32 total = 0;
206947d2 565
7918f24d
NC
566 PERL_ARGS_ASSERT_MEASURE_STRUCT;
567
49704364 568 while (next_symbol(symptr)) {
f337b084 569 I32 len;
f7fe979e 570 int size;
f337b084
TH
571
572 switch (symptr->howlen) {
fc241834 573 case e_star:
49704364 574 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 575 _action( symptr ) );
49704364 576 break;
f337b084
TH
577 default:
578 /* e_no_len and e_number */
579 len = symptr->length;
580 break;
49704364
WL
581 }
582
a7a3cfaa 583 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 584 if (!size) {
f7fe979e 585 int star;
80a13697
NC
586 /* endianness doesn't influence the size of a type */
587 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
588 default:
589 Perl_croak(aTHX_ "Invalid type '%c' in %s",
590 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 591 _action( symptr ) );
28be1210
TH
592#ifdef PERL_PACK_CAN_SHRIEKSIGN
593 case '.' | TYPE_IS_SHRIEKING:
594 case '@' | TYPE_IS_SHRIEKING:
595#endif
80a13697 596 case '@':
28be1210 597 case '.':
80a13697
NC
598 case '/':
599 case 'U': /* XXXX Is it correct? */
600 case 'w':
601 case 'u':
602 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 603 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 604 _action( symptr ) );
80a13697
NC
605 case '%':
606 size = 0;
607 break;
608 case '(':
fc241834
RGS
609 {
610 tempsym_t savsym = *symptr;
611 symptr->patptr = savsym.grpbeg;
612 symptr->patend = savsym.grpend;
613 /* XXXX Theoretically, we need to measure many times at
614 different positions, since the subexpression may contain
615 alignment commands, but be not of aligned length.
616 Need to detect this and croak(). */
617 size = measure_struct(symptr);
618 *symptr = savsym;
619 break;
620 }
80a13697
NC
621 case 'X' | TYPE_IS_SHRIEKING:
622 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
623 */
624 if (!len) /* Avoid division by 0 */
625 len = 1;
626 len = total % len; /* Assumed: the start is aligned. */
627 /* FALL THROUGH */
628 case 'X':
629 size = -1;
630 if (total < len)
f7fe979e 631 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
632 break;
633 case 'x' | TYPE_IS_SHRIEKING:
634 if (!len) /* Avoid division by 0 */
635 len = 1;
636 star = total % len; /* Assumed: the start is aligned. */
637 if (star) /* Other portable ways? */
638 len = len - star;
639 else
640 len = 0;
641 /* FALL THROUGH */
642 case 'x':
643 case 'A':
644 case 'Z':
645 case 'a':
80a13697
NC
646 size = 1;
647 break;
648 case 'B':
649 case 'b':
650 len = (len + 7)/8;
651 size = 1;
652 break;
653 case 'H':
654 case 'h':
655 len = (len + 1)/2;
656 size = 1;
657 break;
78d46eaa 658
80a13697
NC
659 case 'P':
660 len = 1;
661 size = sizeof(char*);
78d46eaa
NC
662 break;
663 }
206947d2
IZ
664 }
665 total += len * size;
666 }
667 return total;
668}
669
49704364
WL
670
671/* locate matching closing parenthesis or bracket
672 * returns char pointer to char after match, or NULL
673 */
f7fe979e 674STATIC const char *
5aaab254 675S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 676{
7918f24d
NC
677 PERL_ARGS_ASSERT_GROUP_END;
678
49704364 679 while (patptr < patend) {
f7fe979e 680 const char c = *patptr++;
49704364
WL
681
682 if (isSPACE(c))
683 continue;
684 else if (c == ender)
685 return patptr-1;
686 else if (c == '#') {
687 while (patptr < patend && *patptr != '\n')
688 patptr++;
689 continue;
690 } else if (c == '(')
691 patptr = group_end(patptr, patend, ')') + 1;
692 else if (c == '[')
693 patptr = group_end(patptr, patend, ']') + 1;
18529408 694 }
49704364
WL
695 Perl_croak(aTHX_ "No group ending character '%c' found in template",
696 ender);
697 return 0;
18529408
IZ
698}
699
49704364
WL
700
701/* Convert unsigned decimal number to binary.
702 * Expects a pointer to the first digit and address of length variable
703 * Advances char pointer to 1st non-digit char and returns number
fc241834 704 */
f7fe979e 705STATIC const char *
5aaab254 706S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
707{
708 I32 len = *patptr++ - '0';
7918f24d
NC
709
710 PERL_ARGS_ASSERT_GET_NUM;
711
49704364
WL
712 while (isDIGIT(*patptr)) {
713 if (len >= 0x7FFFFFFF/10)
714 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
715 len = (len * 10) + (*patptr++ - '0');
716 }
717 *lenptr = len;
718 return patptr;
719}
720
721/* The marvellous template parsing routine: Using state stored in *symptr,
722 * locates next template code and count
723 */
724STATIC bool
f337b084 725S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 726{
f7fe979e 727 const char* patptr = symptr->patptr;
0bcc34c2 728 const char* const patend = symptr->patend;
49704364 729
7918f24d
NC
730 PERL_ARGS_ASSERT_NEXT_SYMBOL;
731
49704364
WL
732 symptr->flags &= ~FLAG_SLASH;
733
734 while (patptr < patend) {
735 if (isSPACE(*patptr))
736 patptr++;
737 else if (*patptr == '#') {
738 patptr++;
739 while (patptr < patend && *patptr != '\n')
740 patptr++;
741 if (patptr < patend)
742 patptr++;
743 } else {
fc241834 744 /* We should have found a template code */
49704364 745 I32 code = *patptr++ & 0xFF;
66c611c5 746 U32 inherited_modifiers = 0;
49704364
WL
747
748 if (code == ','){ /* grandfather in commas but with a warning */
749 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
750 symptr->flags |= FLAG_COMMA;
751 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 752 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
753 }
754 continue;
755 }
fc241834 756
49704364 757 /* for '(', skip to ')' */
fc241834 758 if (code == '(') {
49704364
WL
759 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
760 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 761 _action( symptr ) );
49704364
WL
762 symptr->grpbeg = patptr;
763 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
764 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
765 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 766 _action( symptr ) );
49704364
WL
767 }
768
66c611c5
MHM
769 /* look for group modifiers to inherit */
770 if (TYPE_ENDIANNESS(symptr->flags)) {
771 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
772 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
773 }
774
1109a392
MHM
775 /* look for modifiers */
776 while (patptr < patend) {
b7787f18
AL
777 const char *allowed;
778 I32 modifier;
1109a392
MHM
779 switch (*patptr) {
780 case '!':
781 modifier = TYPE_IS_SHRIEKING;
7212898e 782 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 783 break;
7212898e 784#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
785 case '>':
786 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 787 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
788 break;
789 case '<':
790 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 791 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 792 break;
f337b084 793#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 794 default:
b7787f18
AL
795 allowed = "";
796 modifier = 0;
1109a392
MHM
797 break;
798 }
66c611c5 799
1109a392
MHM
800 if (modifier == 0)
801 break;
66c611c5 802
1109a392
MHM
803 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
804 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 805 allowed, _action( symptr ) );
66c611c5
MHM
806
807 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 808 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 809 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
810 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
811 TYPE_ENDIANNESS_MASK)
812 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 813 *patptr, _action( symptr ) );
66c611c5 814
a2a5de95
NC
815 if ((code & modifier)) {
816 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
817 "Duplicate modifier '%c' after '%c' in %s",
818 *patptr, (int) TYPE_NO_MODIFIERS(code),
819 _action( symptr ) );
1109a392 820 }
66c611c5 821
1109a392
MHM
822 code |= modifier;
823 patptr++;
49704364
WL
824 }
825
66c611c5
MHM
826 /* inherit modifiers */
827 code |= inherited_modifiers;
828
fc241834 829 /* look for count and/or / */
49704364
WL
830 if (patptr < patend) {
831 if (isDIGIT(*patptr)) {
832 patptr = get_num( patptr, &symptr->length );
833 symptr->howlen = e_number;
834
835 } else if (*patptr == '*') {
836 patptr++;
837 symptr->howlen = e_star;
838
839 } else if (*patptr == '[') {
f7fe979e 840 const char* lenptr = ++patptr;
49704364
WL
841 symptr->howlen = e_number;
842 patptr = group_end( patptr, patend, ']' ) + 1;
843 /* what kind of [] is it? */
844 if (isDIGIT(*lenptr)) {
845 lenptr = get_num( lenptr, &symptr->length );
846 if( *lenptr != ']' )
847 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 848 _action( symptr ) );
49704364
WL
849 } else {
850 tempsym_t savsym = *symptr;
851 symptr->patend = patptr-1;
852 symptr->patptr = lenptr;
853 savsym.length = measure_struct(symptr);
854 *symptr = savsym;
855 }
856 } else {
857 symptr->howlen = e_no_len;
858 symptr->length = 1;
859 }
860
861 /* try to find / */
862 while (patptr < patend) {
863 if (isSPACE(*patptr))
864 patptr++;
865 else if (*patptr == '#') {
866 patptr++;
867 while (patptr < patend && *patptr != '\n')
868 patptr++;
869 if (patptr < patend)
870 patptr++;
871 } else {
66c611c5 872 if (*patptr == '/') {
49704364
WL
873 symptr->flags |= FLAG_SLASH;
874 patptr++;
66c611c5
MHM
875 if (patptr < patend &&
876 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 877 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 878 _action( symptr ) );
49704364
WL
879 }
880 break;
881 }
18529408 882 }
49704364
WL
883 } else {
884 /* at end - no count, no / */
885 symptr->howlen = e_no_len;
886 symptr->length = 1;
887 }
888
889 symptr->code = code;
fc241834 890 symptr->patptr = patptr;
49704364 891 return TRUE;
18529408 892 }
49704364 893 }
fc241834 894 symptr->patptr = patptr;
49704364 895 return FALSE;
18529408
IZ
896}
897
18529408 898/*
fc241834 899 There is no way to cleanly handle the case where we should process the
08ca2aa3 900 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
901 (e.g. estimates like strend-s as an upper bound for the number of
902 characters left wouldn't work). So if we foresee the need of this
903 (pattern starts with U or contains U0), we want to work on the encoded
904 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
905 themselves if they need to do a lot of unpacks like this on it
906*/
fc241834 907STATIC bool
08ca2aa3
TH
908need_utf8(const char *pat, const char *patend)
909{
910 bool first = TRUE;
7918f24d
NC
911
912 PERL_ARGS_ASSERT_NEED_UTF8;
913
08ca2aa3
TH
914 while (pat < patend) {
915 if (pat[0] == '#') {
916 pat++;
f7fe979e 917 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
918 if (!pat) return FALSE;
919 } else if (pat[0] == 'U') {
920 if (first || pat[1] == '0') return TRUE;
921 } else first = FALSE;
922 pat++;
923 }
924 return FALSE;
925}
926
927STATIC char
928first_symbol(const char *pat, const char *patend) {
7918f24d
NC
929 PERL_ARGS_ASSERT_FIRST_SYMBOL;
930
08ca2aa3
TH
931 while (pat < patend) {
932 if (pat[0] != '#') return pat[0];
933 pat++;
f7fe979e 934 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
935 if (!pat) return 0;
936 pat++;
937 }
938 return 0;
939}
940
941/*
7accc089
JH
942=for apidoc unpackstring
943
21ebfc7a
DM
944The engine implementing the unpack() Perl function.
945
946Using the template pat..patend, this function unpacks the string
947s..strend into a number of mortal SVs, which it pushes onto the perl
948argument (@_) stack (so you will need to issue a C<PUTBACK> before and
949C<SPAGAIN> after the call to this function). It returns the number of
950pushed elements.
951
952The strend and patend pointers should point to the byte following the last
953character of each string.
954
955Although this function returns its values on the perl argument stack, it
956doesn't take any parameters from that stack (and thus in particular
957there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
958example).
7accc089
JH
959
960=cut */
961
962I32
f7fe979e 963Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 964{
f7fe979e 965 tempsym_t sym;
08ca2aa3 966
7918f24d
NC
967 PERL_ARGS_ASSERT_UNPACKSTRING;
968
f337b084 969 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
970 else if (need_utf8(pat, patend)) {
971 /* We probably should try to avoid this in case a scalar context call
972 wouldn't get to the "U0" */
973 STRLEN len = strend - s;
230e1fce 974 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
975 SAVEFREEPV(s);
976 strend = s + len;
f337b084 977 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
978 }
979
f337b084
TH
980 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
981 flags |= FLAG_PARSE_UTF8;
08ca2aa3 982
f7fe979e 983 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
984
985 return unpack_rec(&sym, s, s, strend, NULL );
986}
987
4136a0f7 988STATIC I32
f7fe979e 989S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 990{
27da23d5 991 dVAR; dSP;
3297d27d 992 SV *sv = NULL;
f7fe979e 993 const I32 start_sp_offset = SP - PL_stack_base;
49704364 994 howlen_t howlen;
a6ec74c1 995 I32 checksum = 0;
92d41999 996 UV cuv = 0;
a6ec74c1 997 NV cdouble = 0.0;
f337b084 998 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 999 bool beyond = FALSE;
21c16052 1000 bool explicit_length;
9e27e96a 1001 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1002 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1003
1004 PERL_ARGS_ASSERT_UNPACK_REC;
1005
28be1210 1006 symptr->strbeg = s - strbeg;
49704364 1007
49704364 1008 while (next_symbol(symptr)) {
a7a3cfaa 1009 packprops_t props;
9e27e96a 1010 I32 len;
f337b084 1011 I32 datumtype = symptr->code;
206947d2 1012 /* do first one only unless in list context
08ca2aa3 1013 / is implemented by unpacking the count, then popping it from the
206947d2 1014 stack, so must check that we're not in the middle of a / */
49704364 1015 if ( unpack_only_one
206947d2 1016 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1017 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1018 break;
49704364 1019
f337b084 1020 switch (howlen = symptr->howlen) {
fc241834
RGS
1021 case e_star:
1022 len = strend - strbeg; /* long enough */
49704364 1023 break;
f337b084
TH
1024 default:
1025 /* e_no_len and e_number */
1026 len = symptr->length;
1027 break;
49704364 1028 }
18529408 1029
21c16052 1030 explicit_length = TRUE;
a6ec74c1 1031 redo_switch:
49704364 1032 beyond = s >= strend;
a7a3cfaa
TH
1033
1034 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1035 if (props) {
1036 /* props nonzero means we can process this letter. */
9e27e96a
AL
1037 const long size = props & PACK_SIZE_MASK;
1038 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1039 if (len > howmany)
1040 len = howmany;
1041
1042 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1043 if (len && unpack_only_one) len = 1;
1044 EXTEND(SP, len);
1045 EXTEND_MORTAL(len);
78d46eaa
NC
1046 }
1047 }
a7a3cfaa 1048
1109a392 1049 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1050 default:
1109a392 1051 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1052
a6ec74c1 1053 case '%':
49704364 1054 if (howlen == e_no_len)
18529408 1055 len = 16; /* len is not specified */
a6ec74c1 1056 checksum = len;
92d41999 1057 cuv = 0;
a6ec74c1 1058 cdouble = 0;
18529408 1059 continue;
a6ec74c1 1060 break;
18529408
IZ
1061 case '(':
1062 {
49704364 1063 tempsym_t savsym = *symptr;
9e27e96a 1064 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1065 symptr->flags |= group_modifiers;
49704364 1066 symptr->patend = savsym.grpend;
28be1210 1067 symptr->previous = &savsym;
49704364 1068 symptr->level++;
18529408 1069 PUTBACK;
c6f750d1 1070 if (len && unpack_only_one) len = 1;
18529408 1071 while (len--) {
49704364 1072 symptr->patptr = savsym.grpbeg;
f337b084
TH
1073 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1074 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1075 unpack_rec(symptr, s, strbeg, strend, &s);
1076 if (s == strend && savsym.howlen == e_star)
49704364 1077 break; /* No way to continue */
18529408
IZ
1078 }
1079 SPAGAIN;
28be1210 1080 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1081 *symptr = savsym;
18529408
IZ
1082 break;
1083 }
28be1210
TH
1084#ifdef PERL_PACK_CAN_SHRIEKSIGN
1085 case '.' | TYPE_IS_SHRIEKING:
1086#endif
1087 case '.': {
9e27e96a 1088 const char *from;
28be1210
TH
1089 SV *sv;
1090#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1091 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1092#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1093 const bool u8 = utf8;
28be1210
TH
1094#endif
1095 if (howlen == e_star) from = strbeg;
1096 else if (len <= 0) from = s;
1097 else {
1098 tempsym_t *group = symptr;
1099
1100 while (--len && group) group = group->previous;
1101 from = group ? strbeg + group->strbeg : strbeg;
1102 }
1103 sv = from <= s ?
00646304
CB
1104 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1105 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1106 mXPUSHs(sv);
28be1210
TH
1107 break;
1108 }
1109#ifdef PERL_PACK_CAN_SHRIEKSIGN
1110 case '@' | TYPE_IS_SHRIEKING:
1111#endif
a6ec74c1 1112 case '@':
28be1210
TH
1113 s = strbeg + symptr->strbeg;
1114#ifdef PERL_PACK_CAN_SHRIEKSIGN
1115 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1116#else /* PERL_PACK_CAN_SHRIEKSIGN */
1117 if (utf8)
1118#endif
1119 {
08ca2aa3
TH
1120 while (len > 0) {
1121 if (s >= strend)
1122 Perl_croak(aTHX_ "'@' outside of string in unpack");
1123 s += UTF8SKIP(s);
1124 len--;
1125 }
1126 if (s > strend)
1127 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1128 } else {
28be1210 1129 if (strend-s < len)
fc241834 1130 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1131 s += len;
08ca2aa3 1132 }
a6ec74c1 1133 break;
62f95557
IZ
1134 case 'X' | TYPE_IS_SHRIEKING:
1135 if (!len) /* Avoid division by 0 */
1136 len = 1;
08ca2aa3 1137 if (utf8) {
f7fe979e 1138 const char *hop, *last;
f337b084
TH
1139 I32 l = len;
1140 hop = last = strbeg;
1141 while (hop < s) {
1142 hop += UTF8SKIP(hop);
1143 if (--l == 0) {
08ca2aa3 1144 last = hop;
f337b084
TH
1145 l = len;
1146 }
fc241834 1147 }
f337b084
TH
1148 if (last > s)
1149 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1150 s = last;
1151 break;
f337b084
TH
1152 }
1153 len = (s - strbeg) % len;
62f95557 1154 /* FALL THROUGH */
a6ec74c1 1155 case 'X':
08ca2aa3
TH
1156 if (utf8) {
1157 while (len > 0) {
1158 if (s <= strbeg)
1159 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1160 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1161 if (s <= strbeg)
1162 Perl_croak(aTHX_ "'X' outside of string in unpack");
1163 }
1164 len--;
1165 }
1166 } else {
fc241834
RGS
1167 if (len > s - strbeg)
1168 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1169 s -= len;
08ca2aa3 1170 }
a6ec74c1 1171 break;
9e27e96a
AL
1172 case 'x' | TYPE_IS_SHRIEKING: {
1173 I32 ai32;
62f95557
IZ
1174 if (!len) /* Avoid division by 0 */
1175 len = 1;
230e1fce
NC
1176 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1177 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1178 if (ai32 == 0) break;
1179 len -= ai32;
9e27e96a 1180 }
62f95557 1181 /* FALL THROUGH */
a6ec74c1 1182 case 'x':
08ca2aa3
TH
1183 if (utf8) {
1184 while (len>0) {
1185 if (s >= strend)
1186 Perl_croak(aTHX_ "'x' outside of string in unpack");
1187 s += UTF8SKIP(s);
1188 len--;
1189 }
1190 } else {
fc241834
RGS
1191 if (len > strend - s)
1192 Perl_croak(aTHX_ "'x' outside of string in unpack");
1193 s += len;
f337b084 1194 }
a6ec74c1
JH
1195 break;
1196 case '/':
49704364
WL
1197 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1198 break;
a6ec74c1
JH
1199 case 'A':
1200 case 'Z':
1201 case 'a':
08ca2aa3
TH
1202 if (checksum) {
1203 /* Preliminary length estimate is assumed done in 'W' */
1204 if (len > strend - s) len = strend - s;
1205 goto W_checksum;
1206 }
1207 if (utf8) {
1208 I32 l;
f7fe979e 1209 const char *hop;
08ca2aa3
TH
1210 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1211 if (hop >= strend) {
1212 if (hop > strend)
1213 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1214 break;
fc241834 1215 }
a6ec74c1 1216 }
08ca2aa3
TH
1217 if (hop > strend)
1218 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1219 len = hop - s;
1220 } else if (len > strend - s)
1221 len = strend - s;
1222
1223 if (datumtype == 'Z') {
1224 /* 'Z' strips stuff after first null */
f7fe979e 1225 const char *ptr, *end;
f337b084
TH
1226 end = s + len;
1227 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1228 sv = newSVpvn(s, ptr-s);
1229 if (howlen == e_star) /* exact for 'Z*' */
1230 len = ptr-s + (ptr != strend ? 1 : 0);
1231 } else if (datumtype == 'A') {
1232 /* 'A' strips both nulls and spaces */
f7fe979e 1233 const char *ptr;
18bdf90a
TH
1234 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1235 for (ptr = s+len-1; ptr >= s; ptr--)
1236 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1237 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1238 if (ptr >= s) ptr += UTF8SKIP(ptr);
1239 else ptr++;
28be1210 1240 if (ptr > s+len)
18bdf90a
TH
1241 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1242 } else {
1243 for (ptr = s+len-1; ptr >= s; ptr--)
1244 if (*ptr != 0 && !isSPACE(*ptr)) break;
1245 ptr++;
1246 }
08ca2aa3
TH
1247 sv = newSVpvn(s, ptr-s);
1248 } else sv = newSVpvn(s, len);
1249
1250 if (utf8) {
1251 SvUTF8_on(sv);
1252 /* Undo any upgrade done due to need_utf8() */
f337b084 1253 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1254 sv_utf8_downgrade(sv, 0);
a6ec74c1 1255 }
6e449a3a 1256 mXPUSHs(sv);
08ca2aa3 1257 s += len;
a6ec74c1
JH
1258 break;
1259 case 'B':
08ca2aa3
TH
1260 case 'b': {
1261 char *str;
49704364 1262 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1263 len = (strend - s) * 8;
1264 if (checksum) {
f337b084 1265 if (utf8)
08ca2aa3 1266 while (len >= 8 && s < strend) {
f337b084 1267 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1268 len -= 8;
1269 }
f337b084 1270 else
fc241834 1271 while (len >= 8) {
08ca2aa3 1272 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1273 len -= 8;
1274 }
08ca2aa3
TH
1275 if (len && s < strend) {
1276 U8 bits;
f337b084
TH
1277 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1278 if (datumtype == 'b')
a6ec74c1 1279 while (len-- > 0) {
92d41999 1280 if (bits & 1) cuv++;
a6ec74c1
JH
1281 bits >>= 1;
1282 }
f337b084 1283 else
a6ec74c1 1284 while (len-- > 0) {
08ca2aa3 1285 if (bits & 0x80) cuv++;
a6ec74c1
JH
1286 bits <<= 1;
1287 }
fc241834 1288 }
a6ec74c1
JH
1289 break;
1290 }
08ca2aa3 1291
561b68a9 1292 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1293 SvPOK_on(sv);
1294 str = SvPVX(sv);
1295 if (datumtype == 'b') {
f337b084 1296 U8 bits = 0;
f7fe979e 1297 const I32 ai32 = len;
08ca2aa3
TH
1298 for (len = 0; len < ai32; len++) {
1299 if (len & 7) bits >>= 1;
1300 else if (utf8) {
1301 if (s >= strend) break;
f337b084 1302 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1303 } else bits = *(U8 *) s++;
1304 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1305 }
08ca2aa3 1306 } else {
f337b084 1307 U8 bits = 0;
f7fe979e 1308 const I32 ai32 = len;
08ca2aa3
TH
1309 for (len = 0; len < ai32; len++) {
1310 if (len & 7) bits <<= 1;
1311 else if (utf8) {
1312 if (s >= strend) break;
f337b084 1313 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1314 } else bits = *(U8 *) s++;
1315 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1316 }
1317 }
1318 *str = '\0';
aa07b2f6 1319 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1320 XPUSHs(sv);
a6ec74c1 1321 break;
08ca2aa3 1322 }
a6ec74c1 1323 case 'H':
08ca2aa3 1324 case 'h': {
3297d27d 1325 char *str = NULL;
fc241834 1326 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1327 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1328 len = (strend - s) * 2;
858fe5e1
TC
1329 if (!checksum) {
1330 sv = sv_2mortal(newSV(len ? len : 1));
1331 SvPOK_on(sv);
1332 str = SvPVX(sv);
1333 }
a6ec74c1 1334 if (datumtype == 'h') {
f337b084 1335 U8 bits = 0;
9e27e96a 1336 I32 ai32 = len;
fc241834
RGS
1337 for (len = 0; len < ai32; len++) {
1338 if (len & 1) bits >>= 4;
1339 else if (utf8) {
1340 if (s >= strend) break;
f337b084 1341 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1342 } else bits = * (U8 *) s++;
858fe5e1
TC
1343 if (!checksum)
1344 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1345 }
08ca2aa3 1346 } else {
f337b084 1347 U8 bits = 0;
f7fe979e 1348 const I32 ai32 = len;
08ca2aa3
TH
1349 for (len = 0; len < ai32; len++) {
1350 if (len & 1) bits <<= 4;
1351 else if (utf8) {
1352 if (s >= strend) break;
f337b084 1353 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1354 } else bits = *(U8 *) s++;
858fe5e1
TC
1355 if (!checksum)
1356 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1357 }
1358 }
858fe5e1
TC
1359 if (!checksum) {
1360 *str = '\0';
1361 SvCUR_set(sv, str - SvPVX_const(sv));
1362 XPUSHs(sv);
1363 }
a6ec74c1 1364 break;
08ca2aa3 1365 }
1651fc44
ML
1366 case 'C':
1367 if (len == 0) {
1368 if (explicit_length)
1369 /* Switch to "character" mode */
1370 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1371 break;
1372 }
1373 /* FALL THROUGH */
a6ec74c1 1374 case 'c':
1651fc44
ML
1375 while (len-- > 0 && s < strend) {
1376 int aint;
1377 if (utf8)
1378 {
1379 STRLEN retlen;
1380 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1381 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1382 if (retlen == (STRLEN) -1 || retlen == 0)
1383 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1384 s += retlen;
1385 }
1386 else
1387 aint = *(U8 *)(s)++;
1388 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1389 aint -= 256;
08ca2aa3 1390 if (!checksum)
6e449a3a 1391 mPUSHi(aint);
73cb7263
NC
1392 else if (checksum > bits_in_uv)
1393 cdouble += (NV)aint;
1394 else
1395 cuv += aint;
a6ec74c1
JH
1396 }
1397 break;
08ca2aa3
TH
1398 case 'W':
1399 W_checksum:
1651fc44 1400 if (utf8) {
08ca2aa3 1401 while (len-- > 0 && s < strend) {
08ca2aa3 1402 STRLEN retlen;
f7fe979e 1403 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1404 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1405 if (retlen == (STRLEN) -1 || retlen == 0)
1406 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1407 s += retlen;
1408 if (!checksum)
6e449a3a 1409 mPUSHu(val);
08ca2aa3
TH
1410 else if (checksum > bits_in_uv)
1411 cdouble += (NV) val;
d6d3e8bd 1412 else
08ca2aa3 1413 cuv += val;
fc241834 1414 }
08ca2aa3 1415 } else if (!checksum)
a6ec74c1 1416 while (len-- > 0) {
f7fe979e 1417 const U8 ch = *(U8 *) s++;
6e449a3a 1418 mPUSHu(ch);
a6ec74c1 1419 }
08ca2aa3
TH
1420 else if (checksum > bits_in_uv)
1421 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1422 else
1423 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1424 break;
1425 case 'U':
35bcd338 1426 if (len == 0) {
c5333953 1427 if (explicit_length && howlen != e_star) {
08ca2aa3 1428 /* Switch to "bytes in UTF-8" mode */
f337b084 1429 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1430 else
1431 /* Should be impossible due to the need_utf8() test */
1432 Perl_croak(aTHX_ "U0 mode on a byte string");
1433 }
35bcd338
JH
1434 break;
1435 }
08ca2aa3 1436 if (len > strend - s) len = strend - s;
fc241834 1437 if (!checksum) {
08ca2aa3
TH
1438 if (len && unpack_only_one) len = 1;
1439 EXTEND(SP, len);
1440 EXTEND_MORTAL(len);
fc241834 1441 }
08ca2aa3
TH
1442 while (len-- > 0 && s < strend) {
1443 STRLEN retlen;
1444 UV auv;
1445 if (utf8) {
1446 U8 result[UTF8_MAXLEN];
f7fe979e 1447 const char *ptr = s;
08ca2aa3 1448 STRLEN len;
08ca2aa3
TH
1449 /* Bug: warns about bad utf8 even if we are short on bytes
1450 and will break out of the loop */
230e1fce
NC
1451 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1452 'U'))
08ca2aa3
TH
1453 break;
1454 len = UTF8SKIP(result);
fc241834 1455 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1456 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1457 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1458 s = ptr;
1459 } else {
cde84f2a 1460 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1461 if (retlen == (STRLEN) -1 || retlen == 0)
1462 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1463 s += retlen;
1464 }
1465 if (!checksum)
6e449a3a 1466 mPUSHu(auv);
73cb7263 1467 else if (checksum > bits_in_uv)
08ca2aa3 1468 cdouble += (NV) auv;
73cb7263 1469 else
08ca2aa3 1470 cuv += auv;
a6ec74c1
JH
1471 }
1472 break;
49704364
WL
1473 case 's' | TYPE_IS_SHRIEKING:
1474#if SHORTSIZE != SIZE16
73cb7263 1475 while (len-- > 0) {
08ca2aa3 1476 short ashort;
f337b084
TH
1477 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1478 DO_BO_UNPACK(ashort, s);
08ca2aa3 1479 if (!checksum)
6e449a3a 1480 mPUSHi(ashort);
73cb7263
NC
1481 else if (checksum > bits_in_uv)
1482 cdouble += (NV)ashort;
1483 else
1484 cuv += ashort;
49704364
WL
1485 }
1486 break;
1487#else
1488 /* Fallthrough! */
a6ec74c1 1489#endif
49704364 1490 case 's':
73cb7263 1491 while (len-- > 0) {
08ca2aa3
TH
1492 I16 ai16;
1493
1494#if U16SIZE > SIZE16
1495 ai16 = 0;
1496#endif
f337b084 1497 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1498 DO_BO_UNPACK(ai16, 16);
1109a392 1499#if U16SIZE > SIZE16
73cb7263
NC
1500 if (ai16 > 32767)
1501 ai16 -= 65536;
a6ec74c1 1502#endif
08ca2aa3 1503 if (!checksum)
6e449a3a 1504 mPUSHi(ai16);
73cb7263
NC
1505 else if (checksum > bits_in_uv)
1506 cdouble += (NV)ai16;
1507 else
1508 cuv += ai16;
a6ec74c1
JH
1509 }
1510 break;
49704364
WL
1511 case 'S' | TYPE_IS_SHRIEKING:
1512#if SHORTSIZE != SIZE16
73cb7263 1513 while (len-- > 0) {
08ca2aa3 1514 unsigned short aushort;
f337b084
TH
1515 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1516 DO_BO_UNPACK(aushort, s);
08ca2aa3 1517 if (!checksum)
6e449a3a 1518 mPUSHu(aushort);
73cb7263
NC
1519 else if (checksum > bits_in_uv)
1520 cdouble += (NV)aushort;
1521 else
1522 cuv += aushort;
49704364
WL
1523 }
1524 break;
1525#else
486ec47a 1526 /* Fallthrough! */
49704364 1527#endif
a6ec74c1
JH
1528 case 'v':
1529 case 'n':
1530 case 'S':
73cb7263 1531 while (len-- > 0) {
08ca2aa3
TH
1532 U16 au16;
1533#if U16SIZE > SIZE16
1534 au16 = 0;
1535#endif
f337b084 1536 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1537 DO_BO_UNPACK(au16, 16);
a6ec74c1 1538#ifdef HAS_NTOHS
73cb7263
NC
1539 if (datumtype == 'n')
1540 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1541#endif
1542#ifdef HAS_VTOHS
73cb7263
NC
1543 if (datumtype == 'v')
1544 au16 = vtohs(au16);
a6ec74c1 1545#endif
08ca2aa3 1546 if (!checksum)
6e449a3a 1547 mPUSHu(au16);
73cb7263 1548 else if (checksum > bits_in_uv)
f337b084 1549 cdouble += (NV) au16;
73cb7263
NC
1550 else
1551 cuv += au16;
a6ec74c1
JH
1552 }
1553 break;
7212898e 1554#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1555 case 'v' | TYPE_IS_SHRIEKING:
1556 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1557 while (len-- > 0) {
08ca2aa3
TH
1558 I16 ai16;
1559# if U16SIZE > SIZE16
1560 ai16 = 0;
1561# endif
f337b084 1562 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1563# ifdef HAS_NTOHS
73cb7263 1564 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1565 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1566# endif /* HAS_NTOHS */
1567# ifdef HAS_VTOHS
73cb7263 1568 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1569 ai16 = (I16) vtohs((U16) ai16);
1570# endif /* HAS_VTOHS */
1571 if (!checksum)
6e449a3a 1572 mPUSHi(ai16);
73cb7263 1573 else if (checksum > bits_in_uv)
08ca2aa3 1574 cdouble += (NV) ai16;
73cb7263
NC
1575 else
1576 cuv += ai16;
068bd2e7
MHM
1577 }
1578 break;
08ca2aa3 1579#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1580 case 'i':
49704364 1581 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1582 while (len-- > 0) {
08ca2aa3 1583 int aint;
f337b084
TH
1584 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1585 DO_BO_UNPACK(aint, i);
08ca2aa3 1586 if (!checksum)
6e449a3a 1587 mPUSHi(aint);
73cb7263
NC
1588 else if (checksum > bits_in_uv)
1589 cdouble += (NV)aint;
1590 else
1591 cuv += aint;
a6ec74c1
JH
1592 }
1593 break;
1594 case 'I':
49704364 1595 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1596 while (len-- > 0) {
08ca2aa3 1597 unsigned int auint;
f337b084
TH
1598 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1599 DO_BO_UNPACK(auint, i);
08ca2aa3 1600 if (!checksum)
6e449a3a 1601 mPUSHu(auint);
73cb7263
NC
1602 else if (checksum > bits_in_uv)
1603 cdouble += (NV)auint;
1604 else
1605 cuv += auint;
a6ec74c1
JH
1606 }
1607 break;
92d41999 1608 case 'j':
73cb7263 1609 while (len-- > 0) {
08ca2aa3 1610 IV aiv;
f337b084 1611 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1612#if IVSIZE == INTSIZE
f337b084 1613 DO_BO_UNPACK(aiv, i);
1109a392 1614#elif IVSIZE == LONGSIZE
f337b084 1615 DO_BO_UNPACK(aiv, l);
1109a392 1616#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1617 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1618#else
1619 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1620#endif
08ca2aa3 1621 if (!checksum)
6e449a3a 1622 mPUSHi(aiv);
73cb7263
NC
1623 else if (checksum > bits_in_uv)
1624 cdouble += (NV)aiv;
1625 else
1626 cuv += aiv;
92d41999
JH
1627 }
1628 break;
1629 case 'J':
73cb7263 1630 while (len-- > 0) {
08ca2aa3 1631 UV auv;
f337b084 1632 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1633#if IVSIZE == INTSIZE
f337b084 1634 DO_BO_UNPACK(auv, i);
08ca2aa3 1635#elif IVSIZE == LONGSIZE
f337b084 1636 DO_BO_UNPACK(auv, l);
08ca2aa3 1637#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1638 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1639#else
1640 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1641#endif
08ca2aa3 1642 if (!checksum)
6e449a3a 1643 mPUSHu(auv);
73cb7263
NC
1644 else if (checksum > bits_in_uv)
1645 cdouble += (NV)auv;
1646 else
1647 cuv += auv;
92d41999
JH
1648 }
1649 break;
49704364
WL
1650 case 'l' | TYPE_IS_SHRIEKING:
1651#if LONGSIZE != SIZE32
73cb7263 1652 while (len-- > 0) {
08ca2aa3 1653 long along;
f337b084
TH
1654 SHIFT_VAR(utf8, s, strend, along, datumtype);
1655 DO_BO_UNPACK(along, l);
08ca2aa3 1656 if (!checksum)
6e449a3a 1657 mPUSHi(along);
73cb7263
NC
1658 else if (checksum > bits_in_uv)
1659 cdouble += (NV)along;
1660 else
1661 cuv += along;
49704364
WL
1662 }
1663 break;
1664#else
1665 /* Fallthrough! */
a6ec74c1 1666#endif
49704364 1667 case 'l':
73cb7263 1668 while (len-- > 0) {
08ca2aa3
TH
1669 I32 ai32;
1670#if U32SIZE > SIZE32
1671 ai32 = 0;
1672#endif
f337b084 1673 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1674 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1675#if U32SIZE > SIZE32
08ca2aa3 1676 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1677#endif
08ca2aa3 1678 if (!checksum)
6e449a3a 1679 mPUSHi(ai32);
73cb7263
NC
1680 else if (checksum > bits_in_uv)
1681 cdouble += (NV)ai32;
1682 else
1683 cuv += ai32;
a6ec74c1
JH
1684 }
1685 break;
49704364
WL
1686 case 'L' | TYPE_IS_SHRIEKING:
1687#if LONGSIZE != SIZE32
73cb7263 1688 while (len-- > 0) {
08ca2aa3 1689 unsigned long aulong;
f337b084
TH
1690 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1691 DO_BO_UNPACK(aulong, l);
08ca2aa3 1692 if (!checksum)
6e449a3a 1693 mPUSHu(aulong);
73cb7263
NC
1694 else if (checksum > bits_in_uv)
1695 cdouble += (NV)aulong;
1696 else
1697 cuv += aulong;
49704364
WL
1698 }
1699 break;
1700#else
1701 /* Fall through! */
1702#endif
a6ec74c1
JH
1703 case 'V':
1704 case 'N':
1705 case 'L':
73cb7263 1706 while (len-- > 0) {
08ca2aa3
TH
1707 U32 au32;
1708#if U32SIZE > SIZE32
1709 au32 = 0;
1710#endif
f337b084 1711 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1712 DO_BO_UNPACK(au32, 32);
a6ec74c1 1713#ifdef HAS_NTOHL
73cb7263
NC
1714 if (datumtype == 'N')
1715 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1716#endif
1717#ifdef HAS_VTOHL
73cb7263
NC
1718 if (datumtype == 'V')
1719 au32 = vtohl(au32);
a6ec74c1 1720#endif
08ca2aa3 1721 if (!checksum)
6e449a3a 1722 mPUSHu(au32);
fc241834
RGS
1723 else if (checksum > bits_in_uv)
1724 cdouble += (NV)au32;
1725 else
1726 cuv += au32;
a6ec74c1
JH
1727 }
1728 break;
7212898e 1729#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1730 case 'V' | TYPE_IS_SHRIEKING:
1731 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1732 while (len-- > 0) {
08ca2aa3
TH
1733 I32 ai32;
1734# if U32SIZE > SIZE32
1735 ai32 = 0;
1736# endif
f337b084 1737 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1738# ifdef HAS_NTOHL
73cb7263
NC
1739 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1740 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1741# endif
1742# ifdef HAS_VTOHL
73cb7263
NC
1743 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1744 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1745# endif
1746 if (!checksum)
6e449a3a 1747 mPUSHi(ai32);
73cb7263
NC
1748 else if (checksum > bits_in_uv)
1749 cdouble += (NV)ai32;
1750 else
1751 cuv += ai32;
068bd2e7
MHM
1752 }
1753 break;
08ca2aa3 1754#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1755 case 'p':
a6ec74c1 1756 while (len-- > 0) {
f7fe979e 1757 const char *aptr;
f337b084 1758 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1759 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1760 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1761 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1762 }
1763 break;
1764 case 'w':
a6ec74c1
JH
1765 {
1766 UV auv = 0;
1767 U32 bytes = 0;
fc241834 1768
08ca2aa3
TH
1769 while (len > 0 && s < strend) {
1770 U8 ch;
f337b084 1771 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1772 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1773 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1774 if (ch < 0x80) {
a6ec74c1 1775 bytes = 0;
6e449a3a 1776 mPUSHu(auv);
a6ec74c1
JH
1777 len--;
1778 auv = 0;
08ca2aa3 1779 continue;
a6ec74c1 1780 }
08ca2aa3 1781 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1782 const char *t;
a6ec74c1 1783
f5992bc4 1784 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1785 while (s < strend) {
f337b084 1786 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1787 sv = mul128(sv, (U8)(ch & 0x7f));
1788 if (!(ch & 0x80)) {
a6ec74c1
JH
1789 bytes = 0;
1790 break;
1791 }
1792 }
10516c54 1793 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1794 while (*t == '0')
1795 t++;
1796 sv_chop(sv, t);
6e449a3a 1797 mPUSHs(sv);
a6ec74c1
JH
1798 len--;
1799 auv = 0;
1800 }
1801 }
1802 if ((s >= strend) && bytes)
49704364 1803 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1804 }
1805 break;
1806 case 'P':
49704364
WL
1807 if (symptr->howlen == e_star)
1808 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1809 EXTEND(SP, 1);
2d3e0934 1810 if (s + sizeof(char*) <= strend) {
08ca2aa3 1811 char *aptr;
f337b084 1812 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1813 DO_BO_UNPACK_PC(aptr);
fc241834 1814 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1815 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1816 }
a6ec74c1
JH
1817 break;
1818#ifdef HAS_QUAD
1819 case 'q':
73cb7263 1820 while (len-- > 0) {
08ca2aa3 1821 Quad_t aquad;
f337b084
TH
1822 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1823 DO_BO_UNPACK(aquad, 64);
08ca2aa3 1824 if (!checksum)
6e449a3a
MHM
1825 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1826 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1827 else if (checksum > bits_in_uv)
1828 cdouble += (NV)aquad;
1829 else
1830 cuv += aquad;
1831 }
a6ec74c1
JH
1832 break;
1833 case 'Q':
73cb7263 1834 while (len-- > 0) {
08ca2aa3 1835 Uquad_t auquad;
f337b084
TH
1836 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1837 DO_BO_UNPACK(auquad, 64);
08ca2aa3 1838 if (!checksum)
6e449a3a
MHM
1839 mPUSHs(auquad <= UV_MAX ?
1840 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1841 else if (checksum > bits_in_uv)
1842 cdouble += (NV)auquad;
1843 else
1844 cuv += auquad;
a6ec74c1
JH
1845 }
1846 break;
08ca2aa3 1847#endif /* HAS_QUAD */
a6ec74c1
JH
1848 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1849 case 'f':
73cb7263 1850 while (len-- > 0) {
08ca2aa3 1851 float afloat;
f337b084 1852 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
1853 DO_BO_UNPACK_N(afloat, float);
1854 if (!checksum)
6e449a3a 1855 mPUSHn(afloat);
08ca2aa3 1856 else
73cb7263 1857 cdouble += afloat;
fc241834 1858 }
a6ec74c1
JH
1859 break;
1860 case 'd':
73cb7263 1861 while (len-- > 0) {
08ca2aa3 1862 double adouble;
f337b084 1863 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
1864 DO_BO_UNPACK_N(adouble, double);
1865 if (!checksum)
6e449a3a 1866 mPUSHn(adouble);
08ca2aa3 1867 else
73cb7263 1868 cdouble += adouble;
fc241834 1869 }
a6ec74c1 1870 break;
92d41999 1871 case 'F':
73cb7263 1872 while (len-- > 0) {
275663fa
TC
1873 NV_bytes anv;
1874 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1875 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 1876 if (!checksum)
275663fa 1877 mPUSHn(anv.nv);
08ca2aa3 1878 else
275663fa 1879 cdouble += anv.nv;
fc241834 1880 }
92d41999
JH
1881 break;
1882#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1883 case 'D':
73cb7263 1884 while (len-- > 0) {
275663fa
TC
1885 ld_bytes aldouble;
1886 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1887 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 1888 if (!checksum)
275663fa 1889 mPUSHn(aldouble.ld);
08ca2aa3 1890 else
275663fa 1891 cdouble += aldouble.ld;
92d41999
JH
1892 }
1893 break;
1894#endif
a6ec74c1 1895 case 'u':
858fe5e1 1896 if (!checksum) {
f7fe979e 1897 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1898 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1899 if (l) SvPOK_on(sv);
1900 }
1901 if (utf8) {
1902 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1903 I32 a, b, c, d;
db187877 1904 char hunk[3];
08ca2aa3 1905
08ca2aa3
TH
1906 while (len > 0) {
1907 next_uni_uu(aTHX_ &s, strend, &a);
1908 next_uni_uu(aTHX_ &s, strend, &b);
1909 next_uni_uu(aTHX_ &s, strend, &c);
1910 next_uni_uu(aTHX_ &s, strend, &d);
1911 hunk[0] = (char)((a << 2) | (b >> 4));
1912 hunk[1] = (char)((b << 4) | (c >> 2));
1913 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1914 if (!checksum)
1915 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1916 len -= 3;
1917 }
1918 if (s < strend) {
f7fe979e
AL
1919 if (*s == '\n') {
1920 s++;
1921 }
08ca2aa3
TH
1922 else {
1923 /* possible checksum byte */
f7fe979e
AL
1924 const char *skip = s+UTF8SKIP(s);
1925 if (skip < strend && *skip == '\n')
1926 s = skip+1;
08ca2aa3
TH
1927 }
1928 }
1929 }
1930 } else {
fc241834
RGS
1931 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1932 I32 a, b, c, d;
db187877 1933 char hunk[3];
a6ec74c1 1934
fc241834
RGS
1935 len = PL_uudmap[*(U8*)s++] & 077;
1936 while (len > 0) {
1937 if (s < strend && ISUUCHAR(*s))
1938 a = PL_uudmap[*(U8*)s++] & 077;
1939 else
1940 a = 0;
1941 if (s < strend && ISUUCHAR(*s))
1942 b = PL_uudmap[*(U8*)s++] & 077;
1943 else
1944 b = 0;
1945 if (s < strend && ISUUCHAR(*s))
1946 c = PL_uudmap[*(U8*)s++] & 077;
1947 else
1948 c = 0;
1949 if (s < strend && ISUUCHAR(*s))
1950 d = PL_uudmap[*(U8*)s++] & 077;
1951 else
1952 d = 0;
1953 hunk[0] = (char)((a << 2) | (b >> 4));
1954 hunk[1] = (char)((b << 4) | (c >> 2));
1955 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1956 if (!checksum)
1957 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1958 len -= 3;
1959 }
1960 if (*s == '\n')
1961 s++;
1962 else /* possible checksum byte */
1963 if (s + 1 < strend && s[1] == '\n')
1964 s += 2;
a6ec74c1 1965 }
08ca2aa3 1966 }
858fe5e1
TC
1967 if (!checksum)
1968 XPUSHs(sv);
a6ec74c1
JH
1969 break;
1970 }
49704364 1971
a6ec74c1 1972 if (checksum) {
1109a392 1973 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1974 (checksum > bits_in_uv &&
08ca2aa3
TH
1975 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1976 NV trouble, anv;
a6ec74c1 1977
08ca2aa3 1978 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1979 while (checksum >= 16) {
1980 checksum -= 16;
08ca2aa3 1981 anv *= 65536.0;
a6ec74c1 1982 }
a6ec74c1 1983 while (cdouble < 0.0)
08ca2aa3
TH
1984 cdouble += anv;
1985 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1986 sv = newSVnv(cdouble);
a6ec74c1
JH
1987 }
1988 else {
fa8ec7c1
NC
1989 if (checksum < bits_in_uv) {
1990 UV mask = ((UV)1 << checksum) - 1;
92d41999 1991 cuv &= mask;
a6ec74c1 1992 }
c4c5f44a 1993 sv = newSVuv(cuv);
a6ec74c1 1994 }
6e449a3a 1995 mXPUSHs(sv);
a6ec74c1
JH
1996 checksum = 0;
1997 }
fc241834 1998
49704364
WL
1999 if (symptr->flags & FLAG_SLASH){
2000 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 2001 break;
49704364
WL
2002 if( next_symbol(symptr) ){
2003 if( symptr->howlen == e_number )
2004 Perl_croak(aTHX_ "Count after length/code in unpack" );
2005 if( beyond ){
2006 /* ...end of char buffer then no decent length available */
2007 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2008 } else {
2009 /* take top of stack (hope it's numeric) */
2010 len = POPi;
2011 if( len < 0 )
2012 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2013 }
2014 } else {
2015 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2016 }
2017 datumtype = symptr->code;
21c16052 2018 explicit_length = FALSE;
49704364
WL
2019 goto redo_switch;
2020 }
a6ec74c1 2021 }
49704364 2022
18529408
IZ
2023 if (new_s)
2024 *new_s = s;
2025 PUTBACK;
2026 return SP - PL_stack_base - start_sp_offset;
2027}
2028
2029PP(pp_unpack)
2030{
97aff369 2031 dVAR;
18529408 2032 dSP;
bab9c0ac 2033 dPOPPOPssrl;
18529408
IZ
2034 I32 gimme = GIMME_V;
2035 STRLEN llen;
2036 STRLEN rlen;
5c144d81
NC
2037 const char *pat = SvPV_const(left, llen);
2038 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2039 const char *strend = s + rlen;
2040 const char *patend = pat + llen;
08ca2aa3 2041 I32 cnt;
18529408
IZ
2042
2043 PUTBACK;
7accc089 2044 cnt = unpackstring(pat, patend, s, strend,
49704364 2045 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2046 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2047
18529408
IZ
2048 SPAGAIN;
2049 if ( !cnt && gimme == G_SCALAR )
2050 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2051 RETURN;
2052}
2053
f337b084 2054STATIC U8 *
f7fe979e 2055doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2056{
f337b084 2057 *h++ = PL_uuemap[len];
a6ec74c1 2058 while (len > 2) {
f337b084
TH
2059 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2060 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2061 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2062 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2063 s += 3;
2064 len -= 3;
2065 }
2066 if (len > 0) {
f7fe979e 2067 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2068 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2069 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2070 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2071 *h++ = PL_uuemap[0];
a6ec74c1 2072 }
f337b084
TH
2073 *h++ = '\n';
2074 return h;
a6ec74c1
JH
2075}
2076
2077STATIC SV *
f7fe979e 2078S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2079{
8b6e33c7
AL
2080 SV *result = newSVpvn(s, l);
2081 char *const result_c = SvPV_nolen(result); /* convenience */
2082 char *out = result_c;
2083 bool skip = 1;
2084 bool ignore = 0;
a6ec74c1 2085
7918f24d
NC
2086 PERL_ARGS_ASSERT_IS_AN_INT;
2087
a6ec74c1
JH
2088 while (*s) {
2089 switch (*s) {
2090 case ' ':
2091 break;
2092 case '+':
2093 if (!skip) {
2094 SvREFCNT_dec(result);
2095 return (NULL);
2096 }
2097 break;
2098 case '0':
2099 case '1':
2100 case '2':
2101 case '3':
2102 case '4':
2103 case '5':
2104 case '6':
2105 case '7':
2106 case '8':
2107 case '9':
2108 skip = 0;
2109 if (!ignore) {
2110 *(out++) = *s;
2111 }
2112 break;
2113 case '.':
2114 ignore = 1;
2115 break;
2116 default:
2117 SvREFCNT_dec(result);
2118 return (NULL);
2119 }
2120 s++;
2121 }
2122 *(out++) = '\0';
2123 SvCUR_set(result, out - result_c);
2124 return (result);
2125}
2126
2127/* pnum must be '\0' terminated */
2128STATIC int
2129S_div128(pTHX_ SV *pnum, bool *done)
2130{
8b6e33c7
AL
2131 STRLEN len;
2132 char * const s = SvPV(pnum, len);
2133 char *t = s;
2134 int m = 0;
2135
7918f24d
NC
2136 PERL_ARGS_ASSERT_DIV128;
2137
8b6e33c7
AL
2138 *done = 1;
2139 while (*t) {
2140 const int i = m * 10 + (*t - '0');
2141 const int r = (i >> 7); /* r < 10 */
2142 m = i & 0x7F;
2143 if (r) {
2144 *done = 0;
2145 }
2146 *(t++) = '0' + r;
a6ec74c1 2147 }
8b6e33c7
AL
2148 *(t++) = '\0';
2149 SvCUR_set(pnum, (STRLEN) (t - s));
2150 return (m);
a6ec74c1
JH
2151}
2152
18529408 2153/*
7accc089
JH
2154=for apidoc packlist
2155
2156The engine implementing pack() Perl function.
2157
bfce84ec
AL
2158=cut
2159*/
7accc089
JH
2160
2161void
5aaab254 2162Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2163{
97aff369 2164 dVAR;
aadb217d
JH
2165 tempsym_t sym;
2166
7918f24d
NC
2167 PERL_ARGS_ASSERT_PACKLIST;
2168
f7fe979e 2169 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2170
f337b084
TH
2171 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2172 Also make sure any UTF8 flag is loaded */
56eb0262 2173 SvPV_force_nolen(cat);
bfce84ec
AL
2174 if (DO_UTF8(cat))
2175 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2176
49704364
WL
2177 (void)pack_rec( cat, &sym, beglist, endlist );
2178}
2179
f337b084
TH
2180/* like sv_utf8_upgrade, but also repoint the group start markers */
2181STATIC void
2182marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2183 STRLEN len;
2184 tempsym_t *group;
f7fe979e
AL
2185 const char *from_ptr, *from_start, *from_end, **marks, **m;
2186 char *to_start, *to_ptr;
f337b084
TH
2187
2188 if (SvUTF8(sv)) return;
2189
aa07b2f6 2190 from_start = SvPVX_const(sv);
f337b084
TH
2191 from_end = from_start + SvCUR(sv);
2192 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2193 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2194 if (from_ptr == from_end) {
2195 /* Simple case: no character needs to be changed */
2196 SvUTF8_on(sv);
2197 return;
2198 }
2199
3473cf63 2200 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2201 Newx(to_start, len, char);
f337b084
TH
2202 Copy(from_start, to_start, from_ptr-from_start, char);
2203 to_ptr = to_start + (from_ptr-from_start);
2204
a02a5408 2205 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2206 for (group=sym_ptr; group; group = group->previous)
2207 marks[group->level] = from_start + group->strbeg;
2208 marks[sym_ptr->level+1] = from_end+1;
2209 for (m = marks; *m < from_ptr; m++)
2210 *m = to_start + (*m-from_start);
2211
2212 for (;from_ptr < from_end; from_ptr++) {
2213 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2214 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2215 }
2216 *to_ptr = 0;
2217
2218 while (*m == from_ptr) *m++ = to_ptr;
2219 if (m != marks + sym_ptr->level+1) {
2220 Safefree(marks);
2221 Safefree(to_start);
5637ef5b
NC
2222 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2223 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2224 }
2225 for (group=sym_ptr; group; group = group->previous)
2226 group->strbeg = marks[group->level] - to_start;
2227 Safefree(marks);
2228
2229 if (SvOOK(sv)) {
2230 if (SvIVX(sv)) {
b162af07 2231 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2232 from_start -= SvIVX(sv);
2233 SvIV_set(sv, 0);
2234 }
2235 SvFLAGS(sv) &= ~SVf_OOK;
2236 }
2237 if (SvLEN(sv) != 0)
2238 Safefree(from_start);
f880fe2f 2239 SvPV_set(sv, to_start);
b162af07
SP
2240 SvCUR_set(sv, to_ptr - to_start);
2241 SvLEN_set(sv, len);
f337b084
TH
2242 SvUTF8_on(sv);
2243}
2244
2245/* Exponential string grower. Makes string extension effectively O(n)
2246 needed says how many extra bytes we need (not counting the final '\0')
2247 Only grows the string if there is an actual lack of space
2248*/
2249STATIC char *
0bd48802 2250S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2251 const STRLEN cur = SvCUR(sv);
2252 const STRLEN len = SvLEN(sv);
f337b084 2253 STRLEN extend;
7918f24d
NC
2254
2255 PERL_ARGS_ASSERT_SV_EXP_GROW;
2256
f337b084
TH
2257 if (len - cur > needed) return SvPVX(sv);
2258 extend = needed > len ? needed : len;
2259 return SvGROW(sv, len+extend+1);
2260}
49704364
WL
2261
2262STATIC
2263SV **
f337b084 2264S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2265{
97aff369 2266 dVAR;
49704364 2267 tempsym_t lookahead;
f337b084
TH
2268 I32 items = endlist - beglist;
2269 bool found = next_symbol(symptr);
2270 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2271 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2272
7918f24d
NC
2273 PERL_ARGS_ASSERT_PACK_REC;
2274
f337b084
TH
2275 if (symptr->level == 0 && found && symptr->code == 'U') {
2276 marked_upgrade(aTHX_ cat, symptr);
2277 symptr->flags |= FLAG_DO_UTF8;
2278 utf8 = 0;
49704364 2279 }
f337b084 2280 symptr->strbeg = SvCUR(cat);
49704364
WL
2281
2282 while (found) {
f337b084
TH
2283 SV *fromstr;
2284 STRLEN fromlen;
2285 I32 len;
a0714e2c 2286 SV *lengthcode = NULL;
49704364 2287 I32 datumtype = symptr->code;
f337b084
TH
2288 howlen_t howlen = symptr->howlen;
2289 char *start = SvPVX(cat);
2290 char *cur = start + SvCUR(cat);
49704364 2291
f337b084
TH
2292#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2293
2294 switch (howlen) {
fc241834 2295 case e_star:
f337b084
TH
2296 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2297 0 : items;
2298 break;
2299 default:
2300 /* e_no_len and e_number */
2301 len = symptr->length;
49704364
WL
2302 break;
2303 }
2304
f337b084 2305 if (len) {
a7a3cfaa 2306 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2307
a7a3cfaa
TH
2308 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2309 /* We can process this letter. */
2310 STRLEN size = props & PACK_SIZE_MASK;
2311 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2312 }
f337b084
TH
2313 }
2314
49704364
WL
2315 /* Look ahead for next symbol. Do we have code/code? */
2316 lookahead = *symptr;
2317 found = next_symbol(&lookahead);
246f24af
TH
2318 if (symptr->flags & FLAG_SLASH) {
2319 IV count;
f337b084 2320 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2321 if (strchr("aAZ", lookahead.code)) {
2322 if (lookahead.howlen == e_number) count = lookahead.length;
2323 else {
ce399ba6 2324 if (items > 0) {
48a5da33 2325 count = sv_len_utf8(*beglist);
ce399ba6 2326 }
246f24af
TH
2327 else count = 0;
2328 if (lookahead.code == 'Z') count++;
2329 }
2330 } else {
2331 if (lookahead.howlen == e_number && lookahead.length < items)
2332 count = lookahead.length;
2333 else count = items;
2334 }
2335 lookahead.howlen = e_number;
2336 lookahead.length = count;
2337 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2338 }
49704364 2339
fc241834
RGS
2340 /* Code inside the switch must take care to properly update
2341 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2342 doesn't simply leave using break */
1109a392 2343 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2344 default:
f337b084
TH
2345 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2346 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2347 case '%':
49704364 2348 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2349 {
2350 char *from;
2351#ifdef PERL_PACK_CAN_SHRIEKSIGN
2352 case '.' | TYPE_IS_SHRIEKING:
2353#endif
2354 case '.':
2355 if (howlen == e_star) from = start;
2356 else if (len == 0) from = cur;
2357 else {
2358 tempsym_t *group = symptr;
2359
2360 while (--len && group) group = group->previous;
2361 from = group ? start + group->strbeg : start;
2362 }
2363 fromstr = NEXTFROM;
2364 len = SvIV(fromstr);
2365 goto resize;
2366#ifdef PERL_PACK_CAN_SHRIEKSIGN
2367 case '@' | TYPE_IS_SHRIEKING:
2368#endif
a6ec74c1 2369 case '@':
28be1210
TH
2370 from = start + symptr->strbeg;
2371 resize:
2372#ifdef PERL_PACK_CAN_SHRIEKSIGN
2373 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2374#else /* PERL_PACK_CAN_SHRIEKSIGN */
2375 if (utf8)
2376#endif
2377 if (len >= 0) {
2378 while (len && from < cur) {
2379 from += UTF8SKIP(from);
2380 len--;
2381 }
2382 if (from > cur)
2383 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2384 if (len) {
2385 /* Here we know from == cur */
2386 grow:
2387 GROWING(0, cat, start, cur, len);
2388 Zero(cur, len, char);
2389 cur += len;
2390 } else if (from < cur) {
2391 len = cur - from;
2392 goto shrink;
2393 } else goto no_change;
2394 } else {
2395 cur = from;
2396 len = -len;
2397 goto utf8_shrink;
f337b084 2398 }
28be1210
TH
2399 else {
2400 len -= cur - from;
f337b084 2401 if (len > 0) goto grow;
28be1210 2402 if (len == 0) goto no_change;
fc241834 2403 len = -len;
28be1210 2404 goto shrink;
f337b084 2405 }
a6ec74c1 2406 break;
28be1210 2407 }
fc241834 2408 case '(': {
49704364 2409 tempsym_t savsym = *symptr;
66c611c5
MHM
2410 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2411 symptr->flags |= group_modifiers;
49704364
WL
2412 symptr->patend = savsym.grpend;
2413 symptr->level++;
f337b084 2414 symptr->previous = &lookahead;
18529408 2415 while (len--) {
f337b084
TH
2416 U32 was_utf8;
2417 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2418 else symptr->flags &= ~FLAG_PARSE_UTF8;
2419 was_utf8 = SvUTF8(cat);
49704364 2420 symptr->patptr = savsym.grpbeg;
f337b084
TH
2421 beglist = pack_rec(cat, symptr, beglist, endlist);
2422 if (SvUTF8(cat) != was_utf8)
2423 /* This had better be an upgrade while in utf8==0 mode */
2424 utf8 = 1;
2425
49704364 2426 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2427 break; /* No way to continue */
2428 }
ee790063 2429 items = endlist - beglist;
f337b084
TH
2430 lookahead.flags = symptr->flags & ~group_modifiers;
2431 goto no_change;
18529408 2432 }
62f95557
IZ
2433 case 'X' | TYPE_IS_SHRIEKING:
2434 if (!len) /* Avoid division by 0 */
2435 len = 1;
f337b084
TH
2436 if (utf8) {
2437 char *hop, *last;
2438 I32 l = len;
2439 hop = last = start;
2440 while (hop < cur) {
2441 hop += UTF8SKIP(hop);
2442 if (--l == 0) {
2443 last = hop;
2444 l = len;
2445 }
2446 }
2447 if (last > cur)
2448 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2449 cur = last;
2450 break;
2451 }
2452 len = (cur-start) % len;
62f95557 2453 /* FALL THROUGH */
a6ec74c1 2454 case 'X':
f337b084
TH
2455 if (utf8) {
2456 if (len < 1) goto no_change;
28be1210 2457 utf8_shrink:
f337b084
TH
2458 while (len > 0) {
2459 if (cur <= start)
28be1210
TH
2460 Perl_croak(aTHX_ "'%c' outside of string in pack",
2461 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2462 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2463 if (cur <= start)
28be1210
TH
2464 Perl_croak(aTHX_ "'%c' outside of string in pack",
2465 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2466 }
2467 len--;
2468 }
2469 } else {
fc241834 2470 shrink:
f337b084 2471 if (cur - start < len)
28be1210
TH
2472 Perl_croak(aTHX_ "'%c' outside of string in pack",
2473 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2474 cur -= len;
2475 }
2476 if (cur < start+symptr->strbeg) {
2477 /* Make sure group starts don't point into the void */
2478 tempsym_t *group;
9e27e96a 2479 const STRLEN length = cur-start;
f337b084
TH
2480 for (group = symptr;
2481 group && length < group->strbeg;
2482 group = group->previous) group->strbeg = length;
2483 lookahead.strbeg = length;
2484 }
a6ec74c1 2485 break;
fc241834
RGS
2486 case 'x' | TYPE_IS_SHRIEKING: {
2487 I32 ai32;
62f95557
IZ
2488 if (!len) /* Avoid division by 0 */
2489 len = 1;
230e1fce 2490 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2491 else ai32 = (cur - start) % len;
2492 if (ai32 == 0) goto no_change;
2493 len -= ai32;
2494 }
2495 /* FALL THROUGH */
a6ec74c1 2496 case 'x':
f337b084 2497 goto grow;
a6ec74c1
JH
2498 case 'A':
2499 case 'Z':
f337b084 2500 case 'a': {
f7fe979e 2501 const char *aptr;
f337b084 2502
a6ec74c1 2503 fromstr = NEXTFROM;
e62f0680 2504 aptr = SvPV_const(fromstr, fromlen);
f337b084 2505 if (DO_UTF8(fromstr)) {
f7fe979e 2506 const char *end, *s;
f337b084
TH
2507
2508 if (!utf8 && !SvUTF8(cat)) {
2509 marked_upgrade(aTHX_ cat, symptr);
2510 lookahead.flags |= FLAG_DO_UTF8;
2511 lookahead.strbeg = symptr->strbeg;
2512 utf8 = 1;
2513 start = SvPVX(cat);
2514 cur = start + SvCUR(cat);
2515 }
fc241834 2516 if (howlen == e_star) {
f337b084
TH
2517 if (utf8) goto string_copy;
2518 len = fromlen+1;
2519 }
2520 s = aptr;
2521 end = aptr + fromlen;
2522 fromlen = datumtype == 'Z' ? len-1 : len;
2523 while ((I32) fromlen > 0 && s < end) {
2524 s += UTF8SKIP(s);
2525 fromlen--;
2526 }
2527 if (s > end)
2528 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2529 if (utf8) {
fc241834 2530 len = fromlen;
f337b084
TH
2531 if (datumtype == 'Z') len++;
2532 fromlen = s-aptr;
2533 len += fromlen;
fc241834 2534
f337b084 2535 goto string_copy;
fc241834 2536 }
f337b084
TH
2537 fromlen = len - fromlen;
2538 if (datumtype == 'Z') fromlen--;
2539 if (howlen == e_star) {
2540 len = fromlen;
2541 if (datumtype == 'Z') len++;
fc241834 2542 }
f337b084 2543 GROWING(0, cat, start, cur, len);
fc241834 2544 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2545 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2546 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2547 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2548 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2549 cur += fromlen;
a6ec74c1 2550 len -= fromlen;
f337b084
TH
2551 } else if (utf8) {
2552 if (howlen == e_star) {
2553 len = fromlen;
2554 if (datumtype == 'Z') len++;
a6ec74c1 2555 }
f337b084
TH
2556 if (len <= (I32) fromlen) {
2557 fromlen = len;
2558 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2559 }
fc241834 2560 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2561 upgrade, so:
2562 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2563 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2564 len -= fromlen;
2565 while (fromlen > 0) {
230e1fce 2566 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2567 aptr++;
2568 fromlen--;
fc241834 2569 }
f337b084
TH
2570 } else {
2571 string_copy:
2572 if (howlen == e_star) {
2573 len = fromlen;
2574 if (datumtype == 'Z') len++;
2575 }
2576 if (len <= (I32) fromlen) {
2577 fromlen = len;
2578 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2579 }
f337b084
TH
2580 GROWING(0, cat, start, cur, len);
2581 Copy(aptr, cur, fromlen, char);
2582 cur += fromlen;
2583 len -= fromlen;
a6ec74c1 2584 }
f337b084
TH
2585 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2586 cur += len;
3c4fb04a 2587 SvTAINT(cat);
a6ec74c1 2588 break;
f337b084 2589 }
a6ec74c1 2590 case 'B':
f337b084 2591 case 'b': {
b83604b4 2592 const char *str, *end;
f337b084
TH
2593 I32 l, field_len;
2594 U8 bits;
2595 bool utf8_source;
2596 U32 utf8_flags;
a6ec74c1 2597
fc241834 2598 fromstr = NEXTFROM;
b83604b4 2599 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2600 end = str + fromlen;
2601 if (DO_UTF8(fromstr)) {
2602 utf8_source = TRUE;
041457d9 2603 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2604 } else {
2605 utf8_source = FALSE;
2606 utf8_flags = 0; /* Unused, but keep compilers happy */
2607 }
2608 if (howlen == e_star) len = fromlen;
2609 field_len = (len+7)/8;
2610 GROWING(utf8, cat, start, cur, field_len);
2611 if (len > (I32)fromlen) len = fromlen;
2612 bits = 0;
2613 l = 0;
2614 if (datumtype == 'B')
2615 while (l++ < len) {
2616 if (utf8_source) {
95b63a38 2617 UV val = 0;
f337b084
TH
2618 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2619 bits |= val & 1;
2620 } else bits |= *str++ & 1;
2621 if (l & 7) bits <<= 1;
fc241834 2622 else {
f337b084
TH
2623 PUSH_BYTE(utf8, cur, bits);
2624 bits = 0;
a6ec74c1
JH
2625 }
2626 }
f337b084
TH
2627 else
2628 /* datumtype == 'b' */
2629 while (l++ < len) {
2630 if (utf8_source) {
95b63a38 2631 UV val = 0;
f337b084
TH
2632 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2633 if (val & 1) bits |= 0x80;
2634 } else if (*str++ & 1)
2635 bits |= 0x80;
2636 if (l & 7) bits >>= 1;
fc241834 2637 else {
f337b084
TH
2638 PUSH_BYTE(utf8, cur, bits);
2639 bits = 0;
a6ec74c1
JH
2640 }
2641 }
f337b084
TH
2642 l--;
2643 if (l & 7) {
fc241834 2644 if (datumtype == 'B')
f337b084 2645 bits <<= 7 - (l & 7);
fc241834 2646 else
f337b084
TH
2647 bits >>= 7 - (l & 7);
2648 PUSH_BYTE(utf8, cur, bits);
2649 l += 7;
a6ec74c1 2650 }
f337b084
TH
2651 /* Determine how many chars are left in the requested field */
2652 l /= 8;
2653 if (howlen == e_star) field_len = 0;
2654 else field_len -= l;
2655 Zero(cur, field_len, char);
2656 cur += field_len;
a6ec74c1 2657 break;
f337b084 2658 }
a6ec74c1 2659 case 'H':
f337b084 2660 case 'h': {
10516c54 2661 const char *str, *end;
f337b084
TH
2662 I32 l, field_len;
2663 U8 bits;
2664 bool utf8_source;
2665 U32 utf8_flags;
a6ec74c1 2666
fc241834 2667 fromstr = NEXTFROM;
10516c54 2668 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2669 end = str + fromlen;
2670 if (DO_UTF8(fromstr)) {
2671 utf8_source = TRUE;
041457d9 2672 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2673 } else {
2674 utf8_source = FALSE;
2675 utf8_flags = 0; /* Unused, but keep compilers happy */
2676 }
2677 if (howlen == e_star) len = fromlen;
2678 field_len = (len+1)/2;
2679 GROWING(utf8, cat, start, cur, field_len);
2680 if (!utf8 && len > (I32)fromlen) len = fromlen;
2681 bits = 0;
2682 l = 0;
2683 if (datumtype == 'H')
2684 while (l++ < len) {
2685 if (utf8_source) {
95b63a38 2686 UV val = 0;
f337b084
TH
2687 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2688 if (val < 256 && isALPHA(val))
2689 bits |= (val + 9) & 0xf;
a6ec74c1 2690 else
f337b084
TH
2691 bits |= val & 0xf;
2692 } else if (isALPHA(*str))
2693 bits |= (*str++ + 9) & 0xf;
2694 else
2695 bits |= *str++ & 0xf;
2696 if (l & 1) bits <<= 4;
fc241834 2697 else {
f337b084
TH
2698 PUSH_BYTE(utf8, cur, bits);
2699 bits = 0;
a6ec74c1
JH
2700 }
2701 }
f337b084
TH
2702 else
2703 while (l++ < len) {
2704 if (utf8_source) {
95b63a38 2705 UV val = 0;
f337b084
TH
2706 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2707 if (val < 256 && isALPHA(val))
2708 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2709 else
f337b084
TH
2710 bits |= (val & 0xf) << 4;
2711 } else if (isALPHA(*str))
2712 bits |= ((*str++ + 9) & 0xf) << 4;
2713 else
2714 bits |= (*str++ & 0xf) << 4;
2715 if (l & 1) bits >>= 4;
fc241834 2716 else {
f337b084
TH
2717 PUSH_BYTE(utf8, cur, bits);
2718 bits = 0;
a6ec74c1 2719 }
fc241834 2720 }
f337b084
TH
2721 l--;
2722 if (l & 1) {
2723 PUSH_BYTE(utf8, cur, bits);
2724 l++;
2725 }
2726 /* Determine how many chars are left in the requested field */
2727 l /= 2;
2728 if (howlen == e_star) field_len = 0;
2729 else field_len -= l;
2730 Zero(cur, field_len, char);
2731 cur += field_len;
2732 break;
fc241834
RGS
2733 }
2734 case 'c':
f337b084
TH
2735 while (len-- > 0) {
2736 IV aiv;
2737 fromstr = NEXTFROM;
2738 aiv = SvIV(fromstr);
a2a5de95
NC
2739 if ((-128 > aiv || aiv > 127))
2740 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2741 "Character in 'c' format wrapped in pack");
585ec06d 2742 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2743 }
2744 break;
2745 case 'C':
f337b084
TH
2746 if (len == 0) {
2747 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2748 break;
2749 }
a6ec74c1 2750 while (len-- > 0) {
f337b084 2751 IV aiv;
a6ec74c1 2752 fromstr = NEXTFROM;
f337b084 2753 aiv = SvIV(fromstr);
a2a5de95
NC
2754 if ((0 > aiv || aiv > 0xff))
2755 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2756 "Character in 'C' format wrapped in pack");
1651fc44 2757 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2758 }
fc241834
RGS
2759 break;
2760 case 'W': {
2761 char *end;
670f1322 2762 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2763
2764 end = start+SvLEN(cat)-1;
2765 if (utf8) end -= UTF8_MAXLEN-1;
2766 while (len-- > 0) {
2767 UV auv;
2768 fromstr = NEXTFROM;
2769 auv = SvUV(fromstr);
2770 if (in_bytes) auv = auv % 0x100;
2771 if (utf8) {
2772 W_utf8:
2773 if (cur > end) {
2774 *cur = '\0';
b162af07 2775 SvCUR_set(cat, cur - start);
fc241834
RGS
2776
2777 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2778 end = start+SvLEN(cat)-UTF8_MAXLEN;
2779 }
230e1fce
NC
2780 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2781 NATIVE_TO_UNI(auv),
041457d9 2782 warn_utf8 ?
230e1fce 2783 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2784 } else {
2785 if (auv >= 0x100) {
2786 if (!SvUTF8(cat)) {
2787 *cur = '\0';
b162af07 2788 SvCUR_set(cat, cur - start);
fc241834
RGS
2789 marked_upgrade(aTHX_ cat, symptr);
2790 lookahead.flags |= FLAG_DO_UTF8;
2791 lookahead.strbeg = symptr->strbeg;
2792 utf8 = 1;
2793 start = SvPVX(cat);
2794 cur = start + SvCUR(cat);
2795 end = start+SvLEN(cat)-UTF8_MAXLEN;
2796 goto W_utf8;
2797 }
a2a5de95
NC
2798 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2799 "Character in 'W' format wrapped in pack");
fc241834
RGS
2800 auv &= 0xff;
2801 }
2802 if (cur >= end) {
2803 *cur = '\0';
b162af07 2804 SvCUR_set(cat, cur - start);
fc241834
RGS
2805 GROWING(0, cat, start, cur, len+1);
2806 end = start+SvLEN(cat)-1;
2807 }
fe2774ed 2808 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2809 }
2810 }
2811 break;
fc241834
RGS
2812 }
2813 case 'U': {
2814 char *end;
2815
2816 if (len == 0) {
2817 if (!(symptr->flags & FLAG_DO_UTF8)) {
2818 marked_upgrade(aTHX_ cat, symptr);
2819 lookahead.flags |= FLAG_DO_UTF8;
2820 lookahead.strbeg = symptr->strbeg;
2821 }
2822 utf8 = 0;
2823 goto no_change;
2824 }
2825
2826 end = start+SvLEN(cat);
2827 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2828 while (len-- > 0) {
fc241834 2829 UV auv;
a6ec74c1 2830 fromstr = NEXTFROM;
fc241834
RGS
2831 auv = SvUV(fromstr);
2832 if (utf8) {
230e1fce 2833 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2834 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2835 warn_utf8 ?
fc241834
RGS
2836 0 : UNICODE_ALLOW_ANY);
2837 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2838 *cur = '\0';
b162af07 2839 SvCUR_set(cat, cur - start);
fc241834
RGS
2840 GROWING(0, cat, start, cur,
2841 len+(endb-buffer)*UTF8_EXPAND);
2842 end = start+SvLEN(cat);
2843 }
64844641 2844 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2845 } else {
2846 if (cur >= end) {
2847 *cur = '\0';
b162af07 2848 SvCUR_set(cat, cur - start);
fc241834
RGS
2849 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2850 end = start+SvLEN(cat)-UTF8_MAXLEN;
2851 }
230e1fce 2852 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2853 warn_utf8 ?
230e1fce 2854 0 : UNICODE_ALLOW_ANY);
fc241834 2855 }
a6ec74c1 2856 }
a6ec74c1 2857 break;
fc241834 2858 }
a6ec74c1
JH
2859 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2860 case 'f':
a6ec74c1 2861 while (len-- > 0) {
f337b084
TH
2862 float afloat;
2863 NV anv;
a6ec74c1 2864 fromstr = NEXTFROM;
f337b084 2865 anv = SvNV(fromstr);
85bba25f 2866# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2867 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2868 * on Alpha; fake it if we don't have them.
2869 */
f337b084 2870 if (anv > FLT_MAX)
fc241834 2871 afloat = FLT_MAX;
f337b084 2872 else if (anv < -FLT_MAX)
fc241834 2873 afloat = -FLT_MAX;
f337b084 2874 else afloat = (float)anv;
baf3cf9c 2875# else
f337b084 2876 afloat = (float)anv;
baf3cf9c 2877# endif
1109a392 2878 DO_BO_PACK_N(afloat, float);
f337b084 2879 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2880 }
2881 break;
2882 case 'd':
a6ec74c1 2883 while (len-- > 0) {
f337b084
TH
2884 double adouble;
2885 NV anv;
a6ec74c1 2886 fromstr = NEXTFROM;
f337b084 2887 anv = SvNV(fromstr);
85bba25f 2888# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2889 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2890 * on Alpha; fake it if we don't have them.
2891 */
f337b084 2892 if (anv > DBL_MAX)
fc241834 2893 adouble = DBL_MAX;
f337b084 2894 else if (anv < -DBL_MAX)
fc241834 2895 adouble = -DBL_MAX;
f337b084 2896 else adouble = (double)anv;
baf3cf9c 2897# else
f337b084 2898 adouble = (double)anv;
baf3cf9c 2899# endif
1109a392 2900 DO_BO_PACK_N(adouble, double);
f337b084 2901 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2902 }
2903 break;
fc241834 2904 case 'F': {
275663fa 2905 NV_bytes anv;
1109a392 2906 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2907 while (len-- > 0) {
2908 fromstr = NEXTFROM;
cd07c537
DM
2909#ifdef __GNUC__
2910 /* to work round a gcc/x86 bug; don't use SvNV */
2911 anv.nv = sv_2nv(fromstr);
2912#else
275663fa 2913 anv.nv = SvNV(fromstr);
cd07c537 2914#endif
1109a392 2915 DO_BO_PACK_N(anv, NV);
275663fa 2916 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2917 }
2918 break;
fc241834 2919 }
92d41999 2920#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2921 case 'D': {
275663fa 2922 ld_bytes aldouble;
1109a392
MHM
2923 /* long doubles can have unused bits, which may be nonzero */
2924 Zero(&aldouble, 1, long double);
92d41999
JH
2925 while (len-- > 0) {
2926 fromstr = NEXTFROM;
cd07c537
DM
2927# ifdef __GNUC__
2928 /* to work round a gcc/x86 bug; don't use SvNV */
2929 aldouble.ld = (long double)sv_2nv(fromstr);
2930# else
275663fa 2931 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2932# endif
1109a392 2933 DO_BO_PACK_N(aldouble, long double);
275663fa 2934 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2935 }
2936 break;
fc241834 2937 }
92d41999 2938#endif
7212898e 2939#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2940 case 'n' | TYPE_IS_SHRIEKING:
7212898e 2941#endif
a6ec74c1
JH
2942 case 'n':
2943 while (len-- > 0) {
f337b084 2944 I16 ai16;
a6ec74c1 2945 fromstr = NEXTFROM;
ef108786 2946 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2947#ifdef HAS_HTONS
ef108786 2948 ai16 = PerlSock_htons(ai16);
a6ec74c1 2949#endif
f337b084 2950 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2951 }
2952 break;
7212898e 2953#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2954 case 'v' | TYPE_IS_SHRIEKING:
7212898e 2955#endif
a6ec74c1
JH
2956 case 'v':
2957 while (len-- > 0) {
f337b084 2958 I16 ai16;
a6ec74c1 2959 fromstr = NEXTFROM;
ef108786 2960 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2961#ifdef HAS_HTOVS
ef108786 2962 ai16 = htovs(ai16);
a6ec74c1 2963#endif
f337b084 2964 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2965 }
2966 break;
49704364 2967 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2968#if SHORTSIZE != SIZE16
fc241834 2969 while (len-- > 0) {
f337b084 2970 unsigned short aushort;
fc241834
RGS
2971 fromstr = NEXTFROM;
2972 aushort = SvUV(fromstr);
2973 DO_BO_PACK(aushort, s);
f337b084 2974 PUSH_VAR(utf8, cur, aushort);
fc241834 2975 }
49704364
WL
2976 break;
2977#else
2978 /* Fall through! */
a6ec74c1 2979#endif
49704364 2980 case 'S':
fc241834 2981 while (len-- > 0) {
f337b084 2982 U16 au16;
fc241834
RGS
2983 fromstr = NEXTFROM;
2984 au16 = (U16)SvUV(fromstr);
2985 DO_BO_PACK(au16, 16);
f337b084 2986 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2987 }
2988 break;
49704364 2989 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2990#if SHORTSIZE != SIZE16
fc241834 2991 while (len-- > 0) {
f337b084 2992 short ashort;
fc241834
RGS
2993 fromstr = NEXTFROM;
2994 ashort = SvIV(fromstr);
2995 DO_BO_PACK(ashort, s);
f337b084 2996 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2997 }
49704364
WL
2998 break;
2999#else
3000 /* Fall through! */
a6ec74c1 3001#endif
49704364
WL
3002 case 's':
3003 while (len-- > 0) {
f337b084 3004 I16 ai16;
49704364 3005 fromstr = NEXTFROM;
ef108786
MHM
3006 ai16 = (I16)SvIV(fromstr);
3007 DO_BO_PACK(ai16, 16);
f337b084 3008 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3009 }
3010 break;
3011 case 'I':
49704364 3012 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3013 while (len-- > 0) {
f337b084 3014 unsigned int auint;
a6ec74c1
JH
3015 fromstr = NEXTFROM;
3016 auint = SvUV(fromstr);
1109a392 3017 DO_BO_PACK(auint, i);
f337b084 3018 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3019 }
3020 break;
92d41999
JH
3021 case 'j':
3022 while (len-- > 0) {
f337b084 3023 IV aiv;
92d41999
JH
3024 fromstr = NEXTFROM;
3025 aiv = SvIV(fromstr);
1109a392
MHM
3026#if IVSIZE == INTSIZE
3027 DO_BO_PACK(aiv, i);
3028#elif IVSIZE == LONGSIZE
3029 DO_BO_PACK(aiv, l);
3030#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3031 DO_BO_PACK(aiv, 64);
f337b084
TH
3032#else
3033 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3034#endif
f337b084 3035 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3036 }
3037 break;
3038 case 'J':
3039 while (len-- > 0) {
f337b084 3040 UV auv;
92d41999
JH
3041 fromstr = NEXTFROM;
3042 auv = SvUV(fromstr);
1109a392
MHM
3043#if UVSIZE == INTSIZE
3044 DO_BO_PACK(auv, i);
3045#elif UVSIZE == LONGSIZE
3046 DO_BO_PACK(auv, l);
3047#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3048 DO_BO_PACK(auv, 64);
f337b084
TH
3049#else
3050 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3051#endif
f337b084 3052 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3053 }
3054 break;
a6ec74c1
JH
3055 case 'w':
3056 while (len-- > 0) {
f337b084 3057 NV anv;
a6ec74c1 3058 fromstr = NEXTFROM;
15e9f109 3059 anv = SvNV(fromstr);
a6ec74c1 3060
f337b084
TH
3061 if (anv < 0) {
3062 *cur = '\0';
b162af07 3063 SvCUR_set(cat, cur - start);
49704364 3064 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3065 }
a6ec74c1 3066
196b62db
NC
3067 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3068 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3069 any negative IVs will have already been got by the croak()
3070 above. IOK is untrue for fractions, so we test them
3071 against UV_MAX_P1. */
f337b084
TH
3072 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3073 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3074 char *in = buf + sizeof(buf);
196b62db 3075 UV auv = SvUV(fromstr);
a6ec74c1
JH
3076
3077 do {
eb160463 3078 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3079 auv >>= 7;
3080 } while (auv);
3081 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3082 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3083 in, (buf + sizeof(buf)) - in);
3084 } else if (SvPOKp(fromstr))
3085 goto w_string;
a6ec74c1 3086 else if (SvNOKp(fromstr)) {
0258719b 3087 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 3088 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
3089 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3090 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3091 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3092 Some C compilers are strict about integral constant
3093 expressions so we conservatively divide by a slightly
3094 smaller integer instead of multiplying by the exact
3095 floating-point value.
0258719b
NC
3096 */
3097#ifdef NV_MAX_10_EXP
f337b084 3098 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3099 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3100#else
f337b084 3101 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3102 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3103#endif
a6ec74c1
JH
3104 char *in = buf + sizeof(buf);
3105
8b6e33c7 3106 anv = Perl_floor(anv);
a6ec74c1 3107 do {
8b6e33c7 3108 const NV next = Perl_floor(anv / 128);
a6ec74c1 3109 if (in <= buf) /* this cannot happen ;-) */
49704364 3110 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3111 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3112 anv = next;
3113 } while (anv > 0);
a6ec74c1 3114 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3115 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3116 in, (buf + sizeof(buf)) - in);
3117 } else {
8b6e33c7
AL
3118 const char *from;
3119 char *result, *in;
735b914b
JH
3120 SV *norm;
3121 STRLEN len;
3122 bool done;
3123
f337b084 3124 w_string:
735b914b 3125 /* Copy string and check for compliance */
349d4f2f 3126 from = SvPV_const(fromstr, len);
735b914b 3127 if ((norm = is_an_int(from, len)) == NULL)
49704364 3128 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3129
a02a5408 3130 Newx(result, len, char);
735b914b
JH
3131 in = result + len;
3132 done = FALSE;
f337b084 3133 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3134 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3135 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3136 in, (result + len) - in);
735b914b
JH
3137 Safefree(result);
3138 SvREFCNT_dec(norm); /* free norm */
fc241834 3139 }
a6ec74c1
JH
3140 }
3141 break;
3142 case 'i':
49704364 3143 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3144 while (len-- > 0) {
f337b084 3145 int aint;
a6ec74c1
JH
3146 fromstr = NEXTFROM;
3147 aint = SvIV(fromstr);
1109a392 3148 DO_BO_PACK(aint, i);
f337b084 3149 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3150 }
3151 break;
7212898e 3152#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3153 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3154#endif
a6ec74c1
JH
3155 case 'N':
3156 while (len-- > 0) {
f337b084 3157 U32 au32;
a6ec74c1 3158 fromstr = NEXTFROM;
ef108786 3159 au32 = SvUV(fromstr);
a6ec74c1 3160#ifdef HAS_HTONL
ef108786 3161 au32 = PerlSock_htonl(au32);
a6ec74c1 3162#endif
f337b084 3163 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3164 }
3165 break;
7212898e 3166#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3167 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3168#endif
a6ec74c1
JH
3169 case 'V':
3170 while (len-- > 0) {
f337b084 3171 U32 au32;
a6ec74c1 3172 fromstr = NEXTFROM;
ef108786 3173 au32 = SvUV(fromstr);
a6ec74c1 3174#ifdef HAS_HTOVL
ef108786 3175 au32 = htovl(au32);
a6ec74c1 3176#endif
f337b084 3177 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3178 }
3179 break;
49704364 3180 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3181#if LONGSIZE != SIZE32
fc241834 3182 while (len-- > 0) {
f337b084 3183 unsigned long aulong;
fc241834
RGS
3184 fromstr = NEXTFROM;
3185 aulong = SvUV(fromstr);
3186 DO_BO_PACK(aulong, l);
f337b084 3187 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3188 }
49704364
WL
3189 break;
3190#else
3191 /* Fall though! */
a6ec74c1 3192#endif
49704364 3193 case 'L':
fc241834 3194 while (len-- > 0) {
f337b084 3195 U32 au32;
fc241834
RGS
3196 fromstr = NEXTFROM;
3197 au32 = SvUV(fromstr);
3198 DO_BO_PACK(au32, 32);
f337b084 3199 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3200 }
3201 break;
49704364 3202 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3203#if LONGSIZE != SIZE32
fc241834 3204 while (len-- > 0) {
f337b084 3205 long along;
fc241834
RGS
3206 fromstr = NEXTFROM;
3207 along = SvIV(fromstr);
3208 DO_BO_PACK(along, l);
f337b084 3209 PUSH_VAR(utf8, cur, along);
a6ec74c1 3210 }
49704364
WL
3211 break;
3212#else
3213 /* Fall though! */
a6ec74c1 3214#endif
49704364
WL
3215 case 'l':
3216 while (len-- > 0) {
f337b084 3217 I32 ai32;
49704364 3218 fromstr = NEXTFROM;
ef108786
MHM
3219 ai32 = SvIV(fromstr);
3220 DO_BO_PACK(ai32, 32);
f337b084 3221 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3222 }
3223 break;
3224#ifdef HAS_QUAD
3225 case 'Q':
3226 while (len-- > 0) {
f337b084 3227 Uquad_t auquad;
a6ec74c1 3228 fromstr = NEXTFROM;
f337b084 3229 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3230 DO_BO_PACK(auquad, 64);
f337b084 3231 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3232 }
3233 break;
3234 case 'q':
3235 while (len-- > 0) {
f337b084 3236 Quad_t aquad;
a6ec74c1
JH
3237 fromstr = NEXTFROM;
3238 aquad = (Quad_t)SvIV(fromstr);
1109a392 3239 DO_BO_PACK(aquad, 64);
f337b084 3240 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3241 }
3242 break;
f337b084 3243#endif /* HAS_QUAD */
a6ec74c1
JH
3244 case 'P':
3245 len = 1; /* assume SV is correct length */
f337b084 3246 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3247 /* Fall through! */
a6ec74c1
JH
3248 case 'p':
3249 while (len-- > 0) {
83003860 3250 const char *aptr;
f337b084 3251
a6ec74c1 3252 fromstr = NEXTFROM;
28a4f200
TH
3253 SvGETMAGIC(fromstr);
3254 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3255 else {
a6ec74c1
JH
3256 /* XXX better yet, could spirit away the string to
3257 * a safe spot and hang on to it until the result
3258 * of pack() (and all copies of the result) are
3259 * gone.
3260 */
041457d9 3261 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3262 !SvREADONLY(fromstr)))) {
3263 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3264 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3265 }
3266 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3267 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3268 else
2596d9fe 3269 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3270 }
07409e01 3271 DO_BO_PACK_PC(aptr);
f337b084 3272 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3273 }
3274 break;
fc241834 3275 case 'u': {
f7fe979e 3276 const char *aptr, *aend;
fc241834 3277 bool from_utf8;
f337b084 3278
a6ec74c1 3279 fromstr = NEXTFROM;
fc241834
RGS
3280 if (len <= 2) len = 45;
3281 else len = len / 3 * 3;
3282 if (len >= 64) {
a2a5de95
NC
3283 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3284 "Field too wide in 'u' format in pack");
fc241834
RGS
3285 len = 63;
3286 }
83003860 3287 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3288 from_utf8 = DO_UTF8(fromstr);
3289 if (from_utf8) {
3290 aend = aptr + fromlen;
3f63b0e5 3291 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3292 } else aend = NULL; /* Unused, but keep compilers happy */
3293 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3294 while (fromlen > 0) {
fc241834 3295 U8 *end;
a6ec74c1 3296 I32 todo;
fc241834 3297 U8 hunk[1+63/3*4+1];
a6ec74c1 3298
eb160463 3299 if ((I32)fromlen > len)
a6ec74c1
JH
3300 todo = len;
3301 else
3302 todo = fromlen;
fc241834
RGS
3303 if (from_utf8) {
3304 char buffer[64];
3305 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3306 'u' | TYPE_IS_PACK)) {
3307 *cur = '\0';
b162af07 3308 SvCUR_set(cat, cur - start);
5637ef5b
NC
3309 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3310 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3311 aptr, aend, buffer, (long) todo);
fc241834
RGS
3312 }
3313 end = doencodes(hunk, buffer, todo);
3314 } else {
3315 end = doencodes(hunk, aptr, todo);
3316 aptr += todo;
3317 }
3318 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3319 fromlen -= todo;
3320 }
a6ec74c1
JH
3321 break;
3322 }
f337b084
TH
3323 }
3324 *cur = '\0';
b162af07 3325 SvCUR_set(cat, cur - start);
f337b084 3326 no_change:
49704364 3327 *symptr = lookahead;
a6ec74c1 3328 }
49704364 3329 return beglist;
18529408
IZ
3330}
3331#undef NEXTFROM
3332
3333
3334PP(pp_pack)
3335{
97aff369 3336 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3337 SV *cat = TARG;
18529408 3338 STRLEN fromlen;
349d4f2f 3339 SV *pat_sv = *++MARK;
eb578fdb
KW
3340 const char *pat = SvPV_const(pat_sv, fromlen);
3341 const char *patend = pat + fromlen;
18529408
IZ
3342
3343 MARK++;
76f68e9b 3344 sv_setpvs(cat, "");
f337b084 3345 SvUTF8_off(cat);
18529408 3346
7accc089 3347 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3348
a6ec74c1
JH
3349 SvSETMAGIC(cat);
3350 SP = ORIGMARK;
3351 PUSHs(cat);
3352 RETURN;
3353}
a6ec74c1 3354
73cb7263
NC
3355/*
3356 * Local variables:
3357 * c-indentation-style: bsd
3358 * c-basic-offset: 4
14d04a33 3359 * indent-tabs-mode: nil
73cb7263
NC
3360 * End:
3361 *
14d04a33 3362 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3363 */