This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fold the implementation of DO_BO_UNPACK into SHIFT_BYTES(), its only user.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
f337b084
TH
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
7212898e 87#endif
3473cf63
RGS
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
7212898e 90
a6ec74c1 91/*
a6ec74c1
JH
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
1109a392
MHM
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
a6ec74c1 119# else
1109a392 120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
08ca2aa3 124 ++++ bad cray byte order
a6ec74c1
JH
125# endif
126# endif
a6ec74c1 127#else
08ca2aa3
TH
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
a6ec74c1
JH
130#endif
131
f337b084
TH
132#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
133#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3 134
a1219b5e
NC
135#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
136# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
137#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
139#else
140# error "Unsupported byteorder"
141 /* Need to add code here to re-instate mixed endian support. */
142#endif
143
08ca2aa3 144/* Only to be used inside a loop (see the break) */
275663fa 145#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
146STMT_START { \
147 if (utf8) { \
f337b084 148 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 149 (char *) (buf), len, datumtype)) break; \
08ca2aa3 150 } else { \
275663fa
TC
151 Copy(s, (char *) (buf), len, char); \
152 s += len; \
08ca2aa3 153 } \
cd7e7512
NC
154 if (needs_swap) { \
155 my_swabn((buf), len); \
156 } \
08ca2aa3
TH
157} STMT_END
158
7285e3f4
NC
159#define SHIFT16(utf8, s, strend, p, datumtype) \
160 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype)
161
162#define SHIFT32(utf8, s, strend, p, datumtype) \
163 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype)
164
275663fa
TC
165#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
166 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
167
f337b084 168#define PUSH_VAR(utf8, aptr, var) \
230e1fce 169 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 170
49704364
LW
171/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
172#define MAX_SUB_TEMPLATE_LEVEL 100
173
66c611c5 174/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
175#define FLAG_WAS_UTF8 0x40
176#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 177#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 178#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
179#define FLAG_SLASH 0x04
180#define FLAG_COMMA 0x02
181#define FLAG_PACK 0x01
182
a6ec74c1
JH
183STATIC SV *
184S_mul128(pTHX_ SV *sv, U8 m)
185{
186 STRLEN len;
187 char *s = SvPV(sv, len);
188 char *t;
a6ec74c1 189
7918f24d
NC
190 PERL_ARGS_ASSERT_MUL128;
191
a6ec74c1 192 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 193 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
194
195 sv_catsv(tmpNew, sv);
196 SvREFCNT_dec(sv); /* free old sv */
197 sv = tmpNew;
198 s = SvPV(sv, len);
199 }
200 t = s + len - 1;
201 while (!*t) /* trailing '\0'? */
202 t--;
203 while (t > s) {
f7fe979e 204 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
205 *(t--) = '0' + (char)(i % 10);
206 m = (char)(i / 10);
a6ec74c1
JH
207 }
208 return (sv);
209}
210
211/* Explosives and implosives. */
212
213#if 'I' == 73 && 'J' == 74
214/* On an ASCII/ISO kind of system */
215#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
216#else
217/*
218 Some other sort of character set - use memchr() so we don't match
219 the null byte.
220 */
221#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
222#endif
223
66c611c5 224/* type modifiers */
62f95557 225#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
226#define TYPE_IS_BIG_ENDIAN 0x200
227#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 228#define TYPE_IS_PACK 0x800
1109a392 229#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 230#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
231#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
232
7212898e
NC
233# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
234# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
235
236# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
237
2b4ad569 238# define DO_BO_PACK(var) \
3cc90e2e 239 STMT_START { \
a1219b5e 240 if (needs_swap) { \
e8fda8c4 241 my_swabn(&var, sizeof(var)); \
3cc90e2e
NC
242 } \
243 } STMT_END
244
78d46eaa 245#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 246#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
247#define PACK_SIZE_MASK 0x3F
248
298bc19c 249#include "packsizetables.c"
78d46eaa 250
08ca2aa3 251STATIC U8
f7fe979e 252uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 253{
08ca2aa3 254 STRLEN retlen;
0bcc34c2 255 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 256 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 257 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
258 warnings), but these two mean we make no progress in the string and
259 might enter an infinite loop */
260 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
261 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
262 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 263 if (val >= 0x100) {
a2a5de95
NC
264 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
265 "Character in '%c' format wrapped in unpack",
266 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
267 val &= 0xff;
268 }
269 *s += retlen;
fe2774ed 270 return (U8)val;
08ca2aa3
TH
271}
272
f337b084
TH
273#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
274 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
275 *(U8 *)(s)++)
276
277STATIC bool
f7fe979e 278uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
279{
280 UV val;
281 STRLEN retlen;
f7fe979e 282 const char *from = *s;
08ca2aa3 283 int bad = 0;
f7fe979e 284 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
285 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
286 for (;buf_len > 0; buf_len--) {
287 if (from >= end) return FALSE;
f337b084 288 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
289 if (retlen == (STRLEN) -1 || retlen == 0) {
290 from += UTF8SKIP(from);
291 bad |= 1;
292 } else from += retlen;
293 if (val >= 0x100) {
294 bad |= 2;
295 val &= 0xff;
296 }
fe2774ed 297 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
298 }
299 /* We have enough characters for the buffer. Did we have problems ? */
300 if (bad) {
301 if (bad & 1) {
302 /* Rewalk the string fragment while warning */
f7fe979e 303 const char *ptr;
9e27e96a 304 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
305 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
306 if (ptr >= end) break;
f337b084 307 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
308 }
309 if (from > end) from = end;
310 }
a2a5de95
NC
311 if ((bad & 2))
312 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 313 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
314 "Character(s) in '%c' format wrapped in %s",
315 (int) TYPE_NO_MODIFIERS(datumtype),
316 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
317 }
318 *s = from;
319 return TRUE;
320}
321
322STATIC bool
f7fe979e 323next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 324{
97aff369 325 dVAR;
08ca2aa3 326 STRLEN retlen;
0bcc34c2 327 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
328 if (val >= 0x100 || !ISUUCHAR(val) ||
329 retlen == (STRLEN) -1 || retlen == 0) {
330 *out = 0;
331 return FALSE;
332 }
333 *out = PL_uudmap[val] & 077;
f337b084 334 *s += retlen;
08ca2aa3
TH
335 return TRUE;
336}
78d46eaa 337
64844641 338STATIC char *
14333449 339S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 340 const U8 * const end = start + len;
64844641 341
7918f24d
NC
342 PERL_ARGS_ASSERT_BYTES_TO_UNI;
343
f337b084 344 while (start < end) {
48fa4626
ST
345 const UV uv = NATIVE_TO_ASCII(*start);
346 if (UNI_IS_INVARIANT(uv))
347 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
348 else {
349 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
350 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
351 }
352 start++;
353 }
64844641 354 return dest;
f337b084
TH
355}
356
230e1fce
NC
357#define PUSH_BYTES(utf8, cur, buf, len) \
358STMT_START { \
64844641
AL
359 if (utf8) \
360 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
361 else { \
362 Copy(buf, cur, len, char); \
363 (cur) += (len); \
364 } \
f337b084
TH
365} STMT_END
366
367#define GROWING(utf8, cat, start, cur, in_len) \
368STMT_START { \
369 STRLEN glen = (in_len); \
3473cf63 370 if (utf8) glen *= UTF8_EXPAND; \
f337b084 371 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 372 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
373 (cur) = (start) + SvCUR(cat); \
374 } \
375} STMT_END
376
377#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
378STMT_START { \
f7fe979e 379 const STRLEN glen = (in_len); \
f337b084 380 STRLEN gl = glen; \
3473cf63 381 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
382 if ((cur) + gl >= (start) + SvLEN(cat)) { \
383 *cur = '\0'; \
b162af07 384 SvCUR_set((cat), (cur) - (start)); \
0bd48802 385 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
386 (cur) = (start) + SvCUR(cat); \
387 } \
388 PUSH_BYTES(utf8, cur, buf, glen); \
389} STMT_END
390
391#define PUSH_BYTE(utf8, s, byte) \
392STMT_START { \
393 if (utf8) { \
f7fe979e 394 const U8 au8 = (byte); \
64844641 395 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
396 } else *(U8 *)(s)++ = (byte); \
397} STMT_END
398
399/* Only to be used inside a loop (see the break) */
400#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
401STMT_START { \
402 STRLEN retlen; \
403 if (str >= end) break; \
404 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
405 if (retlen == (STRLEN) -1 || retlen == 0) { \
406 *cur = '\0'; \
407 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
408 } \
409 str += retlen; \
410} STMT_END
411
f7fe979e
AL
412static const char *_action( const tempsym_t* symptr )
413{
10edeb5d 414 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
415}
416
206947d2 417/* Returns the sizeof() struct described by pat */
028d1f6d 418STATIC I32
f337b084 419S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 420{
f337b084 421 I32 total = 0;
206947d2 422
7918f24d
NC
423 PERL_ARGS_ASSERT_MEASURE_STRUCT;
424
49704364 425 while (next_symbol(symptr)) {
f337b084 426 I32 len;
f7fe979e 427 int size;
f337b084
TH
428
429 switch (symptr->howlen) {
fc241834 430 case e_star:
49704364 431 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 432 _action( symptr ) );
49704364 433 break;
f337b084
TH
434 default:
435 /* e_no_len and e_number */
436 len = symptr->length;
437 break;
49704364
LW
438 }
439
a7a3cfaa 440 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 441 if (!size) {
f7fe979e 442 int star;
80a13697
NC
443 /* endianness doesn't influence the size of a type */
444 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
445 default:
446 Perl_croak(aTHX_ "Invalid type '%c' in %s",
447 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 448 _action( symptr ) );
28be1210
TH
449 case '.' | TYPE_IS_SHRIEKING:
450 case '@' | TYPE_IS_SHRIEKING:
80a13697 451 case '@':
28be1210 452 case '.':
80a13697
NC
453 case '/':
454 case 'U': /* XXXX Is it correct? */
455 case 'w':
456 case 'u':
457 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 458 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 459 _action( symptr ) );
80a13697
NC
460 case '%':
461 size = 0;
462 break;
463 case '(':
fc241834
RGS
464 {
465 tempsym_t savsym = *symptr;
466 symptr->patptr = savsym.grpbeg;
467 symptr->patend = savsym.grpend;
468 /* XXXX Theoretically, we need to measure many times at
469 different positions, since the subexpression may contain
470 alignment commands, but be not of aligned length.
471 Need to detect this and croak(). */
472 size = measure_struct(symptr);
473 *symptr = savsym;
474 break;
475 }
80a13697
NC
476 case 'X' | TYPE_IS_SHRIEKING:
477 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
478 */
479 if (!len) /* Avoid division by 0 */
480 len = 1;
481 len = total % len; /* Assumed: the start is aligned. */
482 /* FALL THROUGH */
483 case 'X':
484 size = -1;
485 if (total < len)
f7fe979e 486 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
487 break;
488 case 'x' | TYPE_IS_SHRIEKING:
489 if (!len) /* Avoid division by 0 */
490 len = 1;
491 star = total % len; /* Assumed: the start is aligned. */
492 if (star) /* Other portable ways? */
493 len = len - star;
494 else
495 len = 0;
496 /* FALL THROUGH */
497 case 'x':
498 case 'A':
499 case 'Z':
500 case 'a':
80a13697
NC
501 size = 1;
502 break;
503 case 'B':
504 case 'b':
505 len = (len + 7)/8;
506 size = 1;
507 break;
508 case 'H':
509 case 'h':
510 len = (len + 1)/2;
511 size = 1;
512 break;
78d46eaa 513
80a13697
NC
514 case 'P':
515 len = 1;
516 size = sizeof(char*);
78d46eaa
NC
517 break;
518 }
206947d2
IZ
519 }
520 total += len * size;
521 }
522 return total;
523}
524
49704364
LW
525
526/* locate matching closing parenthesis or bracket
527 * returns char pointer to char after match, or NULL
528 */
f7fe979e 529STATIC const char *
5aaab254 530S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 531{
7918f24d
NC
532 PERL_ARGS_ASSERT_GROUP_END;
533
49704364 534 while (patptr < patend) {
f7fe979e 535 const char c = *patptr++;
49704364
LW
536
537 if (isSPACE(c))
538 continue;
539 else if (c == ender)
540 return patptr-1;
541 else if (c == '#') {
542 while (patptr < patend && *patptr != '\n')
543 patptr++;
544 continue;
545 } else if (c == '(')
546 patptr = group_end(patptr, patend, ')') + 1;
547 else if (c == '[')
548 patptr = group_end(patptr, patend, ']') + 1;
18529408 549 }
49704364
LW
550 Perl_croak(aTHX_ "No group ending character '%c' found in template",
551 ender);
552 return 0;
18529408
IZ
553}
554
49704364
LW
555
556/* Convert unsigned decimal number to binary.
557 * Expects a pointer to the first digit and address of length variable
558 * Advances char pointer to 1st non-digit char and returns number
fc241834 559 */
f7fe979e 560STATIC const char *
5aaab254 561S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
LW
562{
563 I32 len = *patptr++ - '0';
7918f24d
NC
564
565 PERL_ARGS_ASSERT_GET_NUM;
566
49704364
LW
567 while (isDIGIT(*patptr)) {
568 if (len >= 0x7FFFFFFF/10)
569 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
570 len = (len * 10) + (*patptr++ - '0');
571 }
572 *lenptr = len;
573 return patptr;
574}
575
576/* The marvellous template parsing routine: Using state stored in *symptr,
577 * locates next template code and count
578 */
579STATIC bool
f337b084 580S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 581{
f7fe979e 582 const char* patptr = symptr->patptr;
0bcc34c2 583 const char* const patend = symptr->patend;
49704364 584
7918f24d
NC
585 PERL_ARGS_ASSERT_NEXT_SYMBOL;
586
49704364
LW
587 symptr->flags &= ~FLAG_SLASH;
588
589 while (patptr < patend) {
590 if (isSPACE(*patptr))
591 patptr++;
592 else if (*patptr == '#') {
593 patptr++;
594 while (patptr < patend && *patptr != '\n')
595 patptr++;
596 if (patptr < patend)
597 patptr++;
598 } else {
fc241834 599 /* We should have found a template code */
49704364 600 I32 code = *patptr++ & 0xFF;
66c611c5 601 U32 inherited_modifiers = 0;
49704364
LW
602
603 if (code == ','){ /* grandfather in commas but with a warning */
604 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
605 symptr->flags |= FLAG_COMMA;
606 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 607 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
608 }
609 continue;
610 }
fc241834 611
49704364 612 /* for '(', skip to ')' */
fc241834 613 if (code == '(') {
49704364
LW
614 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
615 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 616 _action( symptr ) );
49704364
LW
617 symptr->grpbeg = patptr;
618 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
619 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
620 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 621 _action( symptr ) );
49704364
LW
622 }
623
66c611c5
MHM
624 /* look for group modifiers to inherit */
625 if (TYPE_ENDIANNESS(symptr->flags)) {
626 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
627 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
628 }
629
1109a392
MHM
630 /* look for modifiers */
631 while (patptr < patend) {
b7787f18
AL
632 const char *allowed;
633 I32 modifier;
1109a392
MHM
634 switch (*patptr) {
635 case '!':
636 modifier = TYPE_IS_SHRIEKING;
f8e5a5db 637 allowed = "sSiIlLxXnNvV@.";
1109a392
MHM
638 break;
639 case '>':
640 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 641 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
642 break;
643 case '<':
644 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 645 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
646 break;
647 default:
b7787f18
AL
648 allowed = "";
649 modifier = 0;
1109a392
MHM
650 break;
651 }
66c611c5 652
1109a392
MHM
653 if (modifier == 0)
654 break;
66c611c5 655
1109a392
MHM
656 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
657 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 658 allowed, _action( symptr ) );
66c611c5
MHM
659
660 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 661 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 662 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
663 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
664 TYPE_ENDIANNESS_MASK)
665 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 666 *patptr, _action( symptr ) );
66c611c5 667
a2a5de95
NC
668 if ((code & modifier)) {
669 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
670 "Duplicate modifier '%c' after '%c' in %s",
671 *patptr, (int) TYPE_NO_MODIFIERS(code),
672 _action( symptr ) );
1109a392 673 }
66c611c5 674
1109a392
MHM
675 code |= modifier;
676 patptr++;
49704364
LW
677 }
678
66c611c5
MHM
679 /* inherit modifiers */
680 code |= inherited_modifiers;
681
fc241834 682 /* look for count and/or / */
49704364
LW
683 if (patptr < patend) {
684 if (isDIGIT(*patptr)) {
685 patptr = get_num( patptr, &symptr->length );
686 symptr->howlen = e_number;
687
688 } else if (*patptr == '*') {
689 patptr++;
690 symptr->howlen = e_star;
691
692 } else if (*patptr == '[') {
f7fe979e 693 const char* lenptr = ++patptr;
49704364
LW
694 symptr->howlen = e_number;
695 patptr = group_end( patptr, patend, ']' ) + 1;
696 /* what kind of [] is it? */
697 if (isDIGIT(*lenptr)) {
698 lenptr = get_num( lenptr, &symptr->length );
699 if( *lenptr != ']' )
700 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 701 _action( symptr ) );
49704364
LW
702 } else {
703 tempsym_t savsym = *symptr;
704 symptr->patend = patptr-1;
705 symptr->patptr = lenptr;
706 savsym.length = measure_struct(symptr);
707 *symptr = savsym;
708 }
709 } else {
710 symptr->howlen = e_no_len;
711 symptr->length = 1;
712 }
713
714 /* try to find / */
715 while (patptr < patend) {
716 if (isSPACE(*patptr))
717 patptr++;
718 else if (*patptr == '#') {
719 patptr++;
720 while (patptr < patend && *patptr != '\n')
721 patptr++;
722 if (patptr < patend)
723 patptr++;
724 } else {
66c611c5 725 if (*patptr == '/') {
49704364
LW
726 symptr->flags |= FLAG_SLASH;
727 patptr++;
66c611c5
MHM
728 if (patptr < patend &&
729 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 730 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 731 _action( symptr ) );
49704364
LW
732 }
733 break;
734 }
18529408 735 }
49704364
LW
736 } else {
737 /* at end - no count, no / */
738 symptr->howlen = e_no_len;
739 symptr->length = 1;
740 }
741
742 symptr->code = code;
fc241834 743 symptr->patptr = patptr;
49704364 744 return TRUE;
18529408 745 }
49704364 746 }
fc241834 747 symptr->patptr = patptr;
49704364 748 return FALSE;
18529408
IZ
749}
750
18529408 751/*
fc241834 752 There is no way to cleanly handle the case where we should process the
08ca2aa3 753 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
754 (e.g. estimates like strend-s as an upper bound for the number of
755 characters left wouldn't work). So if we foresee the need of this
756 (pattern starts with U or contains U0), we want to work on the encoded
757 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
758 themselves if they need to do a lot of unpacks like this on it
759*/
fc241834 760STATIC bool
08ca2aa3
TH
761need_utf8(const char *pat, const char *patend)
762{
763 bool first = TRUE;
7918f24d
NC
764
765 PERL_ARGS_ASSERT_NEED_UTF8;
766
08ca2aa3
TH
767 while (pat < patend) {
768 if (pat[0] == '#') {
769 pat++;
f7fe979e 770 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
771 if (!pat) return FALSE;
772 } else if (pat[0] == 'U') {
773 if (first || pat[1] == '0') return TRUE;
774 } else first = FALSE;
775 pat++;
776 }
777 return FALSE;
778}
779
780STATIC char
781first_symbol(const char *pat, const char *patend) {
7918f24d
NC
782 PERL_ARGS_ASSERT_FIRST_SYMBOL;
783
08ca2aa3
TH
784 while (pat < patend) {
785 if (pat[0] != '#') return pat[0];
786 pat++;
f7fe979e 787 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
788 if (!pat) return 0;
789 pat++;
790 }
791 return 0;
792}
793
794/*
7accc089
JH
795=for apidoc unpackstring
796
21ebfc7a
DM
797The engine implementing the unpack() Perl function.
798
799Using the template pat..patend, this function unpacks the string
800s..strend into a number of mortal SVs, which it pushes onto the perl
801argument (@_) stack (so you will need to issue a C<PUTBACK> before and
802C<SPAGAIN> after the call to this function). It returns the number of
803pushed elements.
804
805The strend and patend pointers should point to the byte following the last
806character of each string.
807
808Although this function returns its values on the perl argument stack, it
809doesn't take any parameters from that stack (and thus in particular
810there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
811example).
7accc089
JH
812
813=cut */
814
815I32
f7fe979e 816Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 817{
f7fe979e 818 tempsym_t sym;
08ca2aa3 819
7918f24d
NC
820 PERL_ARGS_ASSERT_UNPACKSTRING;
821
f337b084 822 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
823 else if (need_utf8(pat, patend)) {
824 /* We probably should try to avoid this in case a scalar context call
825 wouldn't get to the "U0" */
826 STRLEN len = strend - s;
230e1fce 827 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
828 SAVEFREEPV(s);
829 strend = s + len;
f337b084 830 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
831 }
832
f337b084
TH
833 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
834 flags |= FLAG_PARSE_UTF8;
08ca2aa3 835
f7fe979e 836 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
837
838 return unpack_rec(&sym, s, s, strend, NULL );
839}
840
4136a0f7 841STATIC I32
f7fe979e 842S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 843{
27da23d5 844 dVAR; dSP;
3297d27d 845 SV *sv = NULL;
f7fe979e 846 const I32 start_sp_offset = SP - PL_stack_base;
49704364 847 howlen_t howlen;
a6ec74c1 848 I32 checksum = 0;
92d41999 849 UV cuv = 0;
a6ec74c1 850 NV cdouble = 0.0;
f337b084 851 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 852 bool beyond = FALSE;
21c16052 853 bool explicit_length;
9e27e96a 854 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 855 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
856
857 PERL_ARGS_ASSERT_UNPACK_REC;
858
28be1210 859 symptr->strbeg = s - strbeg;
49704364 860
49704364 861 while (next_symbol(symptr)) {
a7a3cfaa 862 packprops_t props;
9e27e96a 863 I32 len;
f337b084 864 I32 datumtype = symptr->code;
a1219b5e 865 bool needs_swap;
206947d2 866 /* do first one only unless in list context
08ca2aa3 867 / is implemented by unpacking the count, then popping it from the
206947d2 868 stack, so must check that we're not in the middle of a / */
49704364 869 if ( unpack_only_one
206947d2 870 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 871 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 872 break;
49704364 873
f337b084 874 switch (howlen = symptr->howlen) {
fc241834
RGS
875 case e_star:
876 len = strend - strbeg; /* long enough */
49704364 877 break;
f337b084
TH
878 default:
879 /* e_no_len and e_number */
880 len = symptr->length;
881 break;
49704364 882 }
18529408 883
21c16052 884 explicit_length = TRUE;
a6ec74c1 885 redo_switch:
49704364 886 beyond = s >= strend;
a7a3cfaa
TH
887
888 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
889 if (props) {
890 /* props nonzero means we can process this letter. */
9e27e96a
AL
891 const long size = props & PACK_SIZE_MASK;
892 const long howmany = (strend - s) / size;
a7a3cfaa
TH
893 if (len > howmany)
894 len = howmany;
895
896 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
897 if (len && unpack_only_one) len = 1;
898 EXTEND(SP, len);
899 EXTEND_MORTAL(len);
78d46eaa
NC
900 }
901 }
a7a3cfaa 902
a1219b5e
NC
903 needs_swap = NEEDS_SWAP(datumtype);
904
1109a392 905 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 906 default:
1109a392 907 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 908
a6ec74c1 909 case '%':
49704364 910 if (howlen == e_no_len)
18529408 911 len = 16; /* len is not specified */
a6ec74c1 912 checksum = len;
92d41999 913 cuv = 0;
a6ec74c1 914 cdouble = 0;
18529408 915 continue;
a6ec74c1 916 break;
18529408
IZ
917 case '(':
918 {
49704364 919 tempsym_t savsym = *symptr;
9e27e96a 920 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 921 symptr->flags |= group_modifiers;
49704364 922 symptr->patend = savsym.grpend;
28be1210 923 symptr->previous = &savsym;
49704364 924 symptr->level++;
18529408 925 PUTBACK;
c6f750d1 926 if (len && unpack_only_one) len = 1;
18529408 927 while (len--) {
49704364 928 symptr->patptr = savsym.grpbeg;
f337b084
TH
929 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
930 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
931 unpack_rec(symptr, s, strbeg, strend, &s);
932 if (s == strend && savsym.howlen == e_star)
49704364 933 break; /* No way to continue */
18529408
IZ
934 }
935 SPAGAIN;
28be1210 936 savsym.flags = symptr->flags & ~group_modifiers;
49704364 937 *symptr = savsym;
18529408
IZ
938 break;
939 }
28be1210 940 case '.' | TYPE_IS_SHRIEKING:
28be1210 941 case '.': {
9e27e96a 942 const char *from;
28be1210 943 SV *sv;
9e27e96a 944 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210
TH
945 if (howlen == e_star) from = strbeg;
946 else if (len <= 0) from = s;
947 else {
948 tempsym_t *group = symptr;
949
950 while (--len && group) group = group->previous;
951 from = group ? strbeg + group->strbeg : strbeg;
952 }
953 sv = from <= s ?
00646304
CB
954 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
955 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 956 mXPUSHs(sv);
28be1210
TH
957 break;
958 }
28be1210 959 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 960 case '@':
28be1210 961 s = strbeg + symptr->strbeg;
28be1210 962 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210 963 {
08ca2aa3
TH
964 while (len > 0) {
965 if (s >= strend)
966 Perl_croak(aTHX_ "'@' outside of string in unpack");
967 s += UTF8SKIP(s);
968 len--;
969 }
970 if (s > strend)
971 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
972 } else {
28be1210 973 if (strend-s < len)
fc241834 974 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 975 s += len;
08ca2aa3 976 }
a6ec74c1 977 break;
62f95557
IZ
978 case 'X' | TYPE_IS_SHRIEKING:
979 if (!len) /* Avoid division by 0 */
980 len = 1;
08ca2aa3 981 if (utf8) {
f7fe979e 982 const char *hop, *last;
f337b084
TH
983 I32 l = len;
984 hop = last = strbeg;
985 while (hop < s) {
986 hop += UTF8SKIP(hop);
987 if (--l == 0) {
08ca2aa3 988 last = hop;
f337b084
TH
989 l = len;
990 }
fc241834 991 }
f337b084
TH
992 if (last > s)
993 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
994 s = last;
995 break;
f337b084
TH
996 }
997 len = (s - strbeg) % len;
62f95557 998 /* FALL THROUGH */
a6ec74c1 999 case 'X':
08ca2aa3
TH
1000 if (utf8) {
1001 while (len > 0) {
1002 if (s <= strbeg)
1003 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1004 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1005 if (s <= strbeg)
1006 Perl_croak(aTHX_ "'X' outside of string in unpack");
1007 }
1008 len--;
1009 }
1010 } else {
fc241834
RGS
1011 if (len > s - strbeg)
1012 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1013 s -= len;
08ca2aa3 1014 }
a6ec74c1 1015 break;
9e27e96a
AL
1016 case 'x' | TYPE_IS_SHRIEKING: {
1017 I32 ai32;
62f95557
IZ
1018 if (!len) /* Avoid division by 0 */
1019 len = 1;
230e1fce
NC
1020 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1021 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1022 if (ai32 == 0) break;
1023 len -= ai32;
9e27e96a 1024 }
62f95557 1025 /* FALL THROUGH */
a6ec74c1 1026 case 'x':
08ca2aa3
TH
1027 if (utf8) {
1028 while (len>0) {
1029 if (s >= strend)
1030 Perl_croak(aTHX_ "'x' outside of string in unpack");
1031 s += UTF8SKIP(s);
1032 len--;
1033 }
1034 } else {
fc241834
RGS
1035 if (len > strend - s)
1036 Perl_croak(aTHX_ "'x' outside of string in unpack");
1037 s += len;
f337b084 1038 }
a6ec74c1
JH
1039 break;
1040 case '/':
49704364
LW
1041 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1042 break;
a6ec74c1
JH
1043 case 'A':
1044 case 'Z':
1045 case 'a':
08ca2aa3
TH
1046 if (checksum) {
1047 /* Preliminary length estimate is assumed done in 'W' */
1048 if (len > strend - s) len = strend - s;
1049 goto W_checksum;
1050 }
1051 if (utf8) {
1052 I32 l;
f7fe979e 1053 const char *hop;
08ca2aa3
TH
1054 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1055 if (hop >= strend) {
1056 if (hop > strend)
1057 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1058 break;
fc241834 1059 }
a6ec74c1 1060 }
08ca2aa3
TH
1061 if (hop > strend)
1062 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1063 len = hop - s;
1064 } else if (len > strend - s)
1065 len = strend - s;
1066
1067 if (datumtype == 'Z') {
1068 /* 'Z' strips stuff after first null */
f7fe979e 1069 const char *ptr, *end;
f337b084
TH
1070 end = s + len;
1071 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1072 sv = newSVpvn(s, ptr-s);
1073 if (howlen == e_star) /* exact for 'Z*' */
1074 len = ptr-s + (ptr != strend ? 1 : 0);
1075 } else if (datumtype == 'A') {
1076 /* 'A' strips both nulls and spaces */
f7fe979e 1077 const char *ptr;
18bdf90a
TH
1078 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1079 for (ptr = s+len-1; ptr >= s; ptr--)
1080 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1081 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1082 if (ptr >= s) ptr += UTF8SKIP(ptr);
1083 else ptr++;
28be1210 1084 if (ptr > s+len)
18bdf90a
TH
1085 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1086 } else {
1087 for (ptr = s+len-1; ptr >= s; ptr--)
1088 if (*ptr != 0 && !isSPACE(*ptr)) break;
1089 ptr++;
1090 }
08ca2aa3
TH
1091 sv = newSVpvn(s, ptr-s);
1092 } else sv = newSVpvn(s, len);
1093
1094 if (utf8) {
1095 SvUTF8_on(sv);
1096 /* Undo any upgrade done due to need_utf8() */
f337b084 1097 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1098 sv_utf8_downgrade(sv, 0);
a6ec74c1 1099 }
6e449a3a 1100 mXPUSHs(sv);
08ca2aa3 1101 s += len;
a6ec74c1
JH
1102 break;
1103 case 'B':
08ca2aa3
TH
1104 case 'b': {
1105 char *str;
49704364 1106 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1107 len = (strend - s) * 8;
1108 if (checksum) {
f337b084 1109 if (utf8)
08ca2aa3 1110 while (len >= 8 && s < strend) {
f337b084 1111 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1112 len -= 8;
1113 }
f337b084 1114 else
fc241834 1115 while (len >= 8) {
08ca2aa3 1116 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1117 len -= 8;
1118 }
08ca2aa3
TH
1119 if (len && s < strend) {
1120 U8 bits;
f337b084
TH
1121 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1122 if (datumtype == 'b')
a6ec74c1 1123 while (len-- > 0) {
92d41999 1124 if (bits & 1) cuv++;
a6ec74c1
JH
1125 bits >>= 1;
1126 }
f337b084 1127 else
a6ec74c1 1128 while (len-- > 0) {
08ca2aa3 1129 if (bits & 0x80) cuv++;
a6ec74c1
JH
1130 bits <<= 1;
1131 }
fc241834 1132 }
a6ec74c1
JH
1133 break;
1134 }
08ca2aa3 1135
561b68a9 1136 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1137 SvPOK_on(sv);
1138 str = SvPVX(sv);
1139 if (datumtype == 'b') {
f337b084 1140 U8 bits = 0;
f7fe979e 1141 const I32 ai32 = len;
08ca2aa3
TH
1142 for (len = 0; len < ai32; len++) {
1143 if (len & 7) bits >>= 1;
1144 else if (utf8) {
1145 if (s >= strend) break;
f337b084 1146 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1147 } else bits = *(U8 *) s++;
1148 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1149 }
08ca2aa3 1150 } else {
f337b084 1151 U8 bits = 0;
f7fe979e 1152 const I32 ai32 = len;
08ca2aa3
TH
1153 for (len = 0; len < ai32; len++) {
1154 if (len & 7) bits <<= 1;
1155 else if (utf8) {
1156 if (s >= strend) break;
f337b084 1157 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1158 } else bits = *(U8 *) s++;
1159 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1160 }
1161 }
1162 *str = '\0';
aa07b2f6 1163 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1164 XPUSHs(sv);
a6ec74c1 1165 break;
08ca2aa3 1166 }
a6ec74c1 1167 case 'H':
08ca2aa3 1168 case 'h': {
3297d27d 1169 char *str = NULL;
fc241834 1170 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1171 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1172 len = (strend - s) * 2;
858fe5e1
TC
1173 if (!checksum) {
1174 sv = sv_2mortal(newSV(len ? len : 1));
1175 SvPOK_on(sv);
1176 str = SvPVX(sv);
1177 }
a6ec74c1 1178 if (datumtype == 'h') {
f337b084 1179 U8 bits = 0;
9e27e96a 1180 I32 ai32 = len;
fc241834
RGS
1181 for (len = 0; len < ai32; len++) {
1182 if (len & 1) bits >>= 4;
1183 else if (utf8) {
1184 if (s >= strend) break;
f337b084 1185 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1186 } else bits = * (U8 *) s++;
858fe5e1
TC
1187 if (!checksum)
1188 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1189 }
08ca2aa3 1190 } else {
f337b084 1191 U8 bits = 0;
f7fe979e 1192 const I32 ai32 = len;
08ca2aa3
TH
1193 for (len = 0; len < ai32; len++) {
1194 if (len & 1) bits <<= 4;
1195 else if (utf8) {
1196 if (s >= strend) break;
f337b084 1197 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1198 } else bits = *(U8 *) s++;
858fe5e1
TC
1199 if (!checksum)
1200 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1201 }
1202 }
858fe5e1
TC
1203 if (!checksum) {
1204 *str = '\0';
1205 SvCUR_set(sv, str - SvPVX_const(sv));
1206 XPUSHs(sv);
1207 }
a6ec74c1 1208 break;
08ca2aa3 1209 }
1651fc44
ML
1210 case 'C':
1211 if (len == 0) {
1212 if (explicit_length)
1213 /* Switch to "character" mode */
1214 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1215 break;
1216 }
1217 /* FALL THROUGH */
a6ec74c1 1218 case 'c':
1651fc44
ML
1219 while (len-- > 0 && s < strend) {
1220 int aint;
1221 if (utf8)
1222 {
1223 STRLEN retlen;
1224 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1225 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1226 if (retlen == (STRLEN) -1 || retlen == 0)
1227 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1228 s += retlen;
1229 }
1230 else
1231 aint = *(U8 *)(s)++;
1232 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1233 aint -= 256;
08ca2aa3 1234 if (!checksum)
6e449a3a 1235 mPUSHi(aint);
73cb7263
NC
1236 else if (checksum > bits_in_uv)
1237 cdouble += (NV)aint;
1238 else
1239 cuv += aint;
a6ec74c1
JH
1240 }
1241 break;
08ca2aa3
TH
1242 case 'W':
1243 W_checksum:
1651fc44 1244 if (utf8) {
08ca2aa3 1245 while (len-- > 0 && s < strend) {
08ca2aa3 1246 STRLEN retlen;
f7fe979e 1247 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1248 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1249 if (retlen == (STRLEN) -1 || retlen == 0)
1250 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1251 s += retlen;
1252 if (!checksum)
6e449a3a 1253 mPUSHu(val);
08ca2aa3
TH
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV) val;
d6d3e8bd 1256 else
08ca2aa3 1257 cuv += val;
fc241834 1258 }
08ca2aa3 1259 } else if (!checksum)
a6ec74c1 1260 while (len-- > 0) {
f7fe979e 1261 const U8 ch = *(U8 *) s++;
6e449a3a 1262 mPUSHu(ch);
a6ec74c1 1263 }
08ca2aa3
TH
1264 else if (checksum > bits_in_uv)
1265 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1266 else
1267 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1268 break;
1269 case 'U':
35bcd338 1270 if (len == 0) {
c5333953 1271 if (explicit_length && howlen != e_star) {
08ca2aa3 1272 /* Switch to "bytes in UTF-8" mode */
f337b084 1273 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1274 else
1275 /* Should be impossible due to the need_utf8() test */
1276 Perl_croak(aTHX_ "U0 mode on a byte string");
1277 }
35bcd338
JH
1278 break;
1279 }
08ca2aa3 1280 if (len > strend - s) len = strend - s;
fc241834 1281 if (!checksum) {
08ca2aa3
TH
1282 if (len && unpack_only_one) len = 1;
1283 EXTEND(SP, len);
1284 EXTEND_MORTAL(len);
fc241834 1285 }
08ca2aa3
TH
1286 while (len-- > 0 && s < strend) {
1287 STRLEN retlen;
1288 UV auv;
1289 if (utf8) {
1290 U8 result[UTF8_MAXLEN];
f7fe979e 1291 const char *ptr = s;
08ca2aa3 1292 STRLEN len;
08ca2aa3
TH
1293 /* Bug: warns about bad utf8 even if we are short on bytes
1294 and will break out of the loop */
230e1fce
NC
1295 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1296 'U'))
08ca2aa3
TH
1297 break;
1298 len = UTF8SKIP(result);
fc241834 1299 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1300 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1301 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1302 s = ptr;
1303 } else {
cde84f2a 1304 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1305 if (retlen == (STRLEN) -1 || retlen == 0)
1306 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1307 s += retlen;
1308 }
1309 if (!checksum)
6e449a3a 1310 mPUSHu(auv);
73cb7263 1311 else if (checksum > bits_in_uv)
08ca2aa3 1312 cdouble += (NV) auv;
73cb7263 1313 else
08ca2aa3 1314 cuv += auv;
a6ec74c1
JH
1315 }
1316 break;
49704364
LW
1317 case 's' | TYPE_IS_SHRIEKING:
1318#if SHORTSIZE != SIZE16
73cb7263 1319 while (len-- > 0) {
08ca2aa3 1320 short ashort;
f337b084 1321 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
08ca2aa3 1322 if (!checksum)
6e449a3a 1323 mPUSHi(ashort);
73cb7263
NC
1324 else if (checksum > bits_in_uv)
1325 cdouble += (NV)ashort;
1326 else
1327 cuv += ashort;
49704364
LW
1328 }
1329 break;
1330#else
1331 /* Fallthrough! */
a6ec74c1 1332#endif
49704364 1333 case 's':
73cb7263 1334 while (len-- > 0) {
08ca2aa3
TH
1335 I16 ai16;
1336
1337#if U16SIZE > SIZE16
1338 ai16 = 0;
1339#endif
f337b084 1340 SHIFT16(utf8, s, strend, &ai16, datumtype);
1109a392 1341#if U16SIZE > SIZE16
73cb7263
NC
1342 if (ai16 > 32767)
1343 ai16 -= 65536;
a6ec74c1 1344#endif
08ca2aa3 1345 if (!checksum)
6e449a3a 1346 mPUSHi(ai16);
73cb7263
NC
1347 else if (checksum > bits_in_uv)
1348 cdouble += (NV)ai16;
1349 else
1350 cuv += ai16;
a6ec74c1
JH
1351 }
1352 break;
49704364
LW
1353 case 'S' | TYPE_IS_SHRIEKING:
1354#if SHORTSIZE != SIZE16
73cb7263 1355 while (len-- > 0) {
08ca2aa3 1356 unsigned short aushort;
f337b084 1357 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
08ca2aa3 1358 if (!checksum)
6e449a3a 1359 mPUSHu(aushort);
73cb7263
NC
1360 else if (checksum > bits_in_uv)
1361 cdouble += (NV)aushort;
1362 else
1363 cuv += aushort;
49704364
LW
1364 }
1365 break;
1366#else
486ec47a 1367 /* Fallthrough! */
49704364 1368#endif
a6ec74c1
JH
1369 case 'v':
1370 case 'n':
1371 case 'S':
73cb7263 1372 while (len-- > 0) {
08ca2aa3
TH
1373 U16 au16;
1374#if U16SIZE > SIZE16
1375 au16 = 0;
1376#endif
f337b084 1377 SHIFT16(utf8, s, strend, &au16, datumtype);
73cb7263
NC
1378 if (datumtype == 'n')
1379 au16 = PerlSock_ntohs(au16);
73cb7263
NC
1380 if (datumtype == 'v')
1381 au16 = vtohs(au16);
08ca2aa3 1382 if (!checksum)
6e449a3a 1383 mPUSHu(au16);
73cb7263 1384 else if (checksum > bits_in_uv)
f337b084 1385 cdouble += (NV) au16;
73cb7263
NC
1386 else
1387 cuv += au16;
a6ec74c1
JH
1388 }
1389 break;
068bd2e7
MHM
1390 case 'v' | TYPE_IS_SHRIEKING:
1391 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1392 while (len-- > 0) {
08ca2aa3
TH
1393 I16 ai16;
1394# if U16SIZE > SIZE16
1395 ai16 = 0;
1396# endif
f337b084 1397 SHIFT16(utf8, s, strend, &ai16, datumtype);
e396d235
NC
1398 /* There should never be any byte-swapping here. */
1399 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263 1400 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3 1401 ai16 = (I16) PerlSock_ntohs((U16) ai16);
73cb7263 1402 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3 1403 ai16 = (I16) vtohs((U16) ai16);
08ca2aa3 1404 if (!checksum)
6e449a3a 1405 mPUSHi(ai16);
73cb7263 1406 else if (checksum > bits_in_uv)
08ca2aa3 1407 cdouble += (NV) ai16;
73cb7263
NC
1408 else
1409 cuv += ai16;
068bd2e7
MHM
1410 }
1411 break;
a6ec74c1 1412 case 'i':
49704364 1413 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1414 while (len-- > 0) {
08ca2aa3 1415 int aint;
f337b084 1416 SHIFT_VAR(utf8, s, strend, aint, datumtype);
08ca2aa3 1417 if (!checksum)
6e449a3a 1418 mPUSHi(aint);
73cb7263
NC
1419 else if (checksum > bits_in_uv)
1420 cdouble += (NV)aint;
1421 else
1422 cuv += aint;
a6ec74c1
JH
1423 }
1424 break;
1425 case 'I':
49704364 1426 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1427 while (len-- > 0) {
08ca2aa3 1428 unsigned int auint;
f337b084 1429 SHIFT_VAR(utf8, s, strend, auint, datumtype);
08ca2aa3 1430 if (!checksum)
6e449a3a 1431 mPUSHu(auint);
73cb7263
NC
1432 else if (checksum > bits_in_uv)
1433 cdouble += (NV)auint;
1434 else
1435 cuv += auint;
a6ec74c1
JH
1436 }
1437 break;
92d41999 1438 case 'j':
73cb7263 1439 while (len-- > 0) {
08ca2aa3 1440 IV aiv;
f337b084 1441 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
08ca2aa3 1442 if (!checksum)
6e449a3a 1443 mPUSHi(aiv);
73cb7263
NC
1444 else if (checksum > bits_in_uv)
1445 cdouble += (NV)aiv;
1446 else
1447 cuv += aiv;
92d41999
JH
1448 }
1449 break;
1450 case 'J':
73cb7263 1451 while (len-- > 0) {
08ca2aa3 1452 UV auv;
f337b084 1453 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1454 if (!checksum)
6e449a3a 1455 mPUSHu(auv);
73cb7263
NC
1456 else if (checksum > bits_in_uv)
1457 cdouble += (NV)auv;
1458 else
1459 cuv += auv;
92d41999
JH
1460 }
1461 break;
49704364
LW
1462 case 'l' | TYPE_IS_SHRIEKING:
1463#if LONGSIZE != SIZE32
73cb7263 1464 while (len-- > 0) {
08ca2aa3 1465 long along;
f337b084 1466 SHIFT_VAR(utf8, s, strend, along, datumtype);
08ca2aa3 1467 if (!checksum)
6e449a3a 1468 mPUSHi(along);
73cb7263
NC
1469 else if (checksum > bits_in_uv)
1470 cdouble += (NV)along;
1471 else
1472 cuv += along;
49704364
LW
1473 }
1474 break;
1475#else
1476 /* Fallthrough! */
a6ec74c1 1477#endif
49704364 1478 case 'l':
73cb7263 1479 while (len-- > 0) {
08ca2aa3
TH
1480 I32 ai32;
1481#if U32SIZE > SIZE32
1482 ai32 = 0;
1483#endif
f337b084 1484 SHIFT32(utf8, s, strend, &ai32, datumtype);
25a9bd2a 1485#if U32SIZE > SIZE32
08ca2aa3 1486 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1487#endif
08ca2aa3 1488 if (!checksum)
6e449a3a 1489 mPUSHi(ai32);
73cb7263
NC
1490 else if (checksum > bits_in_uv)
1491 cdouble += (NV)ai32;
1492 else
1493 cuv += ai32;
a6ec74c1
JH
1494 }
1495 break;
49704364
LW
1496 case 'L' | TYPE_IS_SHRIEKING:
1497#if LONGSIZE != SIZE32
73cb7263 1498 while (len-- > 0) {
08ca2aa3 1499 unsigned long aulong;
f337b084 1500 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
08ca2aa3 1501 if (!checksum)
6e449a3a 1502 mPUSHu(aulong);
73cb7263
NC
1503 else if (checksum > bits_in_uv)
1504 cdouble += (NV)aulong;
1505 else
1506 cuv += aulong;
49704364
LW
1507 }
1508 break;
1509#else
1510 /* Fall through! */
1511#endif
a6ec74c1
JH
1512 case 'V':
1513 case 'N':
1514 case 'L':
73cb7263 1515 while (len-- > 0) {
08ca2aa3
TH
1516 U32 au32;
1517#if U32SIZE > SIZE32
1518 au32 = 0;
1519#endif
f337b084 1520 SHIFT32(utf8, s, strend, &au32, datumtype);
73cb7263
NC
1521 if (datumtype == 'N')
1522 au32 = PerlSock_ntohl(au32);
73cb7263
NC
1523 if (datumtype == 'V')
1524 au32 = vtohl(au32);
08ca2aa3 1525 if (!checksum)
6e449a3a 1526 mPUSHu(au32);
fc241834
RGS
1527 else if (checksum > bits_in_uv)
1528 cdouble += (NV)au32;
1529 else
1530 cuv += au32;
a6ec74c1
JH
1531 }
1532 break;
068bd2e7
MHM
1533 case 'V' | TYPE_IS_SHRIEKING:
1534 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1535 while (len-- > 0) {
08ca2aa3 1536 I32 ai32;
f8e5a5db 1537#if U32SIZE > SIZE32
08ca2aa3 1538 ai32 = 0;
f8e5a5db 1539#endif
f337b084 1540 SHIFT32(utf8, s, strend, &ai32, datumtype);
e396d235
NC
1541 /* There should never be any byte swapping here. */
1542 assert(!TYPE_ENDIANNESS(datumtype));
73cb7263
NC
1543 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1544 ai32 = (I32)PerlSock_ntohl((U32)ai32);
73cb7263
NC
1545 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1546 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3 1547 if (!checksum)
6e449a3a 1548 mPUSHi(ai32);
73cb7263
NC
1549 else if (checksum > bits_in_uv)
1550 cdouble += (NV)ai32;
1551 else
1552 cuv += ai32;
068bd2e7
MHM
1553 }
1554 break;
a6ec74c1 1555 case 'p':
a6ec74c1 1556 while (len-- > 0) {
f7fe979e 1557 const char *aptr;
f337b084 1558 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
c4c5f44a 1559 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1560 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1561 }
1562 break;
1563 case 'w':
a6ec74c1
JH
1564 {
1565 UV auv = 0;
1566 U32 bytes = 0;
fc241834 1567
08ca2aa3
TH
1568 while (len > 0 && s < strend) {
1569 U8 ch;
f337b084 1570 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1571 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1572 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1573 if (ch < 0x80) {
a6ec74c1 1574 bytes = 0;
6e449a3a 1575 mPUSHu(auv);
a6ec74c1
JH
1576 len--;
1577 auv = 0;
08ca2aa3 1578 continue;
a6ec74c1 1579 }
08ca2aa3 1580 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1581 const char *t;
a6ec74c1 1582
f5992bc4 1583 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1584 while (s < strend) {
f337b084 1585 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1586 sv = mul128(sv, (U8)(ch & 0x7f));
1587 if (!(ch & 0x80)) {
a6ec74c1
JH
1588 bytes = 0;
1589 break;
1590 }
1591 }
10516c54 1592 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1593 while (*t == '0')
1594 t++;
1595 sv_chop(sv, t);
6e449a3a 1596 mPUSHs(sv);
a6ec74c1
JH
1597 len--;
1598 auv = 0;
1599 }
1600 }
1601 if ((s >= strend) && bytes)
49704364 1602 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1603 }
1604 break;
1605 case 'P':
49704364
LW
1606 if (symptr->howlen == e_star)
1607 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1608 EXTEND(SP, 1);
2d3e0934 1609 if (s + sizeof(char*) <= strend) {
08ca2aa3 1610 char *aptr;
f337b084 1611 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
fc241834 1612 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 1613 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 1614 }
a6ec74c1
JH
1615 break;
1616#ifdef HAS_QUAD
1617 case 'q':
73cb7263 1618 while (len-- > 0) {
08ca2aa3 1619 Quad_t aquad;
f337b084 1620 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
08ca2aa3 1621 if (!checksum)
6e449a3a
MHM
1622 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1623 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
1624 else if (checksum > bits_in_uv)
1625 cdouble += (NV)aquad;
1626 else
1627 cuv += aquad;
1628 }
a6ec74c1
JH
1629 break;
1630 case 'Q':
73cb7263 1631 while (len-- > 0) {
08ca2aa3 1632 Uquad_t auquad;
f337b084 1633 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
08ca2aa3 1634 if (!checksum)
6e449a3a
MHM
1635 mPUSHs(auquad <= UV_MAX ?
1636 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
1637 else if (checksum > bits_in_uv)
1638 cdouble += (NV)auquad;
1639 else
1640 cuv += auquad;
a6ec74c1
JH
1641 }
1642 break;
08ca2aa3 1643#endif /* HAS_QUAD */
a6ec74c1
JH
1644 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1645 case 'f':
73cb7263 1646 while (len-- > 0) {
08ca2aa3 1647 float afloat;
f337b084 1648 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3 1649 if (!checksum)
6e449a3a 1650 mPUSHn(afloat);
08ca2aa3 1651 else
73cb7263 1652 cdouble += afloat;
fc241834 1653 }
a6ec74c1
JH
1654 break;
1655 case 'd':
73cb7263 1656 while (len-- > 0) {
08ca2aa3 1657 double adouble;
f337b084 1658 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3 1659 if (!checksum)
6e449a3a 1660 mPUSHn(adouble);
08ca2aa3 1661 else
73cb7263 1662 cdouble += adouble;
fc241834 1663 }
a6ec74c1 1664 break;
92d41999 1665 case 'F':
73cb7263 1666 while (len-- > 0) {
275663fa
TC
1667 NV_bytes anv;
1668 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
08ca2aa3 1669 if (!checksum)
275663fa 1670 mPUSHn(anv.nv);
08ca2aa3 1671 else
275663fa 1672 cdouble += anv.nv;
fc241834 1673 }
92d41999
JH
1674 break;
1675#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1676 case 'D':
73cb7263 1677 while (len-- > 0) {
275663fa
TC
1678 ld_bytes aldouble;
1679 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
08ca2aa3 1680 if (!checksum)
275663fa 1681 mPUSHn(aldouble.ld);
08ca2aa3 1682 else
275663fa 1683 cdouble += aldouble.ld;
92d41999
JH
1684 }
1685 break;
1686#endif
a6ec74c1 1687 case 'u':
858fe5e1 1688 if (!checksum) {
f7fe979e 1689 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 1690 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
1691 if (l) SvPOK_on(sv);
1692 }
1693 if (utf8) {
1694 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1695 I32 a, b, c, d;
db187877 1696 char hunk[3];
08ca2aa3 1697
08ca2aa3
TH
1698 while (len > 0) {
1699 next_uni_uu(aTHX_ &s, strend, &a);
1700 next_uni_uu(aTHX_ &s, strend, &b);
1701 next_uni_uu(aTHX_ &s, strend, &c);
1702 next_uni_uu(aTHX_ &s, strend, &d);
1703 hunk[0] = (char)((a << 2) | (b >> 4));
1704 hunk[1] = (char)((b << 4) | (c >> 2));
1705 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1706 if (!checksum)
1707 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
1708 len -= 3;
1709 }
1710 if (s < strend) {
f7fe979e
AL
1711 if (*s == '\n') {
1712 s++;
1713 }
08ca2aa3
TH
1714 else {
1715 /* possible checksum byte */
f7fe979e
AL
1716 const char *skip = s+UTF8SKIP(s);
1717 if (skip < strend && *skip == '\n')
1718 s = skip+1;
08ca2aa3
TH
1719 }
1720 }
1721 }
1722 } else {
fc241834
RGS
1723 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1724 I32 a, b, c, d;
db187877 1725 char hunk[3];
a6ec74c1 1726
fc241834
RGS
1727 len = PL_uudmap[*(U8*)s++] & 077;
1728 while (len > 0) {
1729 if (s < strend && ISUUCHAR(*s))
1730 a = PL_uudmap[*(U8*)s++] & 077;
1731 else
1732 a = 0;
1733 if (s < strend && ISUUCHAR(*s))
1734 b = PL_uudmap[*(U8*)s++] & 077;
1735 else
1736 b = 0;
1737 if (s < strend && ISUUCHAR(*s))
1738 c = PL_uudmap[*(U8*)s++] & 077;
1739 else
1740 c = 0;
1741 if (s < strend && ISUUCHAR(*s))
1742 d = PL_uudmap[*(U8*)s++] & 077;
1743 else
1744 d = 0;
1745 hunk[0] = (char)((a << 2) | (b >> 4));
1746 hunk[1] = (char)((b << 4) | (c >> 2));
1747 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
1748 if (!checksum)
1749 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
1750 len -= 3;
1751 }
1752 if (*s == '\n')
1753 s++;
1754 else /* possible checksum byte */
1755 if (s + 1 < strend && s[1] == '\n')
1756 s += 2;
a6ec74c1 1757 }
08ca2aa3 1758 }
858fe5e1
TC
1759 if (!checksum)
1760 XPUSHs(sv);
a6ec74c1
JH
1761 break;
1762 }
49704364 1763
a6ec74c1 1764 if (checksum) {
1109a392 1765 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1766 (checksum > bits_in_uv &&
08ca2aa3
TH
1767 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1768 NV trouble, anv;
a6ec74c1 1769
08ca2aa3 1770 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1771 while (checksum >= 16) {
1772 checksum -= 16;
08ca2aa3 1773 anv *= 65536.0;
a6ec74c1 1774 }
a6ec74c1 1775 while (cdouble < 0.0)
08ca2aa3
TH
1776 cdouble += anv;
1777 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 1778 sv = newSVnv(cdouble);
a6ec74c1
JH
1779 }
1780 else {
fa8ec7c1
NC
1781 if (checksum < bits_in_uv) {
1782 UV mask = ((UV)1 << checksum) - 1;
92d41999 1783 cuv &= mask;
a6ec74c1 1784 }
c4c5f44a 1785 sv = newSVuv(cuv);
a6ec74c1 1786 }
6e449a3a 1787 mXPUSHs(sv);
a6ec74c1
JH
1788 checksum = 0;
1789 }
fc241834 1790
49704364
LW
1791 if (symptr->flags & FLAG_SLASH){
1792 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 1793 break;
49704364
LW
1794 if( next_symbol(symptr) ){
1795 if( symptr->howlen == e_number )
1796 Perl_croak(aTHX_ "Count after length/code in unpack" );
1797 if( beyond ){
1798 /* ...end of char buffer then no decent length available */
1799 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1800 } else {
1801 /* take top of stack (hope it's numeric) */
1802 len = POPi;
1803 if( len < 0 )
1804 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1805 }
1806 } else {
1807 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1808 }
1809 datumtype = symptr->code;
21c16052 1810 explicit_length = FALSE;
49704364
LW
1811 goto redo_switch;
1812 }
a6ec74c1 1813 }
49704364 1814
18529408
IZ
1815 if (new_s)
1816 *new_s = s;
1817 PUTBACK;
1818 return SP - PL_stack_base - start_sp_offset;
1819}
1820
1821PP(pp_unpack)
1822{
97aff369 1823 dVAR;
18529408 1824 dSP;
bab9c0ac 1825 dPOPPOPssrl;
18529408
IZ
1826 I32 gimme = GIMME_V;
1827 STRLEN llen;
1828 STRLEN rlen;
5c144d81
NC
1829 const char *pat = SvPV_const(left, llen);
1830 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
1831 const char *strend = s + rlen;
1832 const char *patend = pat + llen;
08ca2aa3 1833 I32 cnt;
18529408
IZ
1834
1835 PUTBACK;
7accc089 1836 cnt = unpackstring(pat, patend, s, strend,
49704364 1837 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 1838 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 1839
18529408
IZ
1840 SPAGAIN;
1841 if ( !cnt && gimme == G_SCALAR )
1842 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1843 RETURN;
1844}
1845
f337b084 1846STATIC U8 *
f7fe979e 1847doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 1848{
f337b084 1849 *h++ = PL_uuemap[len];
a6ec74c1 1850 while (len > 2) {
f337b084
TH
1851 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1852 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1853 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1854 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
1855 s += 3;
1856 len -= 3;
1857 }
1858 if (len > 0) {
f7fe979e 1859 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
1860 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1861 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1862 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1863 *h++ = PL_uuemap[0];
a6ec74c1 1864 }
f337b084
TH
1865 *h++ = '\n';
1866 return h;
a6ec74c1
JH
1867}
1868
1869STATIC SV *
f7fe979e 1870S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 1871{
8b6e33c7
AL
1872 SV *result = newSVpvn(s, l);
1873 char *const result_c = SvPV_nolen(result); /* convenience */
1874 char *out = result_c;
1875 bool skip = 1;
1876 bool ignore = 0;
a6ec74c1 1877
7918f24d
NC
1878 PERL_ARGS_ASSERT_IS_AN_INT;
1879
a6ec74c1
JH
1880 while (*s) {
1881 switch (*s) {
1882 case ' ':
1883 break;
1884 case '+':
1885 if (!skip) {
1886 SvREFCNT_dec(result);
1887 return (NULL);
1888 }
1889 break;
1890 case '0':
1891 case '1':
1892 case '2':
1893 case '3':
1894 case '4':
1895 case '5':
1896 case '6':
1897 case '7':
1898 case '8':
1899 case '9':
1900 skip = 0;
1901 if (!ignore) {
1902 *(out++) = *s;
1903 }
1904 break;
1905 case '.':
1906 ignore = 1;
1907 break;
1908 default:
1909 SvREFCNT_dec(result);
1910 return (NULL);
1911 }
1912 s++;
1913 }
1914 *(out++) = '\0';
1915 SvCUR_set(result, out - result_c);
1916 return (result);
1917}
1918
1919/* pnum must be '\0' terminated */
1920STATIC int
1921S_div128(pTHX_ SV *pnum, bool *done)
1922{
8b6e33c7
AL
1923 STRLEN len;
1924 char * const s = SvPV(pnum, len);
1925 char *t = s;
1926 int m = 0;
1927
7918f24d
NC
1928 PERL_ARGS_ASSERT_DIV128;
1929
8b6e33c7
AL
1930 *done = 1;
1931 while (*t) {
1932 const int i = m * 10 + (*t - '0');
1933 const int r = (i >> 7); /* r < 10 */
1934 m = i & 0x7F;
1935 if (r) {
1936 *done = 0;
1937 }
1938 *(t++) = '0' + r;
a6ec74c1 1939 }
8b6e33c7
AL
1940 *(t++) = '\0';
1941 SvCUR_set(pnum, (STRLEN) (t - s));
1942 return (m);
a6ec74c1
JH
1943}
1944
18529408 1945/*
7accc089
JH
1946=for apidoc packlist
1947
1948The engine implementing pack() Perl function.
1949
bfce84ec
AL
1950=cut
1951*/
7accc089
JH
1952
1953void
5aaab254 1954Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 1955{
97aff369 1956 dVAR;
aadb217d
JH
1957 tempsym_t sym;
1958
7918f24d
NC
1959 PERL_ARGS_ASSERT_PACKLIST;
1960
f7fe979e 1961 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 1962
f337b084
TH
1963 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1964 Also make sure any UTF8 flag is loaded */
56eb0262 1965 SvPV_force_nolen(cat);
bfce84ec
AL
1966 if (DO_UTF8(cat))
1967 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 1968
49704364
LW
1969 (void)pack_rec( cat, &sym, beglist, endlist );
1970}
1971
f337b084
TH
1972/* like sv_utf8_upgrade, but also repoint the group start markers */
1973STATIC void
1974marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1975 STRLEN len;
1976 tempsym_t *group;
f7fe979e
AL
1977 const char *from_ptr, *from_start, *from_end, **marks, **m;
1978 char *to_start, *to_ptr;
f337b084
TH
1979
1980 if (SvUTF8(sv)) return;
1981
aa07b2f6 1982 from_start = SvPVX_const(sv);
f337b084
TH
1983 from_end = from_start + SvCUR(sv);
1984 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
1985 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
1986 if (from_ptr == from_end) {
1987 /* Simple case: no character needs to be changed */
1988 SvUTF8_on(sv);
1989 return;
1990 }
1991
3473cf63 1992 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 1993 Newx(to_start, len, char);
f337b084
TH
1994 Copy(from_start, to_start, from_ptr-from_start, char);
1995 to_ptr = to_start + (from_ptr-from_start);
1996
a02a5408 1997 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
1998 for (group=sym_ptr; group; group = group->previous)
1999 marks[group->level] = from_start + group->strbeg;
2000 marks[sym_ptr->level+1] = from_end+1;
2001 for (m = marks; *m < from_ptr; m++)
2002 *m = to_start + (*m-from_start);
2003
2004 for (;from_ptr < from_end; from_ptr++) {
2005 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2006 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2007 }
2008 *to_ptr = 0;
2009
2010 while (*m == from_ptr) *m++ = to_ptr;
2011 if (m != marks + sym_ptr->level+1) {
2012 Safefree(marks);
2013 Safefree(to_start);
5637ef5b
NC
2014 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2015 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2016 }
2017 for (group=sym_ptr; group; group = group->previous)
2018 group->strbeg = marks[group->level] - to_start;
2019 Safefree(marks);
2020
2021 if (SvOOK(sv)) {
2022 if (SvIVX(sv)) {
b162af07 2023 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2024 from_start -= SvIVX(sv);
2025 SvIV_set(sv, 0);
2026 }
2027 SvFLAGS(sv) &= ~SVf_OOK;
2028 }
2029 if (SvLEN(sv) != 0)
2030 Safefree(from_start);
f880fe2f 2031 SvPV_set(sv, to_start);
b162af07
SP
2032 SvCUR_set(sv, to_ptr - to_start);
2033 SvLEN_set(sv, len);
f337b084
TH
2034 SvUTF8_on(sv);
2035}
2036
2037/* Exponential string grower. Makes string extension effectively O(n)
2038 needed says how many extra bytes we need (not counting the final '\0')
2039 Only grows the string if there is an actual lack of space
2040*/
2041STATIC char *
0bd48802 2042S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2043 const STRLEN cur = SvCUR(sv);
2044 const STRLEN len = SvLEN(sv);
f337b084 2045 STRLEN extend;
7918f24d
NC
2046
2047 PERL_ARGS_ASSERT_SV_EXP_GROW;
2048
f337b084
TH
2049 if (len - cur > needed) return SvPVX(sv);
2050 extend = needed > len ? needed : len;
2051 return SvGROW(sv, len+extend+1);
2052}
49704364
LW
2053
2054STATIC
2055SV **
f337b084 2056S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2057{
97aff369 2058 dVAR;
49704364 2059 tempsym_t lookahead;
f337b084
TH
2060 I32 items = endlist - beglist;
2061 bool found = next_symbol(symptr);
2062 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2063 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2064
7918f24d
NC
2065 PERL_ARGS_ASSERT_PACK_REC;
2066
f337b084
TH
2067 if (symptr->level == 0 && found && symptr->code == 'U') {
2068 marked_upgrade(aTHX_ cat, symptr);
2069 symptr->flags |= FLAG_DO_UTF8;
2070 utf8 = 0;
49704364 2071 }
f337b084 2072 symptr->strbeg = SvCUR(cat);
49704364
LW
2073
2074 while (found) {
f337b084
TH
2075 SV *fromstr;
2076 STRLEN fromlen;
2077 I32 len;
a0714e2c 2078 SV *lengthcode = NULL;
49704364 2079 I32 datumtype = symptr->code;
f337b084
TH
2080 howlen_t howlen = symptr->howlen;
2081 char *start = SvPVX(cat);
2082 char *cur = start + SvCUR(cat);
a1219b5e 2083 bool needs_swap;
49704364 2084
f337b084
TH
2085#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2086
2087 switch (howlen) {
fc241834 2088 case e_star:
f337b084
TH
2089 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2090 0 : items;
2091 break;
2092 default:
2093 /* e_no_len and e_number */
2094 len = symptr->length;
49704364
LW
2095 break;
2096 }
2097
f337b084 2098 if (len) {
a7a3cfaa 2099 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2100
a7a3cfaa
TH
2101 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2102 /* We can process this letter. */
2103 STRLEN size = props & PACK_SIZE_MASK;
2104 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2105 }
f337b084
TH
2106 }
2107
49704364
LW
2108 /* Look ahead for next symbol. Do we have code/code? */
2109 lookahead = *symptr;
2110 found = next_symbol(&lookahead);
246f24af
TH
2111 if (symptr->flags & FLAG_SLASH) {
2112 IV count;
f337b084 2113 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2114 if (strchr("aAZ", lookahead.code)) {
2115 if (lookahead.howlen == e_number) count = lookahead.length;
2116 else {
ce399ba6 2117 if (items > 0) {
48a5da33 2118 count = sv_len_utf8(*beglist);
ce399ba6 2119 }
246f24af
TH
2120 else count = 0;
2121 if (lookahead.code == 'Z') count++;
2122 }
2123 } else {
2124 if (lookahead.howlen == e_number && lookahead.length < items)
2125 count = lookahead.length;
2126 else count = items;
2127 }
2128 lookahead.howlen = e_number;
2129 lookahead.length = count;
2130 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2131 }
49704364 2132
a1219b5e
NC
2133 needs_swap = NEEDS_SWAP(datumtype);
2134
fc241834
RGS
2135 /* Code inside the switch must take care to properly update
2136 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2137 doesn't simply leave using break */
1109a392 2138 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2139 default:
f337b084
TH
2140 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2141 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2142 case '%':
49704364 2143 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2144 {
2145 char *from;
28be1210 2146 case '.' | TYPE_IS_SHRIEKING:
28be1210
TH
2147 case '.':
2148 if (howlen == e_star) from = start;
2149 else if (len == 0) from = cur;
2150 else {
2151 tempsym_t *group = symptr;
2152
2153 while (--len && group) group = group->previous;
2154 from = group ? start + group->strbeg : start;
2155 }
2156 fromstr = NEXTFROM;
2157 len = SvIV(fromstr);
2158 goto resize;
28be1210 2159 case '@' | TYPE_IS_SHRIEKING:
a6ec74c1 2160 case '@':
28be1210
TH
2161 from = start + symptr->strbeg;
2162 resize:
28be1210 2163 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
28be1210
TH
2164 if (len >= 0) {
2165 while (len && from < cur) {
2166 from += UTF8SKIP(from);
2167 len--;
2168 }
2169 if (from > cur)
2170 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2171 if (len) {
2172 /* Here we know from == cur */
2173 grow:
2174 GROWING(0, cat, start, cur, len);
2175 Zero(cur, len, char);
2176 cur += len;
2177 } else if (from < cur) {
2178 len = cur - from;
2179 goto shrink;
2180 } else goto no_change;
2181 } else {
2182 cur = from;
2183 len = -len;
2184 goto utf8_shrink;
f337b084 2185 }
28be1210
TH
2186 else {
2187 len -= cur - from;
f337b084 2188 if (len > 0) goto grow;
28be1210 2189 if (len == 0) goto no_change;
fc241834 2190 len = -len;
28be1210 2191 goto shrink;
f337b084 2192 }
a6ec74c1 2193 break;
28be1210 2194 }
fc241834 2195 case '(': {
49704364 2196 tempsym_t savsym = *symptr;
66c611c5
MHM
2197 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2198 symptr->flags |= group_modifiers;
49704364
LW
2199 symptr->patend = savsym.grpend;
2200 symptr->level++;
f337b084 2201 symptr->previous = &lookahead;
18529408 2202 while (len--) {
f337b084
TH
2203 U32 was_utf8;
2204 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2205 else symptr->flags &= ~FLAG_PARSE_UTF8;
2206 was_utf8 = SvUTF8(cat);
49704364 2207 symptr->patptr = savsym.grpbeg;
f337b084
TH
2208 beglist = pack_rec(cat, symptr, beglist, endlist);
2209 if (SvUTF8(cat) != was_utf8)
2210 /* This had better be an upgrade while in utf8==0 mode */
2211 utf8 = 1;
2212
49704364 2213 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2214 break; /* No way to continue */
2215 }
ee790063 2216 items = endlist - beglist;
f337b084
TH
2217 lookahead.flags = symptr->flags & ~group_modifiers;
2218 goto no_change;
18529408 2219 }
62f95557
IZ
2220 case 'X' | TYPE_IS_SHRIEKING:
2221 if (!len) /* Avoid division by 0 */
2222 len = 1;
f337b084
TH
2223 if (utf8) {
2224 char *hop, *last;
2225 I32 l = len;
2226 hop = last = start;
2227 while (hop < cur) {
2228 hop += UTF8SKIP(hop);
2229 if (--l == 0) {
2230 last = hop;
2231 l = len;
2232 }
2233 }
2234 if (last > cur)
2235 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2236 cur = last;
2237 break;
2238 }
2239 len = (cur-start) % len;
62f95557 2240 /* FALL THROUGH */
a6ec74c1 2241 case 'X':
f337b084
TH
2242 if (utf8) {
2243 if (len < 1) goto no_change;
28be1210 2244 utf8_shrink:
f337b084
TH
2245 while (len > 0) {
2246 if (cur <= start)
28be1210
TH
2247 Perl_croak(aTHX_ "'%c' outside of string in pack",
2248 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2249 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2250 if (cur <= start)
28be1210
TH
2251 Perl_croak(aTHX_ "'%c' outside of string in pack",
2252 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2253 }
2254 len--;
2255 }
2256 } else {
fc241834 2257 shrink:
f337b084 2258 if (cur - start < len)
28be1210
TH
2259 Perl_croak(aTHX_ "'%c' outside of string in pack",
2260 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2261 cur -= len;
2262 }
2263 if (cur < start+symptr->strbeg) {
2264 /* Make sure group starts don't point into the void */
2265 tempsym_t *group;
9e27e96a 2266 const STRLEN length = cur-start;
f337b084
TH
2267 for (group = symptr;
2268 group && length < group->strbeg;
2269 group = group->previous) group->strbeg = length;
2270 lookahead.strbeg = length;
2271 }
a6ec74c1 2272 break;
fc241834
RGS
2273 case 'x' | TYPE_IS_SHRIEKING: {
2274 I32 ai32;
62f95557
IZ
2275 if (!len) /* Avoid division by 0 */
2276 len = 1;
230e1fce 2277 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2278 else ai32 = (cur - start) % len;
2279 if (ai32 == 0) goto no_change;
2280 len -= ai32;
2281 }
2282 /* FALL THROUGH */
a6ec74c1 2283 case 'x':
f337b084 2284 goto grow;
a6ec74c1
JH
2285 case 'A':
2286 case 'Z':
f337b084 2287 case 'a': {
f7fe979e 2288 const char *aptr;
f337b084 2289
a6ec74c1 2290 fromstr = NEXTFROM;
e62f0680 2291 aptr = SvPV_const(fromstr, fromlen);
f337b084 2292 if (DO_UTF8(fromstr)) {
f7fe979e 2293 const char *end, *s;
f337b084
TH
2294
2295 if (!utf8 && !SvUTF8(cat)) {
2296 marked_upgrade(aTHX_ cat, symptr);
2297 lookahead.flags |= FLAG_DO_UTF8;
2298 lookahead.strbeg = symptr->strbeg;
2299 utf8 = 1;
2300 start = SvPVX(cat);
2301 cur = start + SvCUR(cat);
2302 }
fc241834 2303 if (howlen == e_star) {
f337b084
TH
2304 if (utf8) goto string_copy;
2305 len = fromlen+1;
2306 }
2307 s = aptr;
2308 end = aptr + fromlen;
2309 fromlen = datumtype == 'Z' ? len-1 : len;
2310 while ((I32) fromlen > 0 && s < end) {
2311 s += UTF8SKIP(s);
2312 fromlen--;
2313 }
2314 if (s > end)
2315 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2316 if (utf8) {
fc241834 2317 len = fromlen;
f337b084
TH
2318 if (datumtype == 'Z') len++;
2319 fromlen = s-aptr;
2320 len += fromlen;
fc241834 2321
f337b084 2322 goto string_copy;
fc241834 2323 }
f337b084
TH
2324 fromlen = len - fromlen;
2325 if (datumtype == 'Z') fromlen--;
2326 if (howlen == e_star) {
2327 len = fromlen;
2328 if (datumtype == 'Z') len++;
fc241834 2329 }
f337b084 2330 GROWING(0, cat, start, cur, len);
fc241834 2331 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2332 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2333 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2334 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2335 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2336 cur += fromlen;
a6ec74c1 2337 len -= fromlen;
f337b084
TH
2338 } else if (utf8) {
2339 if (howlen == e_star) {
2340 len = fromlen;
2341 if (datumtype == 'Z') len++;
a6ec74c1 2342 }
f337b084
TH
2343 if (len <= (I32) fromlen) {
2344 fromlen = len;
2345 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2346 }
fc241834 2347 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2348 upgrade, so:
2349 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2350 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2351 len -= fromlen;
2352 while (fromlen > 0) {
230e1fce 2353 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2354 aptr++;
2355 fromlen--;
fc241834 2356 }
f337b084
TH
2357 } else {
2358 string_copy:
2359 if (howlen == e_star) {
2360 len = fromlen;
2361 if (datumtype == 'Z') len++;
2362 }
2363 if (len <= (I32) fromlen) {
2364 fromlen = len;
2365 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2366 }
f337b084
TH
2367 GROWING(0, cat, start, cur, len);
2368 Copy(aptr, cur, fromlen, char);
2369 cur += fromlen;
2370 len -= fromlen;
a6ec74c1 2371 }
f337b084
TH
2372 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2373 cur += len;
3c4fb04a 2374 SvTAINT(cat);
a6ec74c1 2375 break;
f337b084 2376 }
a6ec74c1 2377 case 'B':
f337b084 2378 case 'b': {
b83604b4 2379 const char *str, *end;
f337b084
TH
2380 I32 l, field_len;
2381 U8 bits;
2382 bool utf8_source;
2383 U32 utf8_flags;
a6ec74c1 2384
fc241834 2385 fromstr = NEXTFROM;
b83604b4 2386 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2387 end = str + fromlen;
2388 if (DO_UTF8(fromstr)) {
2389 utf8_source = TRUE;
041457d9 2390 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2391 } else {
2392 utf8_source = FALSE;
2393 utf8_flags = 0; /* Unused, but keep compilers happy */
2394 }
2395 if (howlen == e_star) len = fromlen;
2396 field_len = (len+7)/8;
2397 GROWING(utf8, cat, start, cur, field_len);
2398 if (len > (I32)fromlen) len = fromlen;
2399 bits = 0;
2400 l = 0;
2401 if (datumtype == 'B')
2402 while (l++ < len) {
2403 if (utf8_source) {
95b63a38 2404 UV val = 0;
f337b084
TH
2405 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2406 bits |= val & 1;
2407 } else bits |= *str++ & 1;
2408 if (l & 7) bits <<= 1;
fc241834 2409 else {
f337b084
TH
2410 PUSH_BYTE(utf8, cur, bits);
2411 bits = 0;
a6ec74c1
JH
2412 }
2413 }
f337b084
TH
2414 else
2415 /* datumtype == 'b' */
2416 while (l++ < len) {
2417 if (utf8_source) {
95b63a38 2418 UV val = 0;
f337b084
TH
2419 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2420 if (val & 1) bits |= 0x80;
2421 } else if (*str++ & 1)
2422 bits |= 0x80;
2423 if (l & 7) bits >>= 1;
fc241834 2424 else {
f337b084
TH
2425 PUSH_BYTE(utf8, cur, bits);
2426 bits = 0;
a6ec74c1
JH
2427 }
2428 }
f337b084
TH
2429 l--;
2430 if (l & 7) {
fc241834 2431 if (datumtype == 'B')
f337b084 2432 bits <<= 7 - (l & 7);
fc241834 2433 else
f337b084
TH
2434 bits >>= 7 - (l & 7);
2435 PUSH_BYTE(utf8, cur, bits);
2436 l += 7;
a6ec74c1 2437 }
f337b084
TH
2438 /* Determine how many chars are left in the requested field */
2439 l /= 8;
2440 if (howlen == e_star) field_len = 0;
2441 else field_len -= l;
2442 Zero(cur, field_len, char);
2443 cur += field_len;
a6ec74c1 2444 break;
f337b084 2445 }
a6ec74c1 2446 case 'H':
f337b084 2447 case 'h': {
10516c54 2448 const char *str, *end;
f337b084
TH
2449 I32 l, field_len;
2450 U8 bits;
2451 bool utf8_source;
2452 U32 utf8_flags;
a6ec74c1 2453
fc241834 2454 fromstr = NEXTFROM;
10516c54 2455 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2456 end = str + fromlen;
2457 if (DO_UTF8(fromstr)) {
2458 utf8_source = TRUE;
041457d9 2459 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2460 } else {
2461 utf8_source = FALSE;
2462 utf8_flags = 0; /* Unused, but keep compilers happy */
2463 }
2464 if (howlen == e_star) len = fromlen;
2465 field_len = (len+1)/2;
2466 GROWING(utf8, cat, start, cur, field_len);
2467 if (!utf8 && len > (I32)fromlen) len = fromlen;
2468 bits = 0;
2469 l = 0;
2470 if (datumtype == 'H')
2471 while (l++ < len) {
2472 if (utf8_source) {
95b63a38 2473 UV val = 0;
f337b084
TH
2474 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2475 if (val < 256 && isALPHA(val))
2476 bits |= (val + 9) & 0xf;
a6ec74c1 2477 else
f337b084
TH
2478 bits |= val & 0xf;
2479 } else if (isALPHA(*str))
2480 bits |= (*str++ + 9) & 0xf;
2481 else
2482 bits |= *str++ & 0xf;
2483 if (l & 1) bits <<= 4;
fc241834 2484 else {
f337b084
TH
2485 PUSH_BYTE(utf8, cur, bits);
2486 bits = 0;
a6ec74c1
JH
2487 }
2488 }
f337b084
TH
2489 else
2490 while (l++ < len) {
2491 if (utf8_source) {
95b63a38 2492 UV val = 0;
f337b084
TH
2493 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2494 if (val < 256 && isALPHA(val))
2495 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2496 else
f337b084
TH
2497 bits |= (val & 0xf) << 4;
2498 } else if (isALPHA(*str))
2499 bits |= ((*str++ + 9) & 0xf) << 4;
2500 else
2501 bits |= (*str++ & 0xf) << 4;
2502 if (l & 1) bits >>= 4;
fc241834 2503 else {
f337b084
TH
2504 PUSH_BYTE(utf8, cur, bits);
2505 bits = 0;
a6ec74c1 2506 }
fc241834 2507 }
f337b084
TH
2508 l--;
2509 if (l & 1) {
2510 PUSH_BYTE(utf8, cur, bits);
2511 l++;
2512 }
2513 /* Determine how many chars are left in the requested field */
2514 l /= 2;
2515 if (howlen == e_star) field_len = 0;
2516 else field_len -= l;
2517 Zero(cur, field_len, char);
2518 cur += field_len;
2519 break;
fc241834
RGS
2520 }
2521 case 'c':
f337b084
TH
2522 while (len-- > 0) {
2523 IV aiv;
2524 fromstr = NEXTFROM;
2525 aiv = SvIV(fromstr);
a2a5de95
NC
2526 if ((-128 > aiv || aiv > 127))
2527 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2528 "Character in 'c' format wrapped in pack");
585ec06d 2529 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2530 }
2531 break;
2532 case 'C':
f337b084
TH
2533 if (len == 0) {
2534 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2535 break;
2536 }
a6ec74c1 2537 while (len-- > 0) {
f337b084 2538 IV aiv;
a6ec74c1 2539 fromstr = NEXTFROM;
f337b084 2540 aiv = SvIV(fromstr);
a2a5de95
NC
2541 if ((0 > aiv || aiv > 0xff))
2542 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2543 "Character in 'C' format wrapped in pack");
1651fc44 2544 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2545 }
fc241834
RGS
2546 break;
2547 case 'W': {
2548 char *end;
670f1322 2549 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2550
2551 end = start+SvLEN(cat)-1;
2552 if (utf8) end -= UTF8_MAXLEN-1;
2553 while (len-- > 0) {
2554 UV auv;
2555 fromstr = NEXTFROM;
2556 auv = SvUV(fromstr);
2557 if (in_bytes) auv = auv % 0x100;
2558 if (utf8) {
2559 W_utf8:
2560 if (cur > end) {
2561 *cur = '\0';
b162af07 2562 SvCUR_set(cat, cur - start);
fc241834
RGS
2563
2564 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2565 end = start+SvLEN(cat)-UTF8_MAXLEN;
2566 }
230e1fce
NC
2567 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2568 NATIVE_TO_UNI(auv),
041457d9 2569 warn_utf8 ?
230e1fce 2570 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2571 } else {
2572 if (auv >= 0x100) {
2573 if (!SvUTF8(cat)) {
2574 *cur = '\0';
b162af07 2575 SvCUR_set(cat, cur - start);
fc241834
RGS
2576 marked_upgrade(aTHX_ cat, symptr);
2577 lookahead.flags |= FLAG_DO_UTF8;
2578 lookahead.strbeg = symptr->strbeg;
2579 utf8 = 1;
2580 start = SvPVX(cat);
2581 cur = start + SvCUR(cat);
2582 end = start+SvLEN(cat)-UTF8_MAXLEN;
2583 goto W_utf8;
2584 }
a2a5de95
NC
2585 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2586 "Character in 'W' format wrapped in pack");
fc241834
RGS
2587 auv &= 0xff;
2588 }
2589 if (cur >= end) {
2590 *cur = '\0';
b162af07 2591 SvCUR_set(cat, cur - start);
fc241834
RGS
2592 GROWING(0, cat, start, cur, len+1);
2593 end = start+SvLEN(cat)-1;
2594 }
fe2774ed 2595 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2596 }
2597 }
2598 break;
fc241834
RGS
2599 }
2600 case 'U': {
2601 char *end;
2602
2603 if (len == 0) {
2604 if (!(symptr->flags & FLAG_DO_UTF8)) {
2605 marked_upgrade(aTHX_ cat, symptr);
2606 lookahead.flags |= FLAG_DO_UTF8;
2607 lookahead.strbeg = symptr->strbeg;
2608 }
2609 utf8 = 0;
2610 goto no_change;
2611 }
2612
2613 end = start+SvLEN(cat);
2614 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 2615 while (len-- > 0) {
fc241834 2616 UV auv;
a6ec74c1 2617 fromstr = NEXTFROM;
fc241834
RGS
2618 auv = SvUV(fromstr);
2619 if (utf8) {
230e1fce 2620 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 2621 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 2622 warn_utf8 ?
fc241834
RGS
2623 0 : UNICODE_ALLOW_ANY);
2624 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2625 *cur = '\0';
b162af07 2626 SvCUR_set(cat, cur - start);
fc241834
RGS
2627 GROWING(0, cat, start, cur,
2628 len+(endb-buffer)*UTF8_EXPAND);
2629 end = start+SvLEN(cat);
2630 }
64844641 2631 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
2632 } else {
2633 if (cur >= end) {
2634 *cur = '\0';
b162af07 2635 SvCUR_set(cat, cur - start);
fc241834
RGS
2636 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2637 end = start+SvLEN(cat)-UTF8_MAXLEN;
2638 }
230e1fce 2639 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 2640 warn_utf8 ?
230e1fce 2641 0 : UNICODE_ALLOW_ANY);
fc241834 2642 }
a6ec74c1 2643 }
a6ec74c1 2644 break;
fc241834 2645 }
a6ec74c1
JH
2646 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2647 case 'f':
a6ec74c1 2648 while (len-- > 0) {
f337b084
TH
2649 float afloat;
2650 NV anv;
a6ec74c1 2651 fromstr = NEXTFROM;
f337b084 2652 anv = SvNV(fromstr);
85bba25f 2653# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2654 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2655 * on Alpha; fake it if we don't have them.
2656 */
f337b084 2657 if (anv > FLT_MAX)
fc241834 2658 afloat = FLT_MAX;
f337b084 2659 else if (anv < -FLT_MAX)
fc241834 2660 afloat = -FLT_MAX;
f337b084 2661 else afloat = (float)anv;
baf3cf9c 2662# else
f337b084 2663 afloat = (float)anv;
baf3cf9c 2664# endif
2b4ad569 2665 DO_BO_PACK(afloat);
f337b084 2666 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
2667 }
2668 break;
2669 case 'd':
a6ec74c1 2670 while (len-- > 0) {
f337b084
TH
2671 double adouble;
2672 NV anv;
a6ec74c1 2673 fromstr = NEXTFROM;
f337b084 2674 anv = SvNV(fromstr);
85bba25f 2675# if defined(VMS) && !defined(_IEEE_FP)
f337b084 2676 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
2677 * on Alpha; fake it if we don't have them.
2678 */
f337b084 2679 if (anv > DBL_MAX)
fc241834 2680 adouble = DBL_MAX;
f337b084 2681 else if (anv < -DBL_MAX)
fc241834 2682 adouble = -DBL_MAX;
f337b084 2683 else adouble = (double)anv;
baf3cf9c 2684# else
f337b084 2685 adouble = (double)anv;
baf3cf9c 2686# endif
2b4ad569 2687 DO_BO_PACK(adouble);
f337b084 2688 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
2689 }
2690 break;
fc241834 2691 case 'F': {
275663fa 2692 NV_bytes anv;
1109a392 2693 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2694 while (len-- > 0) {
2695 fromstr = NEXTFROM;
cd07c537
DM
2696#ifdef __GNUC__
2697 /* to work round a gcc/x86 bug; don't use SvNV */
2698 anv.nv = sv_2nv(fromstr);
2699#else
275663fa 2700 anv.nv = SvNV(fromstr);
cd07c537 2701#endif
2b4ad569 2702 DO_BO_PACK(anv);
275663fa 2703 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
2704 }
2705 break;
fc241834 2706 }
92d41999 2707#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 2708 case 'D': {
275663fa 2709 ld_bytes aldouble;
1109a392
MHM
2710 /* long doubles can have unused bits, which may be nonzero */
2711 Zero(&aldouble, 1, long double);
92d41999
JH
2712 while (len-- > 0) {
2713 fromstr = NEXTFROM;
cd07c537
DM
2714# ifdef __GNUC__
2715 /* to work round a gcc/x86 bug; don't use SvNV */
2716 aldouble.ld = (long double)sv_2nv(fromstr);
2717# else
275663fa 2718 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 2719# endif
2b4ad569 2720 DO_BO_PACK(aldouble);
275663fa 2721 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
2722 }
2723 break;
fc241834 2724 }
92d41999 2725#endif
068bd2e7 2726 case 'n' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2727 case 'n':
2728 while (len-- > 0) {
f337b084 2729 I16 ai16;
a6ec74c1 2730 fromstr = NEXTFROM;
ef108786 2731 ai16 = (I16)SvIV(fromstr);
ef108786 2732 ai16 = PerlSock_htons(ai16);
f337b084 2733 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2734 }
2735 break;
068bd2e7 2736 case 'v' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2737 case 'v':
2738 while (len-- > 0) {
f337b084 2739 I16 ai16;
a6ec74c1 2740 fromstr = NEXTFROM;
ef108786 2741 ai16 = (I16)SvIV(fromstr);
ef108786 2742 ai16 = htovs(ai16);
f337b084 2743 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2744 }
2745 break;
49704364 2746 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2747#if SHORTSIZE != SIZE16
fc241834 2748 while (len-- > 0) {
f337b084 2749 unsigned short aushort;
fc241834
RGS
2750 fromstr = NEXTFROM;
2751 aushort = SvUV(fromstr);
2b4ad569 2752 DO_BO_PACK(aushort);
f337b084 2753 PUSH_VAR(utf8, cur, aushort);
fc241834 2754 }
49704364
LW
2755 break;
2756#else
2757 /* Fall through! */
a6ec74c1 2758#endif
49704364 2759 case 'S':
fc241834 2760 while (len-- > 0) {
f337b084 2761 U16 au16;
fc241834
RGS
2762 fromstr = NEXTFROM;
2763 au16 = (U16)SvUV(fromstr);
2b4ad569 2764 DO_BO_PACK(au16);
f337b084 2765 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
2766 }
2767 break;
49704364 2768 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2769#if SHORTSIZE != SIZE16
fc241834 2770 while (len-- > 0) {
f337b084 2771 short ashort;
fc241834
RGS
2772 fromstr = NEXTFROM;
2773 ashort = SvIV(fromstr);
2b4ad569 2774 DO_BO_PACK(ashort);
f337b084 2775 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 2776 }
49704364
LW
2777 break;
2778#else
2779 /* Fall through! */
a6ec74c1 2780#endif
49704364
LW
2781 case 's':
2782 while (len-- > 0) {
f337b084 2783 I16 ai16;
49704364 2784 fromstr = NEXTFROM;
ef108786 2785 ai16 = (I16)SvIV(fromstr);
2b4ad569 2786 DO_BO_PACK(ai16);
f337b084 2787 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
2788 }
2789 break;
2790 case 'I':
49704364 2791 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2792 while (len-- > 0) {
f337b084 2793 unsigned int auint;
a6ec74c1
JH
2794 fromstr = NEXTFROM;
2795 auint = SvUV(fromstr);
2b4ad569 2796 DO_BO_PACK(auint);
f337b084 2797 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
2798 }
2799 break;
92d41999
JH
2800 case 'j':
2801 while (len-- > 0) {
f337b084 2802 IV aiv;
92d41999
JH
2803 fromstr = NEXTFROM;
2804 aiv = SvIV(fromstr);
2b4ad569 2805 DO_BO_PACK(aiv);
f337b084 2806 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
2807 }
2808 break;
2809 case 'J':
2810 while (len-- > 0) {
f337b084 2811 UV auv;
92d41999
JH
2812 fromstr = NEXTFROM;
2813 auv = SvUV(fromstr);
2b4ad569 2814 DO_BO_PACK(auv);
f337b084 2815 PUSH_VAR(utf8, cur, auv);
92d41999
JH
2816 }
2817 break;
a6ec74c1
JH
2818 case 'w':
2819 while (len-- > 0) {
f337b084 2820 NV anv;
a6ec74c1 2821 fromstr = NEXTFROM;
15e9f109 2822 anv = SvNV(fromstr);
a6ec74c1 2823
f337b084
TH
2824 if (anv < 0) {
2825 *cur = '\0';
b162af07 2826 SvCUR_set(cat, cur - start);
49704364 2827 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 2828 }
a6ec74c1 2829
196b62db
NC
2830 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2831 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2832 any negative IVs will have already been got by the croak()
2833 above. IOK is untrue for fractions, so we test them
2834 against UV_MAX_P1. */
f337b084
TH
2835 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2836 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 2837 char *in = buf + sizeof(buf);
196b62db 2838 UV auv = SvUV(fromstr);
a6ec74c1
JH
2839
2840 do {
eb160463 2841 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2842 auv >>= 7;
2843 } while (auv);
2844 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2845 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2846 in, (buf + sizeof(buf)) - in);
2847 } else if (SvPOKp(fromstr))
2848 goto w_string;
a6ec74c1 2849 else if (SvNOKp(fromstr)) {
0258719b 2850 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 2851 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
2852 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2853 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2854 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2855 Some C compilers are strict about integral constant
2856 expressions so we conservatively divide by a slightly
2857 smaller integer instead of multiplying by the exact
2858 floating-point value.
0258719b
NC
2859 */
2860#ifdef NV_MAX_10_EXP
f337b084 2861 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2862 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2863#else
f337b084 2864 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 2865 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2866#endif
a6ec74c1
JH
2867 char *in = buf + sizeof(buf);
2868
8b6e33c7 2869 anv = Perl_floor(anv);
a6ec74c1 2870 do {
8b6e33c7 2871 const NV next = Perl_floor(anv / 128);
a6ec74c1 2872 if (in <= buf) /* this cannot happen ;-) */
49704364 2873 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2874 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2875 anv = next;
2876 } while (anv > 0);
a6ec74c1 2877 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
2878 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2879 in, (buf + sizeof(buf)) - in);
2880 } else {
8b6e33c7
AL
2881 const char *from;
2882 char *result, *in;
735b914b
JH
2883 SV *norm;
2884 STRLEN len;
2885 bool done;
2886
f337b084 2887 w_string:
735b914b 2888 /* Copy string and check for compliance */
349d4f2f 2889 from = SvPV_const(fromstr, len);
735b914b 2890 if ((norm = is_an_int(from, len)) == NULL)
49704364 2891 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2892
a02a5408 2893 Newx(result, len, char);
735b914b
JH
2894 in = result + len;
2895 done = FALSE;
f337b084 2896 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 2897 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
2898 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2899 in, (result + len) - in);
735b914b
JH
2900 Safefree(result);
2901 SvREFCNT_dec(norm); /* free norm */
fc241834 2902 }
a6ec74c1
JH
2903 }
2904 break;
2905 case 'i':
49704364 2906 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2907 while (len-- > 0) {
f337b084 2908 int aint;
a6ec74c1
JH
2909 fromstr = NEXTFROM;
2910 aint = SvIV(fromstr);
2b4ad569 2911 DO_BO_PACK(aint);
f337b084 2912 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
2913 }
2914 break;
068bd2e7 2915 case 'N' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2916 case 'N':
2917 while (len-- > 0) {
f337b084 2918 U32 au32;
a6ec74c1 2919 fromstr = NEXTFROM;
ef108786 2920 au32 = SvUV(fromstr);
ef108786 2921 au32 = PerlSock_htonl(au32);
f337b084 2922 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2923 }
2924 break;
068bd2e7 2925 case 'V' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2926 case 'V':
2927 while (len-- > 0) {
f337b084 2928 U32 au32;
a6ec74c1 2929 fromstr = NEXTFROM;
ef108786 2930 au32 = SvUV(fromstr);
ef108786 2931 au32 = htovl(au32);
f337b084 2932 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2933 }
2934 break;
49704364 2935 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2936#if LONGSIZE != SIZE32
fc241834 2937 while (len-- > 0) {
f337b084 2938 unsigned long aulong;
fc241834
RGS
2939 fromstr = NEXTFROM;
2940 aulong = SvUV(fromstr);
2b4ad569 2941 DO_BO_PACK(aulong);
f337b084 2942 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 2943 }
49704364
LW
2944 break;
2945#else
2946 /* Fall though! */
a6ec74c1 2947#endif
49704364 2948 case 'L':
fc241834 2949 while (len-- > 0) {
f337b084 2950 U32 au32;
fc241834
RGS
2951 fromstr = NEXTFROM;
2952 au32 = SvUV(fromstr);
2b4ad569 2953 DO_BO_PACK(au32);
f337b084 2954 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
2955 }
2956 break;
49704364 2957 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2958#if LONGSIZE != SIZE32
fc241834 2959 while (len-- > 0) {
f337b084 2960 long along;
fc241834
RGS
2961 fromstr = NEXTFROM;
2962 along = SvIV(fromstr);
2b4ad569 2963 DO_BO_PACK(along);
f337b084 2964 PUSH_VAR(utf8, cur, along);
a6ec74c1 2965 }
49704364
LW
2966 break;
2967#else
2968 /* Fall though! */
a6ec74c1 2969#endif
49704364
LW
2970 case 'l':
2971 while (len-- > 0) {
f337b084 2972 I32 ai32;
49704364 2973 fromstr = NEXTFROM;
ef108786 2974 ai32 = SvIV(fromstr);
2b4ad569 2975 DO_BO_PACK(ai32);
f337b084 2976 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
2977 }
2978 break;
2979#ifdef HAS_QUAD
2980 case 'Q':
2981 while (len-- > 0) {
f337b084 2982 Uquad_t auquad;
a6ec74c1 2983 fromstr = NEXTFROM;
f337b084 2984 auquad = (Uquad_t) SvUV(fromstr);
2b4ad569 2985 DO_BO_PACK(auquad);
f337b084 2986 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
2987 }
2988 break;
2989 case 'q':
2990 while (len-- > 0) {
f337b084 2991 Quad_t aquad;
a6ec74c1
JH
2992 fromstr = NEXTFROM;
2993 aquad = (Quad_t)SvIV(fromstr);
2b4ad569 2994 DO_BO_PACK(aquad);
f337b084 2995 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
2996 }
2997 break;
f337b084 2998#endif /* HAS_QUAD */
a6ec74c1
JH
2999 case 'P':
3000 len = 1; /* assume SV is correct length */
f337b084 3001 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3002 /* Fall through! */
a6ec74c1
JH
3003 case 'p':
3004 while (len-- > 0) {
83003860 3005 const char *aptr;
f337b084 3006
a6ec74c1 3007 fromstr = NEXTFROM;
28a4f200 3008 SvGETMAGIC(fromstr);
3009 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3010 else {
a6ec74c1
JH
3011 /* XXX better yet, could spirit away the string to
3012 * a safe spot and hang on to it until the result
3013 * of pack() (and all copies of the result) are
3014 * gone.
3015 */
041457d9 3016 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3017 !SvREADONLY(fromstr)))) {
3018 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3019 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3020 }
3021 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3022 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3023 else
2596d9fe 3024 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3025 }
2b4ad569 3026 DO_BO_PACK(aptr);
f337b084 3027 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3028 }
3029 break;
fc241834 3030 case 'u': {
f7fe979e 3031 const char *aptr, *aend;
fc241834 3032 bool from_utf8;
f337b084 3033
a6ec74c1 3034 fromstr = NEXTFROM;
fc241834
RGS
3035 if (len <= 2) len = 45;
3036 else len = len / 3 * 3;
3037 if (len >= 64) {
a2a5de95
NC
3038 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3039 "Field too wide in 'u' format in pack");
fc241834
RGS
3040 len = 63;
3041 }
83003860 3042 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3043 from_utf8 = DO_UTF8(fromstr);
3044 if (from_utf8) {
3045 aend = aptr + fromlen;
3f63b0e5 3046 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3047 } else aend = NULL; /* Unused, but keep compilers happy */
3048 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3049 while (fromlen > 0) {
fc241834 3050 U8 *end;
a6ec74c1 3051 I32 todo;
fc241834 3052 U8 hunk[1+63/3*4+1];
a6ec74c1 3053
eb160463 3054 if ((I32)fromlen > len)
a6ec74c1
JH
3055 todo = len;
3056 else
3057 todo = fromlen;
fc241834
RGS
3058 if (from_utf8) {
3059 char buffer[64];
3060 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3061 'u' | TYPE_IS_PACK)) {
3062 *cur = '\0';
b162af07 3063 SvCUR_set(cat, cur - start);
5637ef5b
NC
3064 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3065 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3066 aptr, aend, buffer, (long) todo);
fc241834
RGS
3067 }
3068 end = doencodes(hunk, buffer, todo);
3069 } else {
3070 end = doencodes(hunk, aptr, todo);
3071 aptr += todo;
3072 }
3073 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3074 fromlen -= todo;
3075 }
a6ec74c1
JH
3076 break;
3077 }
f337b084
TH
3078 }
3079 *cur = '\0';
b162af07 3080 SvCUR_set(cat, cur - start);
f337b084 3081 no_change:
49704364 3082 *symptr = lookahead;
a6ec74c1 3083 }
49704364 3084 return beglist;
18529408
IZ
3085}
3086#undef NEXTFROM
3087
3088
3089PP(pp_pack)
3090{
97aff369 3091 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3092 SV *cat = TARG;
18529408 3093 STRLEN fromlen;
349d4f2f 3094 SV *pat_sv = *++MARK;
eb578fdb
KW
3095 const char *pat = SvPV_const(pat_sv, fromlen);
3096 const char *patend = pat + fromlen;
18529408
IZ
3097
3098 MARK++;
76f68e9b 3099 sv_setpvs(cat, "");
f337b084 3100 SvUTF8_off(cat);
18529408 3101
7accc089 3102 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3103
a6ec74c1
JH
3104 SvSETMAGIC(cat);
3105 SP = ORIGMARK;
3106 PUSHs(cat);
3107 RETURN;
3108}
a6ec74c1 3109
73cb7263
NC
3110/*
3111 * Local variables:
3112 * c-indentation-style: bsd
3113 * c-basic-offset: 4
14d04a33 3114 * indent-tabs-mode: nil
73cb7263
NC
3115 * End:
3116 *
14d04a33 3117 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3118 */