This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
lib/locale.t: Refactor some tests common to 2 branches
[perl5.git] / pp_pack.c
... / ...
CommitLineData
1/* pp_pack.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19 */
20
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
87#endif
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
90
91/*
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
119# else
120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
124 ++++ bad cray byte order
125# endif
126# endif
127#else
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
130#endif
131
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
136
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
147 */
148#endif
149
150/* Only to be used inside a loop (see the break) */
151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
152STMT_START { \
153 if (UNLIKELY(utf8)) { \
154 if (!uni_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
156 } else { \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
161 s += len; \
162 } \
163} STMT_END
164
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
167
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
170
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
173
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
176
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
180/* flags (note that type modifiers can also be used as flags!) */
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183#define FLAG_UNPACK_ONLY_ONE 0x10
184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
195
196 PERL_ARGS_ASSERT_MUL128;
197
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
219#if 'I' == 73 && 'J' == 74
220/* On an ASCII/ISO kind of system */
221#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222#else
223/*
224 Some other sort of character set - use memchr() so we don't match
225 the null byte.
226 */
227#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
228#endif
229
230/* type modifiers */
231#define TYPE_IS_SHRIEKING 0x100
232#define TYPE_IS_BIG_ENDIAN 0x200
233#define TYPE_IS_LITTLE_ENDIAN 0x400
234#define TYPE_IS_PACK 0x800
235#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
236#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
237#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
238
239# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
241
242# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
243
244#define PACK_SIZE_CANNOT_CSUM 0x80
245#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
246#define PACK_SIZE_MASK 0x3F
247
248#include "packsizetables.c"
249
250static void
251S_reverse_copy(const char *src, char *dest, STRLEN len)
252{
253 dest += len;
254 while (len--)
255 *--dest = *src++;
256}
257
258STATIC U8
259uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
260{
261 STRLEN retlen;
262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
264 /* We try to process malformed UTF-8 as much as possible (preferably with
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
270 if (val >= 0x100) {
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
274 val &= 0xff;
275 }
276 *s += retlen;
277 return (U8)val;
278}
279
280#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
282 *(U8 *)(s)++)
283
284STATIC bool
285uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
286{
287 UV val;
288 STRLEN retlen;
289 const char *from = *s;
290 int bad = 0;
291 const U32 flags = ckWARN(WARN_UTF8) ?
292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
293 const bool needs_swap = NEEDS_SWAP(datumtype);
294
295 if (UNLIKELY(needs_swap))
296 buf += buf_len;
297
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
303 bad |= 1;
304 } else from += retlen;
305 if (val >= 0x100) {
306 bad |= 2;
307 val &= 0xff;
308 }
309 if (UNLIKELY(needs_swap))
310 *(U8 *)--buf = (U8)val;
311 else
312 *(U8 *)buf++ = (U8)val;
313 }
314 /* We have enough characters for the buffer. Did we have problems ? */
315 if (bad) {
316 if (bad & 1) {
317 /* Rewalk the string fragment while warning */
318 const char *ptr;
319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
323 }
324 if (from > end) from = end;
325 }
326 if ((bad & 2))
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
328 WARN_PACK : WARN_UNPACK),
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
332 }
333 *s = from;
334 return TRUE;
335}
336
337STATIC bool
338next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
339{
340 dVAR;
341 STRLEN retlen;
342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
343 if (val >= 0x100 || !ISUUCHAR(val) ||
344 retlen == (STRLEN) -1 || retlen == 0) {
345 *out = 0;
346 return FALSE;
347 }
348 *out = PL_uudmap[val] & 077;
349 *s += retlen;
350 return TRUE;
351}
352
353STATIC char *
354S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
355 PERL_ARGS_ASSERT_BYTES_TO_UNI;
356
357 if (UNLIKELY(needs_swap)) {
358 const U8 *p = start + len;
359 while (p-- > start) {
360 append_utf8_from_native_byte(*p, (U8 **) & dest);
361 }
362 } else {
363 const U8 * const end = start + len;
364 while (start < end) {
365 append_utf8_from_native_byte(*start, (U8 **) & dest);
366 start++;
367 }
368 }
369 return dest;
370}
371
372#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
373STMT_START { \
374 if (UNLIKELY(utf8)) \
375 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
376 else { \
377 if (UNLIKELY(needs_swap)) \
378 S_reverse_copy((char *)(buf), cur, len); \
379 else \
380 Copy(buf, cur, len, char); \
381 (cur) += (len); \
382 } \
383} STMT_END
384
385#define GROWING(utf8, cat, start, cur, in_len) \
386STMT_START { \
387 STRLEN glen = (in_len); \
388 if (utf8) glen *= UTF8_EXPAND; \
389 if ((cur) + glen >= (start) + SvLEN(cat)) { \
390 (start) = sv_exp_grow(cat, glen); \
391 (cur) = (start) + SvCUR(cat); \
392 } \
393} STMT_END
394
395#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
396STMT_START { \
397 const STRLEN glen = (in_len); \
398 STRLEN gl = glen; \
399 if (utf8) gl *= UTF8_EXPAND; \
400 if ((cur) + gl >= (start) + SvLEN(cat)) { \
401 *cur = '\0'; \
402 SvCUR_set((cat), (cur) - (start)); \
403 (start) = sv_exp_grow(cat, gl); \
404 (cur) = (start) + SvCUR(cat); \
405 } \
406 PUSH_BYTES(utf8, cur, buf, glen, 0); \
407} STMT_END
408
409#define PUSH_BYTE(utf8, s, byte) \
410STMT_START { \
411 if (utf8) { \
412 const U8 au8 = (byte); \
413 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
414 } else *(U8 *)(s)++ = (byte); \
415} STMT_END
416
417/* Only to be used inside a loop (see the break) */
418#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
419STMT_START { \
420 STRLEN retlen; \
421 if (str >= end) break; \
422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
423 if (retlen == (STRLEN) -1 || retlen == 0) { \
424 *cur = '\0'; \
425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
426 } \
427 str += retlen; \
428} STMT_END
429
430static const char *_action( const tempsym_t* symptr )
431{
432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
433}
434
435/* Returns the sizeof() struct described by pat */
436STATIC I32
437S_measure_struct(pTHX_ tempsym_t* symptr)
438{
439 I32 total = 0;
440
441 PERL_ARGS_ASSERT_MEASURE_STRUCT;
442
443 while (next_symbol(symptr)) {
444 I32 len;
445 int size;
446
447 switch (symptr->howlen) {
448 case e_star:
449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
450 _action( symptr ) );
451
452 default:
453 /* e_no_len and e_number */
454 len = symptr->length;
455 break;
456 }
457
458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
459 if (!size) {
460 int star;
461 /* endianness doesn't influence the size of a type */
462 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
463 default:
464 Perl_croak(aTHX_ "Invalid type '%c' in %s",
465 (int)TYPE_NO_MODIFIERS(symptr->code),
466 _action( symptr ) );
467 case '.' | TYPE_IS_SHRIEKING:
468 case '@' | TYPE_IS_SHRIEKING:
469 case '@':
470 case '.':
471 case '/':
472 case 'U': /* XXXX Is it correct? */
473 case 'w':
474 case 'u':
475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
476 (int) TYPE_NO_MODIFIERS(symptr->code),
477 _action( symptr ) );
478 case '%':
479 size = 0;
480 break;
481 case '(':
482 {
483 tempsym_t savsym = *symptr;
484 symptr->patptr = savsym.grpbeg;
485 symptr->patend = savsym.grpend;
486 /* XXXX Theoretically, we need to measure many times at
487 different positions, since the subexpression may contain
488 alignment commands, but be not of aligned length.
489 Need to detect this and croak(). */
490 size = measure_struct(symptr);
491 *symptr = savsym;
492 break;
493 }
494 case 'X' | TYPE_IS_SHRIEKING:
495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
496 */
497 if (!len) /* Avoid division by 0 */
498 len = 1;
499 len = total % len; /* Assumed: the start is aligned. */
500 /* FALLTHROUGH */
501 case 'X':
502 size = -1;
503 if (total < len)
504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
505 break;
506 case 'x' | TYPE_IS_SHRIEKING:
507 if (!len) /* Avoid division by 0 */
508 len = 1;
509 star = total % len; /* Assumed: the start is aligned. */
510 if (star) /* Other portable ways? */
511 len = len - star;
512 else
513 len = 0;
514 /* FALLTHROUGH */
515 case 'x':
516 case 'A':
517 case 'Z':
518 case 'a':
519 size = 1;
520 break;
521 case 'B':
522 case 'b':
523 len = (len + 7)/8;
524 size = 1;
525 break;
526 case 'H':
527 case 'h':
528 len = (len + 1)/2;
529 size = 1;
530 break;
531
532 case 'P':
533 len = 1;
534 size = sizeof(char*);
535 break;
536 }
537 }
538 total += len * size;
539 }
540 return total;
541}
542
543
544/* locate matching closing parenthesis or bracket
545 * returns char pointer to char after match, or NULL
546 */
547STATIC const char *
548S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
549{
550 PERL_ARGS_ASSERT_GROUP_END;
551
552 while (patptr < patend) {
553 const char c = *patptr++;
554
555 if (isSPACE(c))
556 continue;
557 else if (c == ender)
558 return patptr-1;
559 else if (c == '#') {
560 while (patptr < patend && *patptr != '\n')
561 patptr++;
562 continue;
563 } else if (c == '(')
564 patptr = group_end(patptr, patend, ')') + 1;
565 else if (c == '[')
566 patptr = group_end(patptr, patend, ']') + 1;
567 }
568 Perl_croak(aTHX_ "No group ending character '%c' found in template",
569 ender);
570 NOT_REACHED; /* NOTREACHED */
571}
572
573
574/* Convert unsigned decimal number to binary.
575 * Expects a pointer to the first digit and address of length variable
576 * Advances char pointer to 1st non-digit char and returns number
577 */
578STATIC const char *
579S_get_num(pTHX_ const char *patptr, I32 *lenptr )
580{
581 I32 len = *patptr++ - '0';
582
583 PERL_ARGS_ASSERT_GET_NUM;
584
585 while (isDIGIT(*patptr)) {
586 if (len >= 0x7FFFFFFF/10)
587 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
588 len = (len * 10) + (*patptr++ - '0');
589 }
590 *lenptr = len;
591 return patptr;
592}
593
594/* The marvellous template parsing routine: Using state stored in *symptr,
595 * locates next template code and count
596 */
597STATIC bool
598S_next_symbol(pTHX_ tempsym_t* symptr )
599{
600 const char* patptr = symptr->patptr;
601 const char* const patend = symptr->patend;
602
603 PERL_ARGS_ASSERT_NEXT_SYMBOL;
604
605 symptr->flags &= ~FLAG_SLASH;
606
607 while (patptr < patend) {
608 if (isSPACE(*patptr))
609 patptr++;
610 else if (*patptr == '#') {
611 patptr++;
612 while (patptr < patend && *patptr != '\n')
613 patptr++;
614 if (patptr < patend)
615 patptr++;
616 } else {
617 /* We should have found a template code */
618 I32 code = *patptr++ & 0xFF;
619 U32 inherited_modifiers = 0;
620
621 if (code == ','){ /* grandfather in commas but with a warning */
622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
623 symptr->flags |= FLAG_COMMA;
624 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
625 "Invalid type ',' in %s", _action( symptr ) );
626 }
627 continue;
628 }
629
630 /* for '(', skip to ')' */
631 if (code == '(') {
632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
633 Perl_croak(aTHX_ "()-group starts with a count in %s",
634 _action( symptr ) );
635 symptr->grpbeg = patptr;
636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
639 _action( symptr ) );
640 }
641
642 /* look for group modifiers to inherit */
643 if (TYPE_ENDIANNESS(symptr->flags)) {
644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
646 }
647
648 /* look for modifiers */
649 while (patptr < patend) {
650 const char *allowed;
651 I32 modifier;
652 switch (*patptr) {
653 case '!':
654 modifier = TYPE_IS_SHRIEKING;
655 allowed = "sSiIlLxXnNvV@.";
656 break;
657 case '>':
658 modifier = TYPE_IS_BIG_ENDIAN;
659 allowed = ENDIANNESS_ALLOWED_TYPES;
660 break;
661 case '<':
662 modifier = TYPE_IS_LITTLE_ENDIAN;
663 allowed = ENDIANNESS_ALLOWED_TYPES;
664 break;
665 default:
666 allowed = "";
667 modifier = 0;
668 break;
669 }
670
671 if (modifier == 0)
672 break;
673
674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
676 allowed, _action( symptr ) );
677
678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
682 TYPE_ENDIANNESS_MASK)
683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
684 *patptr, _action( symptr ) );
685
686 if ((code & modifier)) {
687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
688 "Duplicate modifier '%c' after '%c' in %s",
689 *patptr, (int) TYPE_NO_MODIFIERS(code),
690 _action( symptr ) );
691 }
692
693 code |= modifier;
694 patptr++;
695 }
696
697 /* inherit modifiers */
698 code |= inherited_modifiers;
699
700 /* look for count and/or / */
701 if (patptr < patend) {
702 if (isDIGIT(*patptr)) {
703 patptr = get_num( patptr, &symptr->length );
704 symptr->howlen = e_number;
705
706 } else if (*patptr == '*') {
707 patptr++;
708 symptr->howlen = e_star;
709
710 } else if (*patptr == '[') {
711 const char* lenptr = ++patptr;
712 symptr->howlen = e_number;
713 patptr = group_end( patptr, patend, ']' ) + 1;
714 /* what kind of [] is it? */
715 if (isDIGIT(*lenptr)) {
716 lenptr = get_num( lenptr, &symptr->length );
717 if( *lenptr != ']' )
718 Perl_croak(aTHX_ "Malformed integer in [] in %s",
719 _action( symptr ) );
720 } else {
721 tempsym_t savsym = *symptr;
722 symptr->patend = patptr-1;
723 symptr->patptr = lenptr;
724 savsym.length = measure_struct(symptr);
725 *symptr = savsym;
726 }
727 } else {
728 symptr->howlen = e_no_len;
729 symptr->length = 1;
730 }
731
732 /* try to find / */
733 while (patptr < patend) {
734 if (isSPACE(*patptr))
735 patptr++;
736 else if (*patptr == '#') {
737 patptr++;
738 while (patptr < patend && *patptr != '\n')
739 patptr++;
740 if (patptr < patend)
741 patptr++;
742 } else {
743 if (*patptr == '/') {
744 symptr->flags |= FLAG_SLASH;
745 patptr++;
746 if (patptr < patend &&
747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
749 _action( symptr ) );
750 }
751 break;
752 }
753 }
754 } else {
755 /* at end - no count, no / */
756 symptr->howlen = e_no_len;
757 symptr->length = 1;
758 }
759
760 symptr->code = code;
761 symptr->patptr = patptr;
762 return TRUE;
763 }
764 }
765 symptr->patptr = patptr;
766 return FALSE;
767}
768
769/*
770 There is no way to cleanly handle the case where we should process the
771 string per byte in its upgraded form while it's really in downgraded form
772 (e.g. estimates like strend-s as an upper bound for the number of
773 characters left wouldn't work). So if we foresee the need of this
774 (pattern starts with U or contains U0), we want to work on the encoded
775 version of the string. Users are advised to upgrade their pack string
776 themselves if they need to do a lot of unpacks like this on it
777*/
778STATIC bool
779need_utf8(const char *pat, const char *patend)
780{
781 bool first = TRUE;
782
783 PERL_ARGS_ASSERT_NEED_UTF8;
784
785 while (pat < patend) {
786 if (pat[0] == '#') {
787 pat++;
788 pat = (const char *) memchr(pat, '\n', patend-pat);
789 if (!pat) return FALSE;
790 } else if (pat[0] == 'U') {
791 if (first || pat[1] == '0') return TRUE;
792 } else first = FALSE;
793 pat++;
794 }
795 return FALSE;
796}
797
798STATIC char
799first_symbol(const char *pat, const char *patend) {
800 PERL_ARGS_ASSERT_FIRST_SYMBOL;
801
802 while (pat < patend) {
803 if (pat[0] != '#') return pat[0];
804 pat++;
805 pat = (const char *) memchr(pat, '\n', patend-pat);
806 if (!pat) return 0;
807 pat++;
808 }
809 return 0;
810}
811
812/*
813
814=head1 Pack and Unpack
815
816=for apidoc unpackstring
817
818The engine implementing the unpack() Perl function.
819
820Using the template pat..patend, this function unpacks the string
821s..strend into a number of mortal SVs, which it pushes onto the perl
822argument (@_) stack (so you will need to issue a C<PUTBACK> before and
823C<SPAGAIN> after the call to this function). It returns the number of
824pushed elements.
825
826The strend and patend pointers should point to the byte following the last
827character of each string.
828
829Although this function returns its values on the perl argument stack, it
830doesn't take any parameters from that stack (and thus in particular
831there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
832example).
833
834=cut */
835
836I32
837Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
838{
839 tempsym_t sym;
840
841 PERL_ARGS_ASSERT_UNPACKSTRING;
842
843 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
844 else if (need_utf8(pat, patend)) {
845 /* We probably should try to avoid this in case a scalar context call
846 wouldn't get to the "U0" */
847 STRLEN len = strend - s;
848 s = (char *) bytes_to_utf8((U8 *) s, &len);
849 SAVEFREEPV(s);
850 strend = s + len;
851 flags |= FLAG_DO_UTF8;
852 }
853
854 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
855 flags |= FLAG_PARSE_UTF8;
856
857 TEMPSYM_INIT(&sym, pat, patend, flags);
858
859 return unpack_rec(&sym, s, s, strend, NULL );
860}
861
862STATIC I32
863S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
864{
865 dVAR; dSP;
866 SV *sv = NULL;
867 const I32 start_sp_offset = SP - PL_stack_base;
868 howlen_t howlen;
869 I32 checksum = 0;
870 UV cuv = 0;
871 NV cdouble = 0.0;
872 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
873 bool beyond = FALSE;
874 bool explicit_length;
875 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
876 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
877
878 PERL_ARGS_ASSERT_UNPACK_REC;
879
880 symptr->strbeg = s - strbeg;
881
882 while (next_symbol(symptr)) {
883 packprops_t props;
884 I32 len;
885 I32 datumtype = symptr->code;
886 bool needs_swap;
887 /* do first one only unless in list context
888 / is implemented by unpacking the count, then popping it from the
889 stack, so must check that we're not in the middle of a / */
890 if ( unpack_only_one
891 && (SP - PL_stack_base == start_sp_offset + 1)
892 && (datumtype != '/') ) /* XXX can this be omitted */
893 break;
894
895 switch (howlen = symptr->howlen) {
896 case e_star:
897 len = strend - strbeg; /* long enough */
898 break;
899 default:
900 /* e_no_len and e_number */
901 len = symptr->length;
902 break;
903 }
904
905 explicit_length = TRUE;
906 redo_switch:
907 beyond = s >= strend;
908
909 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
910 if (props) {
911 /* props nonzero means we can process this letter. */
912 const long size = props & PACK_SIZE_MASK;
913 const long howmany = (strend - s) / size;
914 if (len > howmany)
915 len = howmany;
916
917 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
918 if (len && unpack_only_one) len = 1;
919 EXTEND(SP, len);
920 EXTEND_MORTAL(len);
921 }
922 }
923
924 needs_swap = NEEDS_SWAP(datumtype);
925
926 switch(TYPE_NO_ENDIANNESS(datumtype)) {
927 default:
928 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
929
930 case '%':
931 if (howlen == e_no_len)
932 len = 16; /* len is not specified */
933 checksum = len;
934 cuv = 0;
935 cdouble = 0;
936 continue;
937
938 case '(':
939 {
940 tempsym_t savsym = *symptr;
941 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
942 symptr->flags |= group_modifiers;
943 symptr->patend = savsym.grpend;
944 symptr->previous = &savsym;
945 symptr->level++;
946 PUTBACK;
947 if (len && unpack_only_one) len = 1;
948 while (len--) {
949 symptr->patptr = savsym.grpbeg;
950 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
951 else symptr->flags &= ~FLAG_PARSE_UTF8;
952 unpack_rec(symptr, s, strbeg, strend, &s);
953 if (s == strend && savsym.howlen == e_star)
954 break; /* No way to continue */
955 }
956 SPAGAIN;
957 savsym.flags = symptr->flags & ~group_modifiers;
958 *symptr = savsym;
959 break;
960 }
961 case '.' | TYPE_IS_SHRIEKING:
962 case '.': {
963 const char *from;
964 SV *sv;
965 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
966 if (howlen == e_star) from = strbeg;
967 else if (len <= 0) from = s;
968 else {
969 tempsym_t *group = symptr;
970
971 while (--len && group) group = group->previous;
972 from = group ? strbeg + group->strbeg : strbeg;
973 }
974 sv = from <= s ?
975 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
976 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
977 mXPUSHs(sv);
978 break;
979 }
980 case '@' | TYPE_IS_SHRIEKING:
981 case '@':
982 s = strbeg + symptr->strbeg;
983 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
984 {
985 while (len > 0) {
986 if (s >= strend)
987 Perl_croak(aTHX_ "'@' outside of string in unpack");
988 s += UTF8SKIP(s);
989 len--;
990 }
991 if (s > strend)
992 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
993 } else {
994 if (strend-s < len)
995 Perl_croak(aTHX_ "'@' outside of string in unpack");
996 s += len;
997 }
998 break;
999 case 'X' | TYPE_IS_SHRIEKING:
1000 if (!len) /* Avoid division by 0 */
1001 len = 1;
1002 if (utf8) {
1003 const char *hop, *last;
1004 I32 l = len;
1005 hop = last = strbeg;
1006 while (hop < s) {
1007 hop += UTF8SKIP(hop);
1008 if (--l == 0) {
1009 last = hop;
1010 l = len;
1011 }
1012 }
1013 if (last > s)
1014 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1015 s = last;
1016 break;
1017 }
1018 len = (s - strbeg) % len;
1019 /* FALLTHROUGH */
1020 case 'X':
1021 if (utf8) {
1022 while (len > 0) {
1023 if (s <= strbeg)
1024 Perl_croak(aTHX_ "'X' outside of string in unpack");
1025 while (--s, UTF8_IS_CONTINUATION(*s)) {
1026 if (s <= strbeg)
1027 Perl_croak(aTHX_ "'X' outside of string in unpack");
1028 }
1029 len--;
1030 }
1031 } else {
1032 if (len > s - strbeg)
1033 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1034 s -= len;
1035 }
1036 break;
1037 case 'x' | TYPE_IS_SHRIEKING: {
1038 I32 ai32;
1039 if (!len) /* Avoid division by 0 */
1040 len = 1;
1041 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1042 else ai32 = (s - strbeg) % len;
1043 if (ai32 == 0) break;
1044 len -= ai32;
1045 }
1046 /* FALLTHROUGH */
1047 case 'x':
1048 if (utf8) {
1049 while (len>0) {
1050 if (s >= strend)
1051 Perl_croak(aTHX_ "'x' outside of string in unpack");
1052 s += UTF8SKIP(s);
1053 len--;
1054 }
1055 } else {
1056 if (len > strend - s)
1057 Perl_croak(aTHX_ "'x' outside of string in unpack");
1058 s += len;
1059 }
1060 break;
1061 case '/':
1062 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1063
1064 case 'A':
1065 case 'Z':
1066 case 'a':
1067 if (checksum) {
1068 /* Preliminary length estimate is assumed done in 'W' */
1069 if (len > strend - s) len = strend - s;
1070 goto W_checksum;
1071 }
1072 if (utf8) {
1073 I32 l;
1074 const char *hop;
1075 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1076 if (hop >= strend) {
1077 if (hop > strend)
1078 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1079 break;
1080 }
1081 }
1082 if (hop > strend)
1083 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1084 len = hop - s;
1085 } else if (len > strend - s)
1086 len = strend - s;
1087
1088 if (datumtype == 'Z') {
1089 /* 'Z' strips stuff after first null */
1090 const char *ptr, *end;
1091 end = s + len;
1092 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1093 sv = newSVpvn(s, ptr-s);
1094 if (howlen == e_star) /* exact for 'Z*' */
1095 len = ptr-s + (ptr != strend ? 1 : 0);
1096 } else if (datumtype == 'A') {
1097 /* 'A' strips both nulls and spaces */
1098 const char *ptr;
1099 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1100 for (ptr = s+len-1; ptr >= s; ptr--)
1101 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1102 !isSPACE_utf8(ptr)) break;
1103 if (ptr >= s) ptr += UTF8SKIP(ptr);
1104 else ptr++;
1105 if (ptr > s+len)
1106 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1107 } else {
1108 for (ptr = s+len-1; ptr >= s; ptr--)
1109 if (*ptr != 0 && !isSPACE(*ptr)) break;
1110 ptr++;
1111 }
1112 sv = newSVpvn(s, ptr-s);
1113 } else sv = newSVpvn(s, len);
1114
1115 if (utf8) {
1116 SvUTF8_on(sv);
1117 /* Undo any upgrade done due to need_utf8() */
1118 if (!(symptr->flags & FLAG_WAS_UTF8))
1119 sv_utf8_downgrade(sv, 0);
1120 }
1121 mXPUSHs(sv);
1122 s += len;
1123 break;
1124 case 'B':
1125 case 'b': {
1126 char *str;
1127 if (howlen == e_star || len > (strend - s) * 8)
1128 len = (strend - s) * 8;
1129 if (checksum) {
1130 if (utf8)
1131 while (len >= 8 && s < strend) {
1132 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1133 len -= 8;
1134 }
1135 else
1136 while (len >= 8) {
1137 cuv += PL_bitcount[*(U8 *)s++];
1138 len -= 8;
1139 }
1140 if (len && s < strend) {
1141 U8 bits;
1142 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1143 if (datumtype == 'b')
1144 while (len-- > 0) {
1145 if (bits & 1) cuv++;
1146 bits >>= 1;
1147 }
1148 else
1149 while (len-- > 0) {
1150 if (bits & 0x80) cuv++;
1151 bits <<= 1;
1152 }
1153 }
1154 break;
1155 }
1156
1157 sv = sv_2mortal(newSV(len ? len : 1));
1158 SvPOK_on(sv);
1159 str = SvPVX(sv);
1160 if (datumtype == 'b') {
1161 U8 bits = 0;
1162 const I32 ai32 = len;
1163 for (len = 0; len < ai32; len++) {
1164 if (len & 7) bits >>= 1;
1165 else if (utf8) {
1166 if (s >= strend) break;
1167 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1168 } else bits = *(U8 *) s++;
1169 *str++ = bits & 1 ? '1' : '0';
1170 }
1171 } else {
1172 U8 bits = 0;
1173 const I32 ai32 = len;
1174 for (len = 0; len < ai32; len++) {
1175 if (len & 7) bits <<= 1;
1176 else if (utf8) {
1177 if (s >= strend) break;
1178 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1179 } else bits = *(U8 *) s++;
1180 *str++ = bits & 0x80 ? '1' : '0';
1181 }
1182 }
1183 *str = '\0';
1184 SvCUR_set(sv, str - SvPVX_const(sv));
1185 XPUSHs(sv);
1186 break;
1187 }
1188 case 'H':
1189 case 'h': {
1190 char *str = NULL;
1191 /* Preliminary length estimate, acceptable for utf8 too */
1192 if (howlen == e_star || len > (strend - s) * 2)
1193 len = (strend - s) * 2;
1194 if (!checksum) {
1195 sv = sv_2mortal(newSV(len ? len : 1));
1196 SvPOK_on(sv);
1197 str = SvPVX(sv);
1198 }
1199 if (datumtype == 'h') {
1200 U8 bits = 0;
1201 I32 ai32 = len;
1202 for (len = 0; len < ai32; len++) {
1203 if (len & 1) bits >>= 4;
1204 else if (utf8) {
1205 if (s >= strend) break;
1206 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1207 } else bits = * (U8 *) s++;
1208 if (!checksum)
1209 *str++ = PL_hexdigit[bits & 15];
1210 }
1211 } else {
1212 U8 bits = 0;
1213 const I32 ai32 = len;
1214 for (len = 0; len < ai32; len++) {
1215 if (len & 1) bits <<= 4;
1216 else if (utf8) {
1217 if (s >= strend) break;
1218 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1219 } else bits = *(U8 *) s++;
1220 if (!checksum)
1221 *str++ = PL_hexdigit[(bits >> 4) & 15];
1222 }
1223 }
1224 if (!checksum) {
1225 *str = '\0';
1226 SvCUR_set(sv, str - SvPVX_const(sv));
1227 XPUSHs(sv);
1228 }
1229 break;
1230 }
1231 case 'C':
1232 if (len == 0) {
1233 if (explicit_length)
1234 /* Switch to "character" mode */
1235 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1236 break;
1237 }
1238 /* FALLTHROUGH */
1239 case 'c':
1240 while (len-- > 0 && s < strend) {
1241 int aint;
1242 if (utf8)
1243 {
1244 STRLEN retlen;
1245 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1246 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1247 if (retlen == (STRLEN) -1 || retlen == 0)
1248 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1249 s += retlen;
1250 }
1251 else
1252 aint = *(U8 *)(s)++;
1253 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1254 aint -= 256;
1255 if (!checksum)
1256 mPUSHi(aint);
1257 else if (checksum > bits_in_uv)
1258 cdouble += (NV)aint;
1259 else
1260 cuv += aint;
1261 }
1262 break;
1263 case 'W':
1264 W_checksum:
1265 if (utf8) {
1266 while (len-- > 0 && s < strend) {
1267 STRLEN retlen;
1268 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1269 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1270 if (retlen == (STRLEN) -1 || retlen == 0)
1271 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1272 s += retlen;
1273 if (!checksum)
1274 mPUSHu(val);
1275 else if (checksum > bits_in_uv)
1276 cdouble += (NV) val;
1277 else
1278 cuv += val;
1279 }
1280 } else if (!checksum)
1281 while (len-- > 0) {
1282 const U8 ch = *(U8 *) s++;
1283 mPUSHu(ch);
1284 }
1285 else if (checksum > bits_in_uv)
1286 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1287 else
1288 while (len-- > 0) cuv += *(U8 *) s++;
1289 break;
1290 case 'U':
1291 if (len == 0) {
1292 if (explicit_length && howlen != e_star) {
1293 /* Switch to "bytes in UTF-8" mode */
1294 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1295 else
1296 /* Should be impossible due to the need_utf8() test */
1297 Perl_croak(aTHX_ "U0 mode on a byte string");
1298 }
1299 break;
1300 }
1301 if (len > strend - s) len = strend - s;
1302 if (!checksum) {
1303 if (len && unpack_only_one) len = 1;
1304 EXTEND(SP, len);
1305 EXTEND_MORTAL(len);
1306 }
1307 while (len-- > 0 && s < strend) {
1308 STRLEN retlen;
1309 UV auv;
1310 if (utf8) {
1311 U8 result[UTF8_MAXLEN];
1312 const char *ptr = s;
1313 STRLEN len;
1314 /* Bug: warns about bad utf8 even if we are short on bytes
1315 and will break out of the loop */
1316 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1317 'U'))
1318 break;
1319 len = UTF8SKIP(result);
1320 if (!uni_to_bytes(aTHX_ &ptr, strend,
1321 (char *) &result[1], len-1, 'U')) break;
1322 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1323 s = ptr;
1324 } else {
1325 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1326 if (retlen == (STRLEN) -1 || retlen == 0)
1327 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1328 s += retlen;
1329 }
1330 if (!checksum)
1331 mPUSHu(auv);
1332 else if (checksum > bits_in_uv)
1333 cdouble += (NV) auv;
1334 else
1335 cuv += auv;
1336 }
1337 break;
1338 case 's' | TYPE_IS_SHRIEKING:
1339#if SHORTSIZE != SIZE16
1340 while (len-- > 0) {
1341 short ashort;
1342 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1343 if (!checksum)
1344 mPUSHi(ashort);
1345 else if (checksum > bits_in_uv)
1346 cdouble += (NV)ashort;
1347 else
1348 cuv += ashort;
1349 }
1350 break;
1351#else
1352 /* FALLTHROUGH */
1353#endif
1354 case 's':
1355 while (len-- > 0) {
1356 I16 ai16;
1357
1358#if U16SIZE > SIZE16
1359 ai16 = 0;
1360#endif
1361 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1362#if U16SIZE > SIZE16
1363 if (ai16 > 32767)
1364 ai16 -= 65536;
1365#endif
1366 if (!checksum)
1367 mPUSHi(ai16);
1368 else if (checksum > bits_in_uv)
1369 cdouble += (NV)ai16;
1370 else
1371 cuv += ai16;
1372 }
1373 break;
1374 case 'S' | TYPE_IS_SHRIEKING:
1375#if SHORTSIZE != SIZE16
1376 while (len-- > 0) {
1377 unsigned short aushort;
1378 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1379 needs_swap);
1380 if (!checksum)
1381 mPUSHu(aushort);
1382 else if (checksum > bits_in_uv)
1383 cdouble += (NV)aushort;
1384 else
1385 cuv += aushort;
1386 }
1387 break;
1388#else
1389 /* FALLTHROUGH */
1390#endif
1391 case 'v':
1392 case 'n':
1393 case 'S':
1394 while (len-- > 0) {
1395 U16 au16;
1396#if U16SIZE > SIZE16
1397 au16 = 0;
1398#endif
1399 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1400 if (datumtype == 'n')
1401 au16 = PerlSock_ntohs(au16);
1402 if (datumtype == 'v')
1403 au16 = vtohs(au16);
1404 if (!checksum)
1405 mPUSHu(au16);
1406 else if (checksum > bits_in_uv)
1407 cdouble += (NV) au16;
1408 else
1409 cuv += au16;
1410 }
1411 break;
1412 case 'v' | TYPE_IS_SHRIEKING:
1413 case 'n' | TYPE_IS_SHRIEKING:
1414 while (len-- > 0) {
1415 I16 ai16;
1416# if U16SIZE > SIZE16
1417 ai16 = 0;
1418# endif
1419 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1420 /* There should never be any byte-swapping here. */
1421 assert(!TYPE_ENDIANNESS(datumtype));
1422 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1423 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1424 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1425 ai16 = (I16) vtohs((U16) ai16);
1426 if (!checksum)
1427 mPUSHi(ai16);
1428 else if (checksum > bits_in_uv)
1429 cdouble += (NV) ai16;
1430 else
1431 cuv += ai16;
1432 }
1433 break;
1434 case 'i':
1435 case 'i' | TYPE_IS_SHRIEKING:
1436 while (len-- > 0) {
1437 int aint;
1438 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1439 if (!checksum)
1440 mPUSHi(aint);
1441 else if (checksum > bits_in_uv)
1442 cdouble += (NV)aint;
1443 else
1444 cuv += aint;
1445 }
1446 break;
1447 case 'I':
1448 case 'I' | TYPE_IS_SHRIEKING:
1449 while (len-- > 0) {
1450 unsigned int auint;
1451 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1452 if (!checksum)
1453 mPUSHu(auint);
1454 else if (checksum > bits_in_uv)
1455 cdouble += (NV)auint;
1456 else
1457 cuv += auint;
1458 }
1459 break;
1460 case 'j':
1461 while (len-- > 0) {
1462 IV aiv;
1463 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1464 if (!checksum)
1465 mPUSHi(aiv);
1466 else if (checksum > bits_in_uv)
1467 cdouble += (NV)aiv;
1468 else
1469 cuv += aiv;
1470 }
1471 break;
1472 case 'J':
1473 while (len-- > 0) {
1474 UV auv;
1475 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1476 if (!checksum)
1477 mPUSHu(auv);
1478 else if (checksum > bits_in_uv)
1479 cdouble += (NV)auv;
1480 else
1481 cuv += auv;
1482 }
1483 break;
1484 case 'l' | TYPE_IS_SHRIEKING:
1485#if LONGSIZE != SIZE32
1486 while (len-- > 0) {
1487 long along;
1488 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1489 if (!checksum)
1490 mPUSHi(along);
1491 else if (checksum > bits_in_uv)
1492 cdouble += (NV)along;
1493 else
1494 cuv += along;
1495 }
1496 break;
1497#else
1498 /* FALLTHROUGH */
1499#endif
1500 case 'l':
1501 while (len-- > 0) {
1502 I32 ai32;
1503#if U32SIZE > SIZE32
1504 ai32 = 0;
1505#endif
1506 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1507#if U32SIZE > SIZE32
1508 if (ai32 > 2147483647) ai32 -= 4294967296;
1509#endif
1510 if (!checksum)
1511 mPUSHi(ai32);
1512 else if (checksum > bits_in_uv)
1513 cdouble += (NV)ai32;
1514 else
1515 cuv += ai32;
1516 }
1517 break;
1518 case 'L' | TYPE_IS_SHRIEKING:
1519#if LONGSIZE != SIZE32
1520 while (len-- > 0) {
1521 unsigned long aulong;
1522 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1523 if (!checksum)
1524 mPUSHu(aulong);
1525 else if (checksum > bits_in_uv)
1526 cdouble += (NV)aulong;
1527 else
1528 cuv += aulong;
1529 }
1530 break;
1531#else
1532 /* FALLTHROUGH */
1533#endif
1534 case 'V':
1535 case 'N':
1536 case 'L':
1537 while (len-- > 0) {
1538 U32 au32;
1539#if U32SIZE > SIZE32
1540 au32 = 0;
1541#endif
1542 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1543 if (datumtype == 'N')
1544 au32 = PerlSock_ntohl(au32);
1545 if (datumtype == 'V')
1546 au32 = vtohl(au32);
1547 if (!checksum)
1548 mPUSHu(au32);
1549 else if (checksum > bits_in_uv)
1550 cdouble += (NV)au32;
1551 else
1552 cuv += au32;
1553 }
1554 break;
1555 case 'V' | TYPE_IS_SHRIEKING:
1556 case 'N' | TYPE_IS_SHRIEKING:
1557 while (len-- > 0) {
1558 I32 ai32;
1559#if U32SIZE > SIZE32
1560 ai32 = 0;
1561#endif
1562 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1563 /* There should never be any byte swapping here. */
1564 assert(!TYPE_ENDIANNESS(datumtype));
1565 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1566 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1567 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1568 ai32 = (I32)vtohl((U32)ai32);
1569 if (!checksum)
1570 mPUSHi(ai32);
1571 else if (checksum > bits_in_uv)
1572 cdouble += (NV)ai32;
1573 else
1574 cuv += ai32;
1575 }
1576 break;
1577 case 'p':
1578 while (len-- > 0) {
1579 const char *aptr;
1580 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1581 /* newSVpv generates undef if aptr is NULL */
1582 mPUSHs(newSVpv(aptr, 0));
1583 }
1584 break;
1585 case 'w':
1586 {
1587 UV auv = 0;
1588 U32 bytes = 0;
1589
1590 while (len > 0 && s < strend) {
1591 U8 ch;
1592 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1593 auv = (auv << 7) | (ch & 0x7f);
1594 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1595 if (ch < 0x80) {
1596 bytes = 0;
1597 mPUSHu(auv);
1598 len--;
1599 auv = 0;
1600 continue;
1601 }
1602 if (++bytes >= sizeof(UV)) { /* promote to string */
1603 const char *t;
1604
1605 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1606 while (s < strend) {
1607 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1608 sv = mul128(sv, (U8)(ch & 0x7f));
1609 if (!(ch & 0x80)) {
1610 bytes = 0;
1611 break;
1612 }
1613 }
1614 t = SvPV_nolen_const(sv);
1615 while (*t == '0')
1616 t++;
1617 sv_chop(sv, t);
1618 mPUSHs(sv);
1619 len--;
1620 auv = 0;
1621 }
1622 }
1623 if ((s >= strend) && bytes)
1624 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1625 }
1626 break;
1627 case 'P':
1628 if (symptr->howlen == e_star)
1629 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1630 EXTEND(SP, 1);
1631 if (s + sizeof(char*) <= strend) {
1632 char *aptr;
1633 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1634 /* newSVpvn generates undef if aptr is NULL */
1635 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1636 }
1637 break;
1638#if defined(HAS_QUAD) && IVSIZE >= 8
1639 case 'q':
1640 while (len-- > 0) {
1641 Quad_t aquad;
1642 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1643 if (!checksum)
1644 mPUSHs(newSViv((IV)aquad));
1645 else if (checksum > bits_in_uv)
1646 cdouble += (NV)aquad;
1647 else
1648 cuv += aquad;
1649 }
1650 break;
1651 case 'Q':
1652 while (len-- > 0) {
1653 Uquad_t auquad;
1654 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1655 if (!checksum)
1656 mPUSHs(newSVuv((UV)auquad));
1657 else if (checksum > bits_in_uv)
1658 cdouble += (NV)auquad;
1659 else
1660 cuv += auquad;
1661 }
1662 break;
1663#endif
1664 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1665 case 'f':
1666 while (len-- > 0) {
1667 float afloat;
1668 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1669 if (!checksum)
1670 mPUSHn(afloat);
1671 else
1672 cdouble += afloat;
1673 }
1674 break;
1675 case 'd':
1676 while (len-- > 0) {
1677 double adouble;
1678 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1679 if (!checksum)
1680 mPUSHn(adouble);
1681 else
1682 cdouble += adouble;
1683 }
1684 break;
1685 case 'F':
1686 while (len-- > 0) {
1687 NV_bytes anv;
1688 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1689 datumtype, needs_swap);
1690 if (!checksum)
1691 mPUSHn(anv.nv);
1692 else
1693 cdouble += anv.nv;
1694 }
1695 break;
1696#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1697 case 'D':
1698 while (len-- > 0) {
1699 ld_bytes aldouble;
1700 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1701 sizeof(aldouble.bytes), datumtype, needs_swap);
1702 if (!checksum)
1703 mPUSHn(aldouble.ld);
1704 else
1705 cdouble += aldouble.ld;
1706 }
1707 break;
1708#endif
1709 case 'u':
1710 if (!checksum) {
1711 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1712 sv = sv_2mortal(newSV(l));
1713 if (l) SvPOK_on(sv);
1714 }
1715 if (utf8) {
1716 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1717 I32 a, b, c, d;
1718 char hunk[3];
1719
1720 while (len > 0) {
1721 next_uni_uu(aTHX_ &s, strend, &a);
1722 next_uni_uu(aTHX_ &s, strend, &b);
1723 next_uni_uu(aTHX_ &s, strend, &c);
1724 next_uni_uu(aTHX_ &s, strend, &d);
1725 hunk[0] = (char)((a << 2) | (b >> 4));
1726 hunk[1] = (char)((b << 4) | (c >> 2));
1727 hunk[2] = (char)((c << 6) | d);
1728 if (!checksum)
1729 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1730 len -= 3;
1731 }
1732 if (s < strend) {
1733 if (*s == '\n') {
1734 s++;
1735 }
1736 else {
1737 /* possible checksum byte */
1738 const char *skip = s+UTF8SKIP(s);
1739 if (skip < strend && *skip == '\n')
1740 s = skip+1;
1741 }
1742 }
1743 }
1744 } else {
1745 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1746 I32 a, b, c, d;
1747 char hunk[3];
1748
1749 len = PL_uudmap[*(U8*)s++] & 077;
1750 while (len > 0) {
1751 if (s < strend && ISUUCHAR(*s))
1752 a = PL_uudmap[*(U8*)s++] & 077;
1753 else
1754 a = 0;
1755 if (s < strend && ISUUCHAR(*s))
1756 b = PL_uudmap[*(U8*)s++] & 077;
1757 else
1758 b = 0;
1759 if (s < strend && ISUUCHAR(*s))
1760 c = PL_uudmap[*(U8*)s++] & 077;
1761 else
1762 c = 0;
1763 if (s < strend && ISUUCHAR(*s))
1764 d = PL_uudmap[*(U8*)s++] & 077;
1765 else
1766 d = 0;
1767 hunk[0] = (char)((a << 2) | (b >> 4));
1768 hunk[1] = (char)((b << 4) | (c >> 2));
1769 hunk[2] = (char)((c << 6) | d);
1770 if (!checksum)
1771 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1772 len -= 3;
1773 }
1774 if (*s == '\n')
1775 s++;
1776 else /* possible checksum byte */
1777 if (s + 1 < strend && s[1] == '\n')
1778 s += 2;
1779 }
1780 }
1781 if (!checksum)
1782 XPUSHs(sv);
1783 break;
1784 }
1785
1786 if (checksum) {
1787 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1788 (checksum > bits_in_uv &&
1789 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1790 NV trouble, anv;
1791
1792 anv = (NV) (1 << (checksum & 15));
1793 while (checksum >= 16) {
1794 checksum -= 16;
1795 anv *= 65536.0;
1796 }
1797 while (cdouble < 0.0)
1798 cdouble += anv;
1799 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1800 sv = newSVnv(cdouble);
1801 }
1802 else {
1803 if (checksum < bits_in_uv) {
1804 UV mask = ((UV)1 << checksum) - 1;
1805 cuv &= mask;
1806 }
1807 sv = newSVuv(cuv);
1808 }
1809 mXPUSHs(sv);
1810 checksum = 0;
1811 }
1812
1813 if (symptr->flags & FLAG_SLASH){
1814 if (SP - PL_stack_base - start_sp_offset <= 0)
1815 break;
1816 if( next_symbol(symptr) ){
1817 if( symptr->howlen == e_number )
1818 Perl_croak(aTHX_ "Count after length/code in unpack" );
1819 if( beyond ){
1820 /* ...end of char buffer then no decent length available */
1821 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1822 } else {
1823 /* take top of stack (hope it's numeric) */
1824 len = POPi;
1825 if( len < 0 )
1826 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1827 }
1828 } else {
1829 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1830 }
1831 datumtype = symptr->code;
1832 explicit_length = FALSE;
1833 goto redo_switch;
1834 }
1835 }
1836
1837 if (new_s)
1838 *new_s = s;
1839 PUTBACK;
1840 return SP - PL_stack_base - start_sp_offset;
1841}
1842
1843PP(pp_unpack)
1844{
1845 dVAR;
1846 dSP;
1847 dPOPPOPssrl;
1848 I32 gimme = GIMME_V;
1849 STRLEN llen;
1850 STRLEN rlen;
1851 const char *pat = SvPV_const(left, llen);
1852 const char *s = SvPV_const(right, rlen);
1853 const char *strend = s + rlen;
1854 const char *patend = pat + llen;
1855 I32 cnt;
1856
1857 PUTBACK;
1858 cnt = unpackstring(pat, patend, s, strend,
1859 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1860 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1861
1862 SPAGAIN;
1863 if ( !cnt && gimme == G_SCALAR )
1864 PUSHs(&PL_sv_undef);
1865 RETURN;
1866}
1867
1868STATIC U8 *
1869doencodes(U8 *h, const char *s, I32 len)
1870{
1871 *h++ = PL_uuemap[len];
1872 while (len > 2) {
1873 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1874 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1875 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1876 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1877 s += 3;
1878 len -= 3;
1879 }
1880 if (len > 0) {
1881 const char r = (len > 1 ? s[1] : '\0');
1882 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1883 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1884 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1885 *h++ = PL_uuemap[0];
1886 }
1887 *h++ = '\n';
1888 return h;
1889}
1890
1891STATIC SV *
1892S_is_an_int(pTHX_ const char *s, STRLEN l)
1893{
1894 SV *result = newSVpvn(s, l);
1895 char *const result_c = SvPV_nolen(result); /* convenience */
1896 char *out = result_c;
1897 bool skip = 1;
1898 bool ignore = 0;
1899
1900 PERL_ARGS_ASSERT_IS_AN_INT;
1901
1902 while (*s) {
1903 switch (*s) {
1904 case ' ':
1905 break;
1906 case '+':
1907 if (!skip) {
1908 SvREFCNT_dec(result);
1909 return (NULL);
1910 }
1911 break;
1912 case '0':
1913 case '1':
1914 case '2':
1915 case '3':
1916 case '4':
1917 case '5':
1918 case '6':
1919 case '7':
1920 case '8':
1921 case '9':
1922 skip = 0;
1923 if (!ignore) {
1924 *(out++) = *s;
1925 }
1926 break;
1927 case '.':
1928 ignore = 1;
1929 break;
1930 default:
1931 SvREFCNT_dec(result);
1932 return (NULL);
1933 }
1934 s++;
1935 }
1936 *(out++) = '\0';
1937 SvCUR_set(result, out - result_c);
1938 return (result);
1939}
1940
1941/* pnum must be '\0' terminated */
1942STATIC int
1943S_div128(pTHX_ SV *pnum, bool *done)
1944{
1945 STRLEN len;
1946 char * const s = SvPV(pnum, len);
1947 char *t = s;
1948 int m = 0;
1949
1950 PERL_ARGS_ASSERT_DIV128;
1951
1952 *done = 1;
1953 while (*t) {
1954 const int i = m * 10 + (*t - '0');
1955 const int r = (i >> 7); /* r < 10 */
1956 m = i & 0x7F;
1957 if (r) {
1958 *done = 0;
1959 }
1960 *(t++) = '0' + r;
1961 }
1962 *(t++) = '\0';
1963 SvCUR_set(pnum, (STRLEN) (t - s));
1964 return (m);
1965}
1966
1967/*
1968=for apidoc packlist
1969
1970The engine implementing pack() Perl function.
1971
1972=cut
1973*/
1974
1975void
1976Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1977{
1978 dVAR;
1979 tempsym_t sym;
1980
1981 PERL_ARGS_ASSERT_PACKLIST;
1982
1983 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1984
1985 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1986 Also make sure any UTF8 flag is loaded */
1987 SvPV_force_nolen(cat);
1988 if (DO_UTF8(cat))
1989 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1990
1991 (void)pack_rec( cat, &sym, beglist, endlist );
1992}
1993
1994/* like sv_utf8_upgrade, but also repoint the group start markers */
1995STATIC void
1996marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1997 STRLEN len;
1998 tempsym_t *group;
1999 const char *from_ptr, *from_start, *from_end, **marks, **m;
2000 char *to_start, *to_ptr;
2001
2002 if (SvUTF8(sv)) return;
2003
2004 from_start = SvPVX_const(sv);
2005 from_end = from_start + SvCUR(sv);
2006 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2007 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2008 if (from_ptr == from_end) {
2009 /* Simple case: no character needs to be changed */
2010 SvUTF8_on(sv);
2011 return;
2012 }
2013
2014 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2015 Newx(to_start, len, char);
2016 Copy(from_start, to_start, from_ptr-from_start, char);
2017 to_ptr = to_start + (from_ptr-from_start);
2018
2019 Newx(marks, sym_ptr->level+2, const char *);
2020 for (group=sym_ptr; group; group = group->previous)
2021 marks[group->level] = from_start + group->strbeg;
2022 marks[sym_ptr->level+1] = from_end+1;
2023 for (m = marks; *m < from_ptr; m++)
2024 *m = to_start + (*m-from_start);
2025
2026 for (;from_ptr < from_end; from_ptr++) {
2027 while (*m == from_ptr) *m++ = to_ptr;
2028 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2029 }
2030 *to_ptr = 0;
2031
2032 while (*m == from_ptr) *m++ = to_ptr;
2033 if (m != marks + sym_ptr->level+1) {
2034 Safefree(marks);
2035 Safefree(to_start);
2036 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2037 "level=%d", m, marks, sym_ptr->level);
2038 }
2039 for (group=sym_ptr; group; group = group->previous)
2040 group->strbeg = marks[group->level] - to_start;
2041 Safefree(marks);
2042
2043 if (SvOOK(sv)) {
2044 if (SvIVX(sv)) {
2045 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2046 from_start -= SvIVX(sv);
2047 SvIV_set(sv, 0);
2048 }
2049 SvFLAGS(sv) &= ~SVf_OOK;
2050 }
2051 if (SvLEN(sv) != 0)
2052 Safefree(from_start);
2053 SvPV_set(sv, to_start);
2054 SvCUR_set(sv, to_ptr - to_start);
2055 SvLEN_set(sv, len);
2056 SvUTF8_on(sv);
2057}
2058
2059/* Exponential string grower. Makes string extension effectively O(n)
2060 needed says how many extra bytes we need (not counting the final '\0')
2061 Only grows the string if there is an actual lack of space
2062*/
2063STATIC char *
2064S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2065 const STRLEN cur = SvCUR(sv);
2066 const STRLEN len = SvLEN(sv);
2067 STRLEN extend;
2068
2069 PERL_ARGS_ASSERT_SV_EXP_GROW;
2070
2071 if (len - cur > needed) return SvPVX(sv);
2072 extend = needed > len ? needed : len;
2073 return SvGROW(sv, len+extend+1);
2074}
2075
2076STATIC
2077SV **
2078S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2079{
2080 dVAR;
2081 tempsym_t lookahead;
2082 I32 items = endlist - beglist;
2083 bool found = next_symbol(symptr);
2084 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2085 bool warn_utf8 = ckWARN(WARN_UTF8);
2086 char* from;
2087
2088 PERL_ARGS_ASSERT_PACK_REC;
2089
2090 if (symptr->level == 0 && found && symptr->code == 'U') {
2091 marked_upgrade(aTHX_ cat, symptr);
2092 symptr->flags |= FLAG_DO_UTF8;
2093 utf8 = 0;
2094 }
2095 symptr->strbeg = SvCUR(cat);
2096
2097 while (found) {
2098 SV *fromstr;
2099 STRLEN fromlen;
2100 I32 len;
2101 SV *lengthcode = NULL;
2102 I32 datumtype = symptr->code;
2103 howlen_t howlen = symptr->howlen;
2104 char *start = SvPVX(cat);
2105 char *cur = start + SvCUR(cat);
2106 bool needs_swap;
2107
2108#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2109
2110 switch (howlen) {
2111 case e_star:
2112 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2113 0 : items;
2114 break;
2115 default:
2116 /* e_no_len and e_number */
2117 len = symptr->length;
2118 break;
2119 }
2120
2121 if (len) {
2122 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2123
2124 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2125 /* We can process this letter. */
2126 STRLEN size = props & PACK_SIZE_MASK;
2127 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2128 }
2129 }
2130
2131 /* Look ahead for next symbol. Do we have code/code? */
2132 lookahead = *symptr;
2133 found = next_symbol(&lookahead);
2134 if (symptr->flags & FLAG_SLASH) {
2135 IV count;
2136 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2137 if (strchr("aAZ", lookahead.code)) {
2138 if (lookahead.howlen == e_number) count = lookahead.length;
2139 else {
2140 if (items > 0) {
2141 count = sv_len_utf8(*beglist);
2142 }
2143 else count = 0;
2144 if (lookahead.code == 'Z') count++;
2145 }
2146 } else {
2147 if (lookahead.howlen == e_number && lookahead.length < items)
2148 count = lookahead.length;
2149 else count = items;
2150 }
2151 lookahead.howlen = e_number;
2152 lookahead.length = count;
2153 lengthcode = sv_2mortal(newSViv(count));
2154 }
2155
2156 needs_swap = NEEDS_SWAP(datumtype);
2157
2158 /* Code inside the switch must take care to properly update
2159 cat (CUR length and '\0' termination) if it updated *cur and
2160 doesn't simply leave using break */
2161 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2162 default:
2163 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2164 (int) TYPE_NO_MODIFIERS(datumtype));
2165 case '%':
2166 Perl_croak(aTHX_ "'%%' may not be used in pack");
2167
2168 case '.' | TYPE_IS_SHRIEKING:
2169 case '.':
2170 if (howlen == e_star) from = start;
2171 else if (len == 0) from = cur;
2172 else {
2173 tempsym_t *group = symptr;
2174
2175 while (--len && group) group = group->previous;
2176 from = group ? start + group->strbeg : start;
2177 }
2178 fromstr = NEXTFROM;
2179 len = SvIV(fromstr);
2180 goto resize;
2181 case '@' | TYPE_IS_SHRIEKING:
2182 case '@':
2183 from = start + symptr->strbeg;
2184 resize:
2185 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2186 if (len >= 0) {
2187 while (len && from < cur) {
2188 from += UTF8SKIP(from);
2189 len--;
2190 }
2191 if (from > cur)
2192 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2193 if (len) {
2194 /* Here we know from == cur */
2195 grow:
2196 GROWING(0, cat, start, cur, len);
2197 Zero(cur, len, char);
2198 cur += len;
2199 } else if (from < cur) {
2200 len = cur - from;
2201 goto shrink;
2202 } else goto no_change;
2203 } else {
2204 cur = from;
2205 len = -len;
2206 goto utf8_shrink;
2207 }
2208 else {
2209 len -= cur - from;
2210 if (len > 0) goto grow;
2211 if (len == 0) goto no_change;
2212 len = -len;
2213 goto shrink;
2214 }
2215 break;
2216
2217 case '(': {
2218 tempsym_t savsym = *symptr;
2219 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2220 symptr->flags |= group_modifiers;
2221 symptr->patend = savsym.grpend;
2222 symptr->level++;
2223 symptr->previous = &lookahead;
2224 while (len--) {
2225 U32 was_utf8;
2226 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2227 else symptr->flags &= ~FLAG_PARSE_UTF8;
2228 was_utf8 = SvUTF8(cat);
2229 symptr->patptr = savsym.grpbeg;
2230 beglist = pack_rec(cat, symptr, beglist, endlist);
2231 if (SvUTF8(cat) != was_utf8)
2232 /* This had better be an upgrade while in utf8==0 mode */
2233 utf8 = 1;
2234
2235 if (savsym.howlen == e_star && beglist == endlist)
2236 break; /* No way to continue */
2237 }
2238 items = endlist - beglist;
2239 lookahead.flags = symptr->flags & ~group_modifiers;
2240 goto no_change;
2241 }
2242 case 'X' | TYPE_IS_SHRIEKING:
2243 if (!len) /* Avoid division by 0 */
2244 len = 1;
2245 if (utf8) {
2246 char *hop, *last;
2247 I32 l = len;
2248 hop = last = start;
2249 while (hop < cur) {
2250 hop += UTF8SKIP(hop);
2251 if (--l == 0) {
2252 last = hop;
2253 l = len;
2254 }
2255 }
2256 if (last > cur)
2257 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2258 cur = last;
2259 break;
2260 }
2261 len = (cur-start) % len;
2262 /* FALLTHROUGH */
2263 case 'X':
2264 if (utf8) {
2265 if (len < 1) goto no_change;
2266 utf8_shrink:
2267 while (len > 0) {
2268 if (cur <= start)
2269 Perl_croak(aTHX_ "'%c' outside of string in pack",
2270 (int) TYPE_NO_MODIFIERS(datumtype));
2271 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2272 if (cur <= start)
2273 Perl_croak(aTHX_ "'%c' outside of string in pack",
2274 (int) TYPE_NO_MODIFIERS(datumtype));
2275 }
2276 len--;
2277 }
2278 } else {
2279 shrink:
2280 if (cur - start < len)
2281 Perl_croak(aTHX_ "'%c' outside of string in pack",
2282 (int) TYPE_NO_MODIFIERS(datumtype));
2283 cur -= len;
2284 }
2285 if (cur < start+symptr->strbeg) {
2286 /* Make sure group starts don't point into the void */
2287 tempsym_t *group;
2288 const STRLEN length = cur-start;
2289 for (group = symptr;
2290 group && length < group->strbeg;
2291 group = group->previous) group->strbeg = length;
2292 lookahead.strbeg = length;
2293 }
2294 break;
2295 case 'x' | TYPE_IS_SHRIEKING: {
2296 I32 ai32;
2297 if (!len) /* Avoid division by 0 */
2298 len = 1;
2299 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2300 else ai32 = (cur - start) % len;
2301 if (ai32 == 0) goto no_change;
2302 len -= ai32;
2303 }
2304 /* FALLTHROUGH */
2305 case 'x':
2306 goto grow;
2307 case 'A':
2308 case 'Z':
2309 case 'a': {
2310 const char *aptr;
2311
2312 fromstr = NEXTFROM;
2313 aptr = SvPV_const(fromstr, fromlen);
2314 if (DO_UTF8(fromstr)) {
2315 const char *end, *s;
2316
2317 if (!utf8 && !SvUTF8(cat)) {
2318 marked_upgrade(aTHX_ cat, symptr);
2319 lookahead.flags |= FLAG_DO_UTF8;
2320 lookahead.strbeg = symptr->strbeg;
2321 utf8 = 1;
2322 start = SvPVX(cat);
2323 cur = start + SvCUR(cat);
2324 }
2325 if (howlen == e_star) {
2326 if (utf8) goto string_copy;
2327 len = fromlen+1;
2328 }
2329 s = aptr;
2330 end = aptr + fromlen;
2331 fromlen = datumtype == 'Z' ? len-1 : len;
2332 while ((I32) fromlen > 0 && s < end) {
2333 s += UTF8SKIP(s);
2334 fromlen--;
2335 }
2336 if (s > end)
2337 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2338 if (utf8) {
2339 len = fromlen;
2340 if (datumtype == 'Z') len++;
2341 fromlen = s-aptr;
2342 len += fromlen;
2343
2344 goto string_copy;
2345 }
2346 fromlen = len - fromlen;
2347 if (datumtype == 'Z') fromlen--;
2348 if (howlen == e_star) {
2349 len = fromlen;
2350 if (datumtype == 'Z') len++;
2351 }
2352 GROWING(0, cat, start, cur, len);
2353 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2354 datumtype | TYPE_IS_PACK))
2355 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2356 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2357 (int)datumtype, aptr, end, cur, (UV)fromlen);
2358 cur += fromlen;
2359 len -= fromlen;
2360 } else if (utf8) {
2361 if (howlen == e_star) {
2362 len = fromlen;
2363 if (datumtype == 'Z') len++;
2364 }
2365 if (len <= (I32) fromlen) {
2366 fromlen = len;
2367 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2368 }
2369 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2370 upgrade, so:
2371 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2372 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2373 len -= fromlen;
2374 while (fromlen > 0) {
2375 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2376 aptr++;
2377 fromlen--;
2378 }
2379 } else {
2380 string_copy:
2381 if (howlen == e_star) {
2382 len = fromlen;
2383 if (datumtype == 'Z') len++;
2384 }
2385 if (len <= (I32) fromlen) {
2386 fromlen = len;
2387 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2388 }
2389 GROWING(0, cat, start, cur, len);
2390 Copy(aptr, cur, fromlen, char);
2391 cur += fromlen;
2392 len -= fromlen;
2393 }
2394 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2395 cur += len;
2396 SvTAINT(cat);
2397 break;
2398 }
2399 case 'B':
2400 case 'b': {
2401 const char *str, *end;
2402 I32 l, field_len;
2403 U8 bits;
2404 bool utf8_source;
2405 U32 utf8_flags;
2406
2407 fromstr = NEXTFROM;
2408 str = SvPV_const(fromstr, fromlen);
2409 end = str + fromlen;
2410 if (DO_UTF8(fromstr)) {
2411 utf8_source = TRUE;
2412 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2413 } else {
2414 utf8_source = FALSE;
2415 utf8_flags = 0; /* Unused, but keep compilers happy */
2416 }
2417 if (howlen == e_star) len = fromlen;
2418 field_len = (len+7)/8;
2419 GROWING(utf8, cat, start, cur, field_len);
2420 if (len > (I32)fromlen) len = fromlen;
2421 bits = 0;
2422 l = 0;
2423 if (datumtype == 'B')
2424 while (l++ < len) {
2425 if (utf8_source) {
2426 UV val = 0;
2427 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2428 bits |= val & 1;
2429 } else bits |= *str++ & 1;
2430 if (l & 7) bits <<= 1;
2431 else {
2432 PUSH_BYTE(utf8, cur, bits);
2433 bits = 0;
2434 }
2435 }
2436 else
2437 /* datumtype == 'b' */
2438 while (l++ < len) {
2439 if (utf8_source) {
2440 UV val = 0;
2441 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2442 if (val & 1) bits |= 0x80;
2443 } else if (*str++ & 1)
2444 bits |= 0x80;
2445 if (l & 7) bits >>= 1;
2446 else {
2447 PUSH_BYTE(utf8, cur, bits);
2448 bits = 0;
2449 }
2450 }
2451 l--;
2452 if (l & 7) {
2453 if (datumtype == 'B')
2454 bits <<= 7 - (l & 7);
2455 else
2456 bits >>= 7 - (l & 7);
2457 PUSH_BYTE(utf8, cur, bits);
2458 l += 7;
2459 }
2460 /* Determine how many chars are left in the requested field */
2461 l /= 8;
2462 if (howlen == e_star) field_len = 0;
2463 else field_len -= l;
2464 Zero(cur, field_len, char);
2465 cur += field_len;
2466 break;
2467 }
2468 case 'H':
2469 case 'h': {
2470 const char *str, *end;
2471 I32 l, field_len;
2472 U8 bits;
2473 bool utf8_source;
2474 U32 utf8_flags;
2475
2476 fromstr = NEXTFROM;
2477 str = SvPV_const(fromstr, fromlen);
2478 end = str + fromlen;
2479 if (DO_UTF8(fromstr)) {
2480 utf8_source = TRUE;
2481 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2482 } else {
2483 utf8_source = FALSE;
2484 utf8_flags = 0; /* Unused, but keep compilers happy */
2485 }
2486 if (howlen == e_star) len = fromlen;
2487 field_len = (len+1)/2;
2488 GROWING(utf8, cat, start, cur, field_len);
2489 if (!utf8 && len > (I32)fromlen) len = fromlen;
2490 bits = 0;
2491 l = 0;
2492 if (datumtype == 'H')
2493 while (l++ < len) {
2494 if (utf8_source) {
2495 UV val = 0;
2496 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2497 if (val < 256 && isALPHA(val))
2498 bits |= (val + 9) & 0xf;
2499 else
2500 bits |= val & 0xf;
2501 } else if (isALPHA(*str))
2502 bits |= (*str++ + 9) & 0xf;
2503 else
2504 bits |= *str++ & 0xf;
2505 if (l & 1) bits <<= 4;
2506 else {
2507 PUSH_BYTE(utf8, cur, bits);
2508 bits = 0;
2509 }
2510 }
2511 else
2512 while (l++ < len) {
2513 if (utf8_source) {
2514 UV val = 0;
2515 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2516 if (val < 256 && isALPHA(val))
2517 bits |= ((val + 9) & 0xf) << 4;
2518 else
2519 bits |= (val & 0xf) << 4;
2520 } else if (isALPHA(*str))
2521 bits |= ((*str++ + 9) & 0xf) << 4;
2522 else
2523 bits |= (*str++ & 0xf) << 4;
2524 if (l & 1) bits >>= 4;
2525 else {
2526 PUSH_BYTE(utf8, cur, bits);
2527 bits = 0;
2528 }
2529 }
2530 l--;
2531 if (l & 1) {
2532 PUSH_BYTE(utf8, cur, bits);
2533 l++;
2534 }
2535 /* Determine how many chars are left in the requested field */
2536 l /= 2;
2537 if (howlen == e_star) field_len = 0;
2538 else field_len -= l;
2539 Zero(cur, field_len, char);
2540 cur += field_len;
2541 break;
2542 }
2543 case 'c':
2544 while (len-- > 0) {
2545 IV aiv;
2546 fromstr = NEXTFROM;
2547 aiv = SvIV(fromstr);
2548 if ((-128 > aiv || aiv > 127))
2549 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2550 "Character in 'c' format wrapped in pack");
2551 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2552 }
2553 break;
2554 case 'C':
2555 if (len == 0) {
2556 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2557 break;
2558 }
2559 while (len-- > 0) {
2560 IV aiv;
2561 fromstr = NEXTFROM;
2562 aiv = SvIV(fromstr);
2563 if ((0 > aiv || aiv > 0xff))
2564 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2565 "Character in 'C' format wrapped in pack");
2566 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2567 }
2568 break;
2569 case 'W': {
2570 char *end;
2571 U8 in_bytes = (U8)IN_BYTES;
2572
2573 end = start+SvLEN(cat)-1;
2574 if (utf8) end -= UTF8_MAXLEN-1;
2575 while (len-- > 0) {
2576 UV auv;
2577 fromstr = NEXTFROM;
2578 auv = SvUV(fromstr);
2579 if (in_bytes) auv = auv % 0x100;
2580 if (utf8) {
2581 W_utf8:
2582 if (cur > end) {
2583 *cur = '\0';
2584 SvCUR_set(cat, cur - start);
2585
2586 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2587 end = start+SvLEN(cat)-UTF8_MAXLEN;
2588 }
2589 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2590 auv,
2591 warn_utf8 ?
2592 0 : UNICODE_ALLOW_ANY);
2593 } else {
2594 if (auv >= 0x100) {
2595 if (!SvUTF8(cat)) {
2596 *cur = '\0';
2597 SvCUR_set(cat, cur - start);
2598 marked_upgrade(aTHX_ cat, symptr);
2599 lookahead.flags |= FLAG_DO_UTF8;
2600 lookahead.strbeg = symptr->strbeg;
2601 utf8 = 1;
2602 start = SvPVX(cat);
2603 cur = start + SvCUR(cat);
2604 end = start+SvLEN(cat)-UTF8_MAXLEN;
2605 goto W_utf8;
2606 }
2607 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2608 "Character in 'W' format wrapped in pack");
2609 auv &= 0xff;
2610 }
2611 if (cur >= end) {
2612 *cur = '\0';
2613 SvCUR_set(cat, cur - start);
2614 GROWING(0, cat, start, cur, len+1);
2615 end = start+SvLEN(cat)-1;
2616 }
2617 *(U8 *) cur++ = (U8)auv;
2618 }
2619 }
2620 break;
2621 }
2622 case 'U': {
2623 char *end;
2624
2625 if (len == 0) {
2626 if (!(symptr->flags & FLAG_DO_UTF8)) {
2627 marked_upgrade(aTHX_ cat, symptr);
2628 lookahead.flags |= FLAG_DO_UTF8;
2629 lookahead.strbeg = symptr->strbeg;
2630 }
2631 utf8 = 0;
2632 goto no_change;
2633 }
2634
2635 end = start+SvLEN(cat);
2636 if (!utf8) end -= UTF8_MAXLEN;
2637 while (len-- > 0) {
2638 UV auv;
2639 fromstr = NEXTFROM;
2640 auv = SvUV(fromstr);
2641 if (utf8) {
2642 U8 buffer[UTF8_MAXLEN], *endb;
2643 endb = uvchr_to_utf8_flags(buffer, auv,
2644 warn_utf8 ?
2645 0 : UNICODE_ALLOW_ANY);
2646 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2647 *cur = '\0';
2648 SvCUR_set(cat, cur - start);
2649 GROWING(0, cat, start, cur,
2650 len+(endb-buffer)*UTF8_EXPAND);
2651 end = start+SvLEN(cat);
2652 }
2653 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
2654 } else {
2655 if (cur >= end) {
2656 *cur = '\0';
2657 SvCUR_set(cat, cur - start);
2658 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2659 end = start+SvLEN(cat)-UTF8_MAXLEN;
2660 }
2661 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
2662 warn_utf8 ?
2663 0 : UNICODE_ALLOW_ANY);
2664 }
2665 }
2666 break;
2667 }
2668 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2669 case 'f':
2670 while (len-- > 0) {
2671 float afloat;
2672 NV anv;
2673 fromstr = NEXTFROM;
2674 anv = SvNV(fromstr);
2675# if defined(VMS) && !defined(_IEEE_FP)
2676 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2677 * on Alpha; fake it if we don't have them.
2678 */
2679 if (anv > FLT_MAX)
2680 afloat = FLT_MAX;
2681 else if (anv < -FLT_MAX)
2682 afloat = -FLT_MAX;
2683 else afloat = (float)anv;
2684# else
2685 afloat = (float)anv;
2686# endif
2687 PUSH_VAR(utf8, cur, afloat, needs_swap);
2688 }
2689 break;
2690 case 'd':
2691 while (len-- > 0) {
2692 double adouble;
2693 NV anv;
2694 fromstr = NEXTFROM;
2695 anv = SvNV(fromstr);
2696# if defined(VMS) && !defined(_IEEE_FP)
2697 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2698 * on Alpha; fake it if we don't have them.
2699 */
2700 if (anv > DBL_MAX)
2701 adouble = DBL_MAX;
2702 else if (anv < -DBL_MAX)
2703 adouble = -DBL_MAX;
2704 else adouble = (double)anv;
2705# else
2706 adouble = (double)anv;
2707# endif
2708 PUSH_VAR(utf8, cur, adouble, needs_swap);
2709 }
2710 break;
2711 case 'F': {
2712 NV_bytes anv;
2713 Zero(&anv, 1, NV); /* can be long double with unused bits */
2714 while (len-- > 0) {
2715 fromstr = NEXTFROM;
2716#ifdef __GNUC__
2717 /* to work round a gcc/x86 bug; don't use SvNV */
2718 anv.nv = sv_2nv(fromstr);
2719#else
2720 anv.nv = SvNV(fromstr);
2721#endif
2722 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2723 }
2724 break;
2725 }
2726#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2727 case 'D': {
2728 ld_bytes aldouble;
2729 /* long doubles can have unused bits, which may be nonzero */
2730 Zero(&aldouble, 1, long double);
2731 while (len-- > 0) {
2732 fromstr = NEXTFROM;
2733# ifdef __GNUC__
2734 /* to work round a gcc/x86 bug; don't use SvNV */
2735 aldouble.ld = (long double)sv_2nv(fromstr);
2736# else
2737 aldouble.ld = (long double)SvNV(fromstr);
2738# endif
2739 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2740 needs_swap);
2741 }
2742 break;
2743 }
2744#endif
2745 case 'n' | TYPE_IS_SHRIEKING:
2746 case 'n':
2747 while (len-- > 0) {
2748 I16 ai16;
2749 fromstr = NEXTFROM;
2750 ai16 = (I16)SvIV(fromstr);
2751 ai16 = PerlSock_htons(ai16);
2752 PUSH16(utf8, cur, &ai16, FALSE);
2753 }
2754 break;
2755 case 'v' | TYPE_IS_SHRIEKING:
2756 case 'v':
2757 while (len-- > 0) {
2758 I16 ai16;
2759 fromstr = NEXTFROM;
2760 ai16 = (I16)SvIV(fromstr);
2761 ai16 = htovs(ai16);
2762 PUSH16(utf8, cur, &ai16, FALSE);
2763 }
2764 break;
2765 case 'S' | TYPE_IS_SHRIEKING:
2766#if SHORTSIZE != SIZE16
2767 while (len-- > 0) {
2768 unsigned short aushort;
2769 fromstr = NEXTFROM;
2770 aushort = SvUV(fromstr);
2771 PUSH_VAR(utf8, cur, aushort, needs_swap);
2772 }
2773 break;
2774#else
2775 /* FALLTHROUGH */
2776#endif
2777 case 'S':
2778 while (len-- > 0) {
2779 U16 au16;
2780 fromstr = NEXTFROM;
2781 au16 = (U16)SvUV(fromstr);
2782 PUSH16(utf8, cur, &au16, needs_swap);
2783 }
2784 break;
2785 case 's' | TYPE_IS_SHRIEKING:
2786#if SHORTSIZE != SIZE16
2787 while (len-- > 0) {
2788 short ashort;
2789 fromstr = NEXTFROM;
2790 ashort = SvIV(fromstr);
2791 PUSH_VAR(utf8, cur, ashort, needs_swap);
2792 }
2793 break;
2794#else
2795 /* FALLTHROUGH */
2796#endif
2797 case 's':
2798 while (len-- > 0) {
2799 I16 ai16;
2800 fromstr = NEXTFROM;
2801 ai16 = (I16)SvIV(fromstr);
2802 PUSH16(utf8, cur, &ai16, needs_swap);
2803 }
2804 break;
2805 case 'I':
2806 case 'I' | TYPE_IS_SHRIEKING:
2807 while (len-- > 0) {
2808 unsigned int auint;
2809 fromstr = NEXTFROM;
2810 auint = SvUV(fromstr);
2811 PUSH_VAR(utf8, cur, auint, needs_swap);
2812 }
2813 break;
2814 case 'j':
2815 while (len-- > 0) {
2816 IV aiv;
2817 fromstr = NEXTFROM;
2818 aiv = SvIV(fromstr);
2819 PUSH_VAR(utf8, cur, aiv, needs_swap);
2820 }
2821 break;
2822 case 'J':
2823 while (len-- > 0) {
2824 UV auv;
2825 fromstr = NEXTFROM;
2826 auv = SvUV(fromstr);
2827 PUSH_VAR(utf8, cur, auv, needs_swap);
2828 }
2829 break;
2830 case 'w':
2831 while (len-- > 0) {
2832 NV anv;
2833 fromstr = NEXTFROM;
2834 anv = SvNV(fromstr);
2835
2836 if (anv < 0) {
2837 *cur = '\0';
2838 SvCUR_set(cat, cur - start);
2839 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2840 }
2841
2842 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2843 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2844 any negative IVs will have already been got by the croak()
2845 above. IOK is untrue for fractions, so we test them
2846 against UV_MAX_P1. */
2847 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2848 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2849 char *in = buf + sizeof(buf);
2850 UV auv = SvUV(fromstr);
2851
2852 do {
2853 *--in = (char)((auv & 0x7f) | 0x80);
2854 auv >>= 7;
2855 } while (auv);
2856 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2857 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2858 in, (buf + sizeof(buf)) - in);
2859 } else if (SvPOKp(fromstr))
2860 goto w_string;
2861 else if (SvNOKp(fromstr)) {
2862 /* 10**NV_MAX_10_EXP is the largest power of 10
2863 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2864 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2865 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2866 And with that many bytes only Inf can overflow.
2867 Some C compilers are strict about integral constant
2868 expressions so we conservatively divide by a slightly
2869 smaller integer instead of multiplying by the exact
2870 floating-point value.
2871 */
2872#ifdef NV_MAX_10_EXP
2873 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2874 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2875#else
2876 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2877 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2878#endif
2879 char *in = buf + sizeof(buf);
2880
2881 anv = Perl_floor(anv);
2882 do {
2883 const NV next = Perl_floor(anv / 128);
2884 if (in <= buf) /* this cannot happen ;-) */
2885 Perl_croak(aTHX_ "Cannot compress integer in pack");
2886 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2887 anv = next;
2888 } while (anv > 0);
2889 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2890 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2891 in, (buf + sizeof(buf)) - in);
2892 } else {
2893 const char *from;
2894 char *result, *in;
2895 SV *norm;
2896 STRLEN len;
2897 bool done;
2898
2899 w_string:
2900 /* Copy string and check for compliance */
2901 from = SvPV_const(fromstr, len);
2902 if ((norm = is_an_int(from, len)) == NULL)
2903 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2904
2905 Newx(result, len, char);
2906 in = result + len;
2907 done = FALSE;
2908 while (!done) *--in = div128(norm, &done) | 0x80;
2909 result[len - 1] &= 0x7F; /* clear continue bit */
2910 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2911 in, (result + len) - in);
2912 Safefree(result);
2913 SvREFCNT_dec(norm); /* free norm */
2914 }
2915 }
2916 break;
2917 case 'i':
2918 case 'i' | TYPE_IS_SHRIEKING:
2919 while (len-- > 0) {
2920 int aint;
2921 fromstr = NEXTFROM;
2922 aint = SvIV(fromstr);
2923 PUSH_VAR(utf8, cur, aint, needs_swap);
2924 }
2925 break;
2926 case 'N' | TYPE_IS_SHRIEKING:
2927 case 'N':
2928 while (len-- > 0) {
2929 U32 au32;
2930 fromstr = NEXTFROM;
2931 au32 = SvUV(fromstr);
2932 au32 = PerlSock_htonl(au32);
2933 PUSH32(utf8, cur, &au32, FALSE);
2934 }
2935 break;
2936 case 'V' | TYPE_IS_SHRIEKING:
2937 case 'V':
2938 while (len-- > 0) {
2939 U32 au32;
2940 fromstr = NEXTFROM;
2941 au32 = SvUV(fromstr);
2942 au32 = htovl(au32);
2943 PUSH32(utf8, cur, &au32, FALSE);
2944 }
2945 break;
2946 case 'L' | TYPE_IS_SHRIEKING:
2947#if LONGSIZE != SIZE32
2948 while (len-- > 0) {
2949 unsigned long aulong;
2950 fromstr = NEXTFROM;
2951 aulong = SvUV(fromstr);
2952 PUSH_VAR(utf8, cur, aulong, needs_swap);
2953 }
2954 break;
2955#else
2956 /* Fall though! */
2957#endif
2958 case 'L':
2959 while (len-- > 0) {
2960 U32 au32;
2961 fromstr = NEXTFROM;
2962 au32 = SvUV(fromstr);
2963 PUSH32(utf8, cur, &au32, needs_swap);
2964 }
2965 break;
2966 case 'l' | TYPE_IS_SHRIEKING:
2967#if LONGSIZE != SIZE32
2968 while (len-- > 0) {
2969 long along;
2970 fromstr = NEXTFROM;
2971 along = SvIV(fromstr);
2972 PUSH_VAR(utf8, cur, along, needs_swap);
2973 }
2974 break;
2975#else
2976 /* Fall though! */
2977#endif
2978 case 'l':
2979 while (len-- > 0) {
2980 I32 ai32;
2981 fromstr = NEXTFROM;
2982 ai32 = SvIV(fromstr);
2983 PUSH32(utf8, cur, &ai32, needs_swap);
2984 }
2985 break;
2986#if defined(HAS_QUAD) && IVSIZE >= 8
2987 case 'Q':
2988 while (len-- > 0) {
2989 Uquad_t auquad;
2990 fromstr = NEXTFROM;
2991 auquad = (Uquad_t) SvUV(fromstr);
2992 PUSH_VAR(utf8, cur, auquad, needs_swap);
2993 }
2994 break;
2995 case 'q':
2996 while (len-- > 0) {
2997 Quad_t aquad;
2998 fromstr = NEXTFROM;
2999 aquad = (Quad_t)SvIV(fromstr);
3000 PUSH_VAR(utf8, cur, aquad, needs_swap);
3001 }
3002 break;
3003#endif
3004 case 'P':
3005 len = 1; /* assume SV is correct length */
3006 GROWING(utf8, cat, start, cur, sizeof(char *));
3007 /* FALLTHROUGH */
3008 case 'p':
3009 while (len-- > 0) {
3010 const char *aptr;
3011
3012 fromstr = NEXTFROM;
3013 SvGETMAGIC(fromstr);
3014 if (!SvOK(fromstr)) aptr = NULL;
3015 else {
3016 /* XXX better yet, could spirit away the string to
3017 * a safe spot and hang on to it until the result
3018 * of pack() (and all copies of the result) are
3019 * gone.
3020 */
3021 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3022 !SvREADONLY(fromstr)))) {
3023 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3024 "Attempt to pack pointer to temporary value");
3025 }
3026 if (SvPOK(fromstr) || SvNIOK(fromstr))
3027 aptr = SvPV_nomg_const_nolen(fromstr);
3028 else
3029 aptr = SvPV_force_flags_nolen(fromstr, 0);
3030 }
3031 PUSH_VAR(utf8, cur, aptr, needs_swap);
3032 }
3033 break;
3034 case 'u': {
3035 const char *aptr, *aend;
3036 bool from_utf8;
3037
3038 fromstr = NEXTFROM;
3039 if (len <= 2) len = 45;
3040 else len = len / 3 * 3;
3041 if (len >= 64) {
3042 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3043 "Field too wide in 'u' format in pack");
3044 len = 63;
3045 }
3046 aptr = SvPV_const(fromstr, fromlen);
3047 from_utf8 = DO_UTF8(fromstr);
3048 if (from_utf8) {
3049 aend = aptr + fromlen;
3050 fromlen = sv_len_utf8_nomg(fromstr);
3051 } else aend = NULL; /* Unused, but keep compilers happy */
3052 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3053 while (fromlen > 0) {
3054 U8 *end;
3055 I32 todo;
3056 U8 hunk[1+63/3*4+1];
3057
3058 if ((I32)fromlen > len)
3059 todo = len;
3060 else
3061 todo = fromlen;
3062 if (from_utf8) {
3063 char buffer[64];
3064 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3065 'u' | TYPE_IS_PACK)) {
3066 *cur = '\0';
3067 SvCUR_set(cat, cur - start);
3068 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3069 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3070 aptr, aend, buffer, (long) todo);
3071 }
3072 end = doencodes(hunk, buffer, todo);
3073 } else {
3074 end = doencodes(hunk, aptr, todo);
3075 aptr += todo;
3076 }
3077 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3078 fromlen -= todo;
3079 }
3080 break;
3081 }
3082 }
3083 *cur = '\0';
3084 SvCUR_set(cat, cur - start);
3085 no_change:
3086 *symptr = lookahead;
3087 }
3088 return beglist;
3089}
3090#undef NEXTFROM
3091
3092
3093PP(pp_pack)
3094{
3095 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3096 SV *cat = TARG;
3097 STRLEN fromlen;
3098 SV *pat_sv = *++MARK;
3099 const char *pat = SvPV_const(pat_sv, fromlen);
3100 const char *patend = pat + fromlen;
3101
3102 MARK++;
3103 sv_setpvs(cat, "");
3104 SvUTF8_off(cat);
3105
3106 packlist(cat, pat, patend, MARK, SP + 1);
3107
3108 SvSETMAGIC(cat);
3109 SP = ORIGMARK;
3110 PUSHs(cat);
3111 RETURN;
3112}
3113
3114/*
3115 * Local variables:
3116 * c-indentation-style: bsd
3117 * c-basic-offset: 4
3118 * indent-tabs-mode: nil
3119 * End:
3120 *
3121 * ex: set ts=8 sts=4 sw=4 et:
3122 */