This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
infnan: perldiag entries fixing, add "in pack" to one message.
[perl5.git] / pp_pack.c
... / ...
CommitLineData
1/* pp_pack.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19 */
20
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
85#ifndef CHAR_BIT
86# define CHAR_BIT 8
87#endif
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND 2
90
91/*
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105/*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
115#if U16SIZE > SIZE16 || U32SIZE > SIZE32
116# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117# define OFF16(p) ((char*)(p))
118# define OFF32(p) ((char*)(p))
119# else
120# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123# else
124 ++++ bad cray byte order
125# endif
126# endif
127#else
128# define OFF16(p) ((char *) (p))
129# define OFF32(p) ((char *) (p))
130#endif
131
132#define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134#define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
136
137#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141#else
142# error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
147 */
148#endif
149
150/* Only to be used inside a loop (see the break) */
151#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
152STMT_START { \
153 if (UNLIKELY(utf8)) { \
154 if (!uni_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
156 } else { \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
161 s += len; \
162 } \
163} STMT_END
164
165#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
167
168#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
170
171#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
173
174#define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
176
177/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178#define MAX_SUB_TEMPLATE_LEVEL 100
179
180/* flags (note that type modifiers can also be used as flags!) */
181#define FLAG_WAS_UTF8 0x40
182#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183#define FLAG_UNPACK_ONLY_ONE 0x10
184#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185#define FLAG_SLASH 0x04
186#define FLAG_COMMA 0x02
187#define FLAG_PACK 0x01
188
189STATIC SV *
190S_mul128(pTHX_ SV *sv, U8 m)
191{
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
195
196 PERL_ARGS_ASSERT_MUL128;
197
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
213 }
214 return (sv);
215}
216
217/* Explosives and implosives. */
218
219#if 'I' == 73 && 'J' == 74
220/* On an ASCII/ISO kind of system */
221#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222#else
223/*
224 Some other sort of character set - use memchr() so we don't match
225 the null byte.
226 */
227#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
228#endif
229
230/* type modifiers */
231#define TYPE_IS_SHRIEKING 0x100
232#define TYPE_IS_BIG_ENDIAN 0x200
233#define TYPE_IS_LITTLE_ENDIAN 0x400
234#define TYPE_IS_PACK 0x800
235#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
236#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
237#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
238
239# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
241
242# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
243
244#define PACK_SIZE_CANNOT_CSUM 0x80
245#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
246#define PACK_SIZE_MASK 0x3F
247
248#include "packsizetables.c"
249
250static void
251S_reverse_copy(const char *src, char *dest, STRLEN len)
252{
253 dest += len;
254 while (len--)
255 *--dest = *src++;
256}
257
258STATIC U8
259uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
260{
261 STRLEN retlen;
262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
264 /* We try to process malformed UTF-8 as much as possible (preferably with
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
270 if (val >= 0x100) {
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
274 val &= 0xff;
275 }
276 *s += retlen;
277 return (U8)val;
278}
279
280#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
282 *(U8 *)(s)++)
283
284STATIC bool
285uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
286{
287 UV val;
288 STRLEN retlen;
289 const char *from = *s;
290 int bad = 0;
291 const U32 flags = ckWARN(WARN_UTF8) ?
292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
293 const bool needs_swap = NEEDS_SWAP(datumtype);
294
295 if (UNLIKELY(needs_swap))
296 buf += buf_len;
297
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
303 bad |= 1;
304 } else from += retlen;
305 if (val >= 0x100) {
306 bad |= 2;
307 val &= 0xff;
308 }
309 if (UNLIKELY(needs_swap))
310 *(U8 *)--buf = (U8)val;
311 else
312 *(U8 *)buf++ = (U8)val;
313 }
314 /* We have enough characters for the buffer. Did we have problems ? */
315 if (bad) {
316 if (bad & 1) {
317 /* Rewalk the string fragment while warning */
318 const char *ptr;
319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
323 }
324 if (from > end) from = end;
325 }
326 if ((bad & 2))
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
328 WARN_PACK : WARN_UNPACK),
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
332 }
333 *s = from;
334 return TRUE;
335}
336
337STATIC bool
338next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
339{
340 STRLEN retlen;
341 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
342 if (val >= 0x100 || !ISUUCHAR(val) ||
343 retlen == (STRLEN) -1 || retlen == 0) {
344 *out = 0;
345 return FALSE;
346 }
347 *out = PL_uudmap[val] & 077;
348 *s += retlen;
349 return TRUE;
350}
351
352STATIC char *
353S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
354 PERL_ARGS_ASSERT_BYTES_TO_UNI;
355
356 if (UNLIKELY(needs_swap)) {
357 const U8 *p = start + len;
358 while (p-- > start) {
359 append_utf8_from_native_byte(*p, (U8 **) & dest);
360 }
361 } else {
362 const U8 * const end = start + len;
363 while (start < end) {
364 append_utf8_from_native_byte(*start, (U8 **) & dest);
365 start++;
366 }
367 }
368 return dest;
369}
370
371#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
372STMT_START { \
373 if (UNLIKELY(utf8)) \
374 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
375 else { \
376 if (UNLIKELY(needs_swap)) \
377 S_reverse_copy((char *)(buf), cur, len); \
378 else \
379 Copy(buf, cur, len, char); \
380 (cur) += (len); \
381 } \
382} STMT_END
383
384#define GROWING(utf8, cat, start, cur, in_len) \
385STMT_START { \
386 STRLEN glen = (in_len); \
387 if (utf8) glen *= UTF8_EXPAND; \
388 if ((cur) + glen >= (start) + SvLEN(cat)) { \
389 (start) = sv_exp_grow(cat, glen); \
390 (cur) = (start) + SvCUR(cat); \
391 } \
392} STMT_END
393
394#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
395STMT_START { \
396 const STRLEN glen = (in_len); \
397 STRLEN gl = glen; \
398 if (utf8) gl *= UTF8_EXPAND; \
399 if ((cur) + gl >= (start) + SvLEN(cat)) { \
400 *cur = '\0'; \
401 SvCUR_set((cat), (cur) - (start)); \
402 (start) = sv_exp_grow(cat, gl); \
403 (cur) = (start) + SvCUR(cat); \
404 } \
405 PUSH_BYTES(utf8, cur, buf, glen, 0); \
406} STMT_END
407
408#define PUSH_BYTE(utf8, s, byte) \
409STMT_START { \
410 if (utf8) { \
411 const U8 au8 = (byte); \
412 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
413 } else *(U8 *)(s)++ = (byte); \
414} STMT_END
415
416/* Only to be used inside a loop (see the break) */
417#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
418STMT_START { \
419 STRLEN retlen; \
420 if (str >= end) break; \
421 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
422 if (retlen == (STRLEN) -1 || retlen == 0) { \
423 *cur = '\0'; \
424 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
425 } \
426 str += retlen; \
427} STMT_END
428
429static const char *_action( const tempsym_t* symptr )
430{
431 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
432}
433
434/* Returns the sizeof() struct described by pat */
435STATIC I32
436S_measure_struct(pTHX_ tempsym_t* symptr)
437{
438 I32 total = 0;
439
440 PERL_ARGS_ASSERT_MEASURE_STRUCT;
441
442 while (next_symbol(symptr)) {
443 I32 len;
444 int size;
445
446 switch (symptr->howlen) {
447 case e_star:
448 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
449 _action( symptr ) );
450
451 default:
452 /* e_no_len and e_number */
453 len = symptr->length;
454 break;
455 }
456
457 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
458 if (!size) {
459 int star;
460 /* endianness doesn't influence the size of a type */
461 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
462 default:
463 Perl_croak(aTHX_ "Invalid type '%c' in %s",
464 (int)TYPE_NO_MODIFIERS(symptr->code),
465 _action( symptr ) );
466 case '.' | TYPE_IS_SHRIEKING:
467 case '@' | TYPE_IS_SHRIEKING:
468 case '@':
469 case '.':
470 case '/':
471 case 'U': /* XXXX Is it correct? */
472 case 'w':
473 case 'u':
474 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
475 (int) TYPE_NO_MODIFIERS(symptr->code),
476 _action( symptr ) );
477 case '%':
478 size = 0;
479 break;
480 case '(':
481 {
482 tempsym_t savsym = *symptr;
483 symptr->patptr = savsym.grpbeg;
484 symptr->patend = savsym.grpend;
485 /* XXXX Theoretically, we need to measure many times at
486 different positions, since the subexpression may contain
487 alignment commands, but be not of aligned length.
488 Need to detect this and croak(). */
489 size = measure_struct(symptr);
490 *symptr = savsym;
491 break;
492 }
493 case 'X' | TYPE_IS_SHRIEKING:
494 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
495 */
496 if (!len) /* Avoid division by 0 */
497 len = 1;
498 len = total % len; /* Assumed: the start is aligned. */
499 /* FALLTHROUGH */
500 case 'X':
501 size = -1;
502 if (total < len)
503 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
504 break;
505 case 'x' | TYPE_IS_SHRIEKING:
506 if (!len) /* Avoid division by 0 */
507 len = 1;
508 star = total % len; /* Assumed: the start is aligned. */
509 if (star) /* Other portable ways? */
510 len = len - star;
511 else
512 len = 0;
513 /* FALLTHROUGH */
514 case 'x':
515 case 'A':
516 case 'Z':
517 case 'a':
518 size = 1;
519 break;
520 case 'B':
521 case 'b':
522 len = (len + 7)/8;
523 size = 1;
524 break;
525 case 'H':
526 case 'h':
527 len = (len + 1)/2;
528 size = 1;
529 break;
530
531 case 'P':
532 len = 1;
533 size = sizeof(char*);
534 break;
535 }
536 }
537 total += len * size;
538 }
539 return total;
540}
541
542
543/* locate matching closing parenthesis or bracket
544 * returns char pointer to char after match, or NULL
545 */
546STATIC const char *
547S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
548{
549 PERL_ARGS_ASSERT_GROUP_END;
550
551 while (patptr < patend) {
552 const char c = *patptr++;
553
554 if (isSPACE(c))
555 continue;
556 else if (c == ender)
557 return patptr-1;
558 else if (c == '#') {
559 while (patptr < patend && *patptr != '\n')
560 patptr++;
561 continue;
562 } else if (c == '(')
563 patptr = group_end(patptr, patend, ')') + 1;
564 else if (c == '[')
565 patptr = group_end(patptr, patend, ']') + 1;
566 }
567 Perl_croak(aTHX_ "No group ending character '%c' found in template",
568 ender);
569 NOT_REACHED; /* NOTREACHED */
570}
571
572
573/* Convert unsigned decimal number to binary.
574 * Expects a pointer to the first digit and address of length variable
575 * Advances char pointer to 1st non-digit char and returns number
576 */
577STATIC const char *
578S_get_num(pTHX_ const char *patptr, I32 *lenptr )
579{
580 I32 len = *patptr++ - '0';
581
582 PERL_ARGS_ASSERT_GET_NUM;
583
584 while (isDIGIT(*patptr)) {
585 if (len >= 0x7FFFFFFF/10)
586 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
587 len = (len * 10) + (*patptr++ - '0');
588 }
589 *lenptr = len;
590 return patptr;
591}
592
593/* The marvellous template parsing routine: Using state stored in *symptr,
594 * locates next template code and count
595 */
596STATIC bool
597S_next_symbol(pTHX_ tempsym_t* symptr )
598{
599 const char* patptr = symptr->patptr;
600 const char* const patend = symptr->patend;
601
602 PERL_ARGS_ASSERT_NEXT_SYMBOL;
603
604 symptr->flags &= ~FLAG_SLASH;
605
606 while (patptr < patend) {
607 if (isSPACE(*patptr))
608 patptr++;
609 else if (*patptr == '#') {
610 patptr++;
611 while (patptr < patend && *patptr != '\n')
612 patptr++;
613 if (patptr < patend)
614 patptr++;
615 } else {
616 /* We should have found a template code */
617 I32 code = *patptr++ & 0xFF;
618 U32 inherited_modifiers = 0;
619
620 if (code == ','){ /* grandfather in commas but with a warning */
621 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
622 symptr->flags |= FLAG_COMMA;
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Invalid type ',' in %s", _action( symptr ) );
625 }
626 continue;
627 }
628
629 /* for '(', skip to ')' */
630 if (code == '(') {
631 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
632 Perl_croak(aTHX_ "()-group starts with a count in %s",
633 _action( symptr ) );
634 symptr->grpbeg = patptr;
635 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
636 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
637 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
638 _action( symptr ) );
639 }
640
641 /* look for group modifiers to inherit */
642 if (TYPE_ENDIANNESS(symptr->flags)) {
643 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
644 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
645 }
646
647 /* look for modifiers */
648 while (patptr < patend) {
649 const char *allowed;
650 I32 modifier;
651 switch (*patptr) {
652 case '!':
653 modifier = TYPE_IS_SHRIEKING;
654 allowed = "sSiIlLxXnNvV@.";
655 break;
656 case '>':
657 modifier = TYPE_IS_BIG_ENDIAN;
658 allowed = ENDIANNESS_ALLOWED_TYPES;
659 break;
660 case '<':
661 modifier = TYPE_IS_LITTLE_ENDIAN;
662 allowed = ENDIANNESS_ALLOWED_TYPES;
663 break;
664 default:
665 allowed = "";
666 modifier = 0;
667 break;
668 }
669
670 if (modifier == 0)
671 break;
672
673 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
674 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
675 allowed, _action( symptr ) );
676
677 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
678 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
679 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
680 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
681 TYPE_ENDIANNESS_MASK)
682 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
683 *patptr, _action( symptr ) );
684
685 if ((code & modifier)) {
686 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
687 "Duplicate modifier '%c' after '%c' in %s",
688 *patptr, (int) TYPE_NO_MODIFIERS(code),
689 _action( symptr ) );
690 }
691
692 code |= modifier;
693 patptr++;
694 }
695
696 /* inherit modifiers */
697 code |= inherited_modifiers;
698
699 /* look for count and/or / */
700 if (patptr < patend) {
701 if (isDIGIT(*patptr)) {
702 patptr = get_num( patptr, &symptr->length );
703 symptr->howlen = e_number;
704
705 } else if (*patptr == '*') {
706 patptr++;
707 symptr->howlen = e_star;
708
709 } else if (*patptr == '[') {
710 const char* lenptr = ++patptr;
711 symptr->howlen = e_number;
712 patptr = group_end( patptr, patend, ']' ) + 1;
713 /* what kind of [] is it? */
714 if (isDIGIT(*lenptr)) {
715 lenptr = get_num( lenptr, &symptr->length );
716 if( *lenptr != ']' )
717 Perl_croak(aTHX_ "Malformed integer in [] in %s",
718 _action( symptr ) );
719 } else {
720 tempsym_t savsym = *symptr;
721 symptr->patend = patptr-1;
722 symptr->patptr = lenptr;
723 savsym.length = measure_struct(symptr);
724 *symptr = savsym;
725 }
726 } else {
727 symptr->howlen = e_no_len;
728 symptr->length = 1;
729 }
730
731 /* try to find / */
732 while (patptr < patend) {
733 if (isSPACE(*patptr))
734 patptr++;
735 else if (*patptr == '#') {
736 patptr++;
737 while (patptr < patend && *patptr != '\n')
738 patptr++;
739 if (patptr < patend)
740 patptr++;
741 } else {
742 if (*patptr == '/') {
743 symptr->flags |= FLAG_SLASH;
744 patptr++;
745 if (patptr < patend &&
746 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
747 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
748 _action( symptr ) );
749 }
750 break;
751 }
752 }
753 } else {
754 /* at end - no count, no / */
755 symptr->howlen = e_no_len;
756 symptr->length = 1;
757 }
758
759 symptr->code = code;
760 symptr->patptr = patptr;
761 return TRUE;
762 }
763 }
764 symptr->patptr = patptr;
765 return FALSE;
766}
767
768/*
769 There is no way to cleanly handle the case where we should process the
770 string per byte in its upgraded form while it's really in downgraded form
771 (e.g. estimates like strend-s as an upper bound for the number of
772 characters left wouldn't work). So if we foresee the need of this
773 (pattern starts with U or contains U0), we want to work on the encoded
774 version of the string. Users are advised to upgrade their pack string
775 themselves if they need to do a lot of unpacks like this on it
776*/
777STATIC bool
778need_utf8(const char *pat, const char *patend)
779{
780 bool first = TRUE;
781
782 PERL_ARGS_ASSERT_NEED_UTF8;
783
784 while (pat < patend) {
785 if (pat[0] == '#') {
786 pat++;
787 pat = (const char *) memchr(pat, '\n', patend-pat);
788 if (!pat) return FALSE;
789 } else if (pat[0] == 'U') {
790 if (first || pat[1] == '0') return TRUE;
791 } else first = FALSE;
792 pat++;
793 }
794 return FALSE;
795}
796
797STATIC char
798first_symbol(const char *pat, const char *patend) {
799 PERL_ARGS_ASSERT_FIRST_SYMBOL;
800
801 while (pat < patend) {
802 if (pat[0] != '#') return pat[0];
803 pat++;
804 pat = (const char *) memchr(pat, '\n', patend-pat);
805 if (!pat) return 0;
806 pat++;
807 }
808 return 0;
809}
810
811/*
812
813=head1 Pack and Unpack
814
815=for apidoc unpackstring
816
817The engine implementing the unpack() Perl function.
818
819Using the template pat..patend, this function unpacks the string
820s..strend into a number of mortal SVs, which it pushes onto the perl
821argument (@_) stack (so you will need to issue a C<PUTBACK> before and
822C<SPAGAIN> after the call to this function). It returns the number of
823pushed elements.
824
825The strend and patend pointers should point to the byte following the last
826character of each string.
827
828Although this function returns its values on the perl argument stack, it
829doesn't take any parameters from that stack (and thus in particular
830there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
831example).
832
833=cut */
834
835I32
836Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
837{
838 tempsym_t sym;
839
840 PERL_ARGS_ASSERT_UNPACKSTRING;
841
842 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
843 else if (need_utf8(pat, patend)) {
844 /* We probably should try to avoid this in case a scalar context call
845 wouldn't get to the "U0" */
846 STRLEN len = strend - s;
847 s = (char *) bytes_to_utf8((U8 *) s, &len);
848 SAVEFREEPV(s);
849 strend = s + len;
850 flags |= FLAG_DO_UTF8;
851 }
852
853 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
854 flags |= FLAG_PARSE_UTF8;
855
856 TEMPSYM_INIT(&sym, pat, patend, flags);
857
858 return unpack_rec(&sym, s, s, strend, NULL );
859}
860
861STATIC I32
862S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
863{
864 dSP;
865 SV *sv = NULL;
866 const I32 start_sp_offset = SP - PL_stack_base;
867 howlen_t howlen;
868 I32 checksum = 0;
869 UV cuv = 0;
870 NV cdouble = 0.0;
871 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
872 bool beyond = FALSE;
873 bool explicit_length;
874 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
875 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
876
877 PERL_ARGS_ASSERT_UNPACK_REC;
878
879 symptr->strbeg = s - strbeg;
880
881 while (next_symbol(symptr)) {
882 packprops_t props;
883 I32 len;
884 I32 datumtype = symptr->code;
885 bool needs_swap;
886 /* do first one only unless in list context
887 / is implemented by unpacking the count, then popping it from the
888 stack, so must check that we're not in the middle of a / */
889 if ( unpack_only_one
890 && (SP - PL_stack_base == start_sp_offset + 1)
891 && (datumtype != '/') ) /* XXX can this be omitted */
892 break;
893
894 switch (howlen = symptr->howlen) {
895 case e_star:
896 len = strend - strbeg; /* long enough */
897 break;
898 default:
899 /* e_no_len and e_number */
900 len = symptr->length;
901 break;
902 }
903
904 explicit_length = TRUE;
905 redo_switch:
906 beyond = s >= strend;
907
908 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
909 if (props) {
910 /* props nonzero means we can process this letter. */
911 const long size = props & PACK_SIZE_MASK;
912 const long howmany = (strend - s) / size;
913 if (len > howmany)
914 len = howmany;
915
916 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
917 if (len && unpack_only_one) len = 1;
918 EXTEND(SP, len);
919 EXTEND_MORTAL(len);
920 }
921 }
922
923 needs_swap = NEEDS_SWAP(datumtype);
924
925 switch(TYPE_NO_ENDIANNESS(datumtype)) {
926 default:
927 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
928
929 case '%':
930 if (howlen == e_no_len)
931 len = 16; /* len is not specified */
932 checksum = len;
933 cuv = 0;
934 cdouble = 0;
935 continue;
936
937 case '(':
938 {
939 tempsym_t savsym = *symptr;
940 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
941 symptr->flags |= group_modifiers;
942 symptr->patend = savsym.grpend;
943 symptr->previous = &savsym;
944 symptr->level++;
945 PUTBACK;
946 if (len && unpack_only_one) len = 1;
947 while (len--) {
948 symptr->patptr = savsym.grpbeg;
949 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
950 else symptr->flags &= ~FLAG_PARSE_UTF8;
951 unpack_rec(symptr, s, strbeg, strend, &s);
952 if (s == strend && savsym.howlen == e_star)
953 break; /* No way to continue */
954 }
955 SPAGAIN;
956 savsym.flags = symptr->flags & ~group_modifiers;
957 *symptr = savsym;
958 break;
959 }
960 case '.' | TYPE_IS_SHRIEKING:
961 case '.': {
962 const char *from;
963 SV *sv;
964 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
965 if (howlen == e_star) from = strbeg;
966 else if (len <= 0) from = s;
967 else {
968 tempsym_t *group = symptr;
969
970 while (--len && group) group = group->previous;
971 from = group ? strbeg + group->strbeg : strbeg;
972 }
973 sv = from <= s ?
974 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
976 mXPUSHs(sv);
977 break;
978 }
979 case '@' | TYPE_IS_SHRIEKING:
980 case '@':
981 s = strbeg + symptr->strbeg;
982 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
983 {
984 while (len > 0) {
985 if (s >= strend)
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
987 s += UTF8SKIP(s);
988 len--;
989 }
990 if (s > strend)
991 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992 } else {
993 if (strend-s < len)
994 Perl_croak(aTHX_ "'@' outside of string in unpack");
995 s += len;
996 }
997 break;
998 case 'X' | TYPE_IS_SHRIEKING:
999 if (!len) /* Avoid division by 0 */
1000 len = 1;
1001 if (utf8) {
1002 const char *hop, *last;
1003 I32 l = len;
1004 hop = last = strbeg;
1005 while (hop < s) {
1006 hop += UTF8SKIP(hop);
1007 if (--l == 0) {
1008 last = hop;
1009 l = len;
1010 }
1011 }
1012 if (last > s)
1013 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1014 s = last;
1015 break;
1016 }
1017 len = (s - strbeg) % len;
1018 /* FALLTHROUGH */
1019 case 'X':
1020 if (utf8) {
1021 while (len > 0) {
1022 if (s <= strbeg)
1023 Perl_croak(aTHX_ "'X' outside of string in unpack");
1024 while (--s, UTF8_IS_CONTINUATION(*s)) {
1025 if (s <= strbeg)
1026 Perl_croak(aTHX_ "'X' outside of string in unpack");
1027 }
1028 len--;
1029 }
1030 } else {
1031 if (len > s - strbeg)
1032 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1033 s -= len;
1034 }
1035 break;
1036 case 'x' | TYPE_IS_SHRIEKING: {
1037 I32 ai32;
1038 if (!len) /* Avoid division by 0 */
1039 len = 1;
1040 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041 else ai32 = (s - strbeg) % len;
1042 if (ai32 == 0) break;
1043 len -= ai32;
1044 }
1045 /* FALLTHROUGH */
1046 case 'x':
1047 if (utf8) {
1048 while (len>0) {
1049 if (s >= strend)
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1051 s += UTF8SKIP(s);
1052 len--;
1053 }
1054 } else {
1055 if (len > strend - s)
1056 Perl_croak(aTHX_ "'x' outside of string in unpack");
1057 s += len;
1058 }
1059 break;
1060 case '/':
1061 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1062
1063 case 'A':
1064 case 'Z':
1065 case 'a':
1066 if (checksum) {
1067 /* Preliminary length estimate is assumed done in 'W' */
1068 if (len > strend - s) len = strend - s;
1069 goto W_checksum;
1070 }
1071 if (utf8) {
1072 I32 l;
1073 const char *hop;
1074 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075 if (hop >= strend) {
1076 if (hop > strend)
1077 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1078 break;
1079 }
1080 }
1081 if (hop > strend)
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 len = hop - s;
1084 } else if (len > strend - s)
1085 len = strend - s;
1086
1087 if (datumtype == 'Z') {
1088 /* 'Z' strips stuff after first null */
1089 const char *ptr, *end;
1090 end = s + len;
1091 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1092 sv = newSVpvn(s, ptr-s);
1093 if (howlen == e_star) /* exact for 'Z*' */
1094 len = ptr-s + (ptr != strend ? 1 : 0);
1095 } else if (datumtype == 'A') {
1096 /* 'A' strips both nulls and spaces */
1097 const char *ptr;
1098 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1099 for (ptr = s+len-1; ptr >= s; ptr--)
1100 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1101 !isSPACE_utf8(ptr)) break;
1102 if (ptr >= s) ptr += UTF8SKIP(ptr);
1103 else ptr++;
1104 if (ptr > s+len)
1105 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1106 } else {
1107 for (ptr = s+len-1; ptr >= s; ptr--)
1108 if (*ptr != 0 && !isSPACE(*ptr)) break;
1109 ptr++;
1110 }
1111 sv = newSVpvn(s, ptr-s);
1112 } else sv = newSVpvn(s, len);
1113
1114 if (utf8) {
1115 SvUTF8_on(sv);
1116 /* Undo any upgrade done due to need_utf8() */
1117 if (!(symptr->flags & FLAG_WAS_UTF8))
1118 sv_utf8_downgrade(sv, 0);
1119 }
1120 mXPUSHs(sv);
1121 s += len;
1122 break;
1123 case 'B':
1124 case 'b': {
1125 char *str;
1126 if (howlen == e_star || len > (strend - s) * 8)
1127 len = (strend - s) * 8;
1128 if (checksum) {
1129 if (utf8)
1130 while (len >= 8 && s < strend) {
1131 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1132 len -= 8;
1133 }
1134 else
1135 while (len >= 8) {
1136 cuv += PL_bitcount[*(U8 *)s++];
1137 len -= 8;
1138 }
1139 if (len && s < strend) {
1140 U8 bits;
1141 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1142 if (datumtype == 'b')
1143 while (len-- > 0) {
1144 if (bits & 1) cuv++;
1145 bits >>= 1;
1146 }
1147 else
1148 while (len-- > 0) {
1149 if (bits & 0x80) cuv++;
1150 bits <<= 1;
1151 }
1152 }
1153 break;
1154 }
1155
1156 sv = sv_2mortal(newSV(len ? len : 1));
1157 SvPOK_on(sv);
1158 str = SvPVX(sv);
1159 if (datumtype == 'b') {
1160 U8 bits = 0;
1161 const I32 ai32 = len;
1162 for (len = 0; len < ai32; len++) {
1163 if (len & 7) bits >>= 1;
1164 else if (utf8) {
1165 if (s >= strend) break;
1166 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1167 } else bits = *(U8 *) s++;
1168 *str++ = bits & 1 ? '1' : '0';
1169 }
1170 } else {
1171 U8 bits = 0;
1172 const I32 ai32 = len;
1173 for (len = 0; len < ai32; len++) {
1174 if (len & 7) bits <<= 1;
1175 else if (utf8) {
1176 if (s >= strend) break;
1177 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1178 } else bits = *(U8 *) s++;
1179 *str++ = bits & 0x80 ? '1' : '0';
1180 }
1181 }
1182 *str = '\0';
1183 SvCUR_set(sv, str - SvPVX_const(sv));
1184 XPUSHs(sv);
1185 break;
1186 }
1187 case 'H':
1188 case 'h': {
1189 char *str = NULL;
1190 /* Preliminary length estimate, acceptable for utf8 too */
1191 if (howlen == e_star || len > (strend - s) * 2)
1192 len = (strend - s) * 2;
1193 if (!checksum) {
1194 sv = sv_2mortal(newSV(len ? len : 1));
1195 SvPOK_on(sv);
1196 str = SvPVX(sv);
1197 }
1198 if (datumtype == 'h') {
1199 U8 bits = 0;
1200 I32 ai32 = len;
1201 for (len = 0; len < ai32; len++) {
1202 if (len & 1) bits >>= 4;
1203 else if (utf8) {
1204 if (s >= strend) break;
1205 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1206 } else bits = * (U8 *) s++;
1207 if (!checksum)
1208 *str++ = PL_hexdigit[bits & 15];
1209 }
1210 } else {
1211 U8 bits = 0;
1212 const I32 ai32 = len;
1213 for (len = 0; len < ai32; len++) {
1214 if (len & 1) bits <<= 4;
1215 else if (utf8) {
1216 if (s >= strend) break;
1217 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1218 } else bits = *(U8 *) s++;
1219 if (!checksum)
1220 *str++ = PL_hexdigit[(bits >> 4) & 15];
1221 }
1222 }
1223 if (!checksum) {
1224 *str = '\0';
1225 SvCUR_set(sv, str - SvPVX_const(sv));
1226 XPUSHs(sv);
1227 }
1228 break;
1229 }
1230 case 'C':
1231 if (len == 0) {
1232 if (explicit_length)
1233 /* Switch to "character" mode */
1234 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1235 break;
1236 }
1237 /* FALLTHROUGH */
1238 case 'c':
1239 while (len-- > 0 && s < strend) {
1240 int aint;
1241 if (utf8)
1242 {
1243 STRLEN retlen;
1244 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 s += retlen;
1249 }
1250 else
1251 aint = *(U8 *)(s)++;
1252 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1253 aint -= 256;
1254 if (!checksum)
1255 mPUSHi(aint);
1256 else if (checksum > bits_in_uv)
1257 cdouble += (NV)aint;
1258 else
1259 cuv += aint;
1260 }
1261 break;
1262 case 'W':
1263 W_checksum:
1264 if (utf8) {
1265 while (len-- > 0 && s < strend) {
1266 STRLEN retlen;
1267 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1269 if (retlen == (STRLEN) -1 || retlen == 0)
1270 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1271 s += retlen;
1272 if (!checksum)
1273 mPUSHu(val);
1274 else if (checksum > bits_in_uv)
1275 cdouble += (NV) val;
1276 else
1277 cuv += val;
1278 }
1279 } else if (!checksum)
1280 while (len-- > 0) {
1281 const U8 ch = *(U8 *) s++;
1282 mPUSHu(ch);
1283 }
1284 else if (checksum > bits_in_uv)
1285 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1286 else
1287 while (len-- > 0) cuv += *(U8 *) s++;
1288 break;
1289 case 'U':
1290 if (len == 0) {
1291 if (explicit_length && howlen != e_star) {
1292 /* Switch to "bytes in UTF-8" mode */
1293 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1294 else
1295 /* Should be impossible due to the need_utf8() test */
1296 Perl_croak(aTHX_ "U0 mode on a byte string");
1297 }
1298 break;
1299 }
1300 if (len > strend - s) len = strend - s;
1301 if (!checksum) {
1302 if (len && unpack_only_one) len = 1;
1303 EXTEND(SP, len);
1304 EXTEND_MORTAL(len);
1305 }
1306 while (len-- > 0 && s < strend) {
1307 STRLEN retlen;
1308 UV auv;
1309 if (utf8) {
1310 U8 result[UTF8_MAXLEN];
1311 const char *ptr = s;
1312 STRLEN len;
1313 /* Bug: warns about bad utf8 even if we are short on bytes
1314 and will break out of the loop */
1315 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1316 'U'))
1317 break;
1318 len = UTF8SKIP(result);
1319 if (!uni_to_bytes(aTHX_ &ptr, strend,
1320 (char *) &result[1], len-1, 'U')) break;
1321 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1322 s = ptr;
1323 } else {
1324 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1325 if (retlen == (STRLEN) -1 || retlen == 0)
1326 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1327 s += retlen;
1328 }
1329 if (!checksum)
1330 mPUSHu(auv);
1331 else if (checksum > bits_in_uv)
1332 cdouble += (NV) auv;
1333 else
1334 cuv += auv;
1335 }
1336 break;
1337 case 's' | TYPE_IS_SHRIEKING:
1338#if SHORTSIZE != SIZE16
1339 while (len-- > 0) {
1340 short ashort;
1341 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1342 if (!checksum)
1343 mPUSHi(ashort);
1344 else if (checksum > bits_in_uv)
1345 cdouble += (NV)ashort;
1346 else
1347 cuv += ashort;
1348 }
1349 break;
1350#else
1351 /* FALLTHROUGH */
1352#endif
1353 case 's':
1354 while (len-- > 0) {
1355 I16 ai16;
1356
1357#if U16SIZE > SIZE16
1358 ai16 = 0;
1359#endif
1360 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1361#if U16SIZE > SIZE16
1362 if (ai16 > 32767)
1363 ai16 -= 65536;
1364#endif
1365 if (!checksum)
1366 mPUSHi(ai16);
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)ai16;
1369 else
1370 cuv += ai16;
1371 }
1372 break;
1373 case 'S' | TYPE_IS_SHRIEKING:
1374#if SHORTSIZE != SIZE16
1375 while (len-- > 0) {
1376 unsigned short aushort;
1377 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1378 needs_swap);
1379 if (!checksum)
1380 mPUSHu(aushort);
1381 else if (checksum > bits_in_uv)
1382 cdouble += (NV)aushort;
1383 else
1384 cuv += aushort;
1385 }
1386 break;
1387#else
1388 /* FALLTHROUGH */
1389#endif
1390 case 'v':
1391 case 'n':
1392 case 'S':
1393 while (len-- > 0) {
1394 U16 au16;
1395#if U16SIZE > SIZE16
1396 au16 = 0;
1397#endif
1398 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1399 if (datumtype == 'n')
1400 au16 = PerlSock_ntohs(au16);
1401 if (datumtype == 'v')
1402 au16 = vtohs(au16);
1403 if (!checksum)
1404 mPUSHu(au16);
1405 else if (checksum > bits_in_uv)
1406 cdouble += (NV) au16;
1407 else
1408 cuv += au16;
1409 }
1410 break;
1411 case 'v' | TYPE_IS_SHRIEKING:
1412 case 'n' | TYPE_IS_SHRIEKING:
1413 while (len-- > 0) {
1414 I16 ai16;
1415# if U16SIZE > SIZE16
1416 ai16 = 0;
1417# endif
1418 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1419 /* There should never be any byte-swapping here. */
1420 assert(!TYPE_ENDIANNESS(datumtype));
1421 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1422 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1423 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1424 ai16 = (I16) vtohs((U16) ai16);
1425 if (!checksum)
1426 mPUSHi(ai16);
1427 else if (checksum > bits_in_uv)
1428 cdouble += (NV) ai16;
1429 else
1430 cuv += ai16;
1431 }
1432 break;
1433 case 'i':
1434 case 'i' | TYPE_IS_SHRIEKING:
1435 while (len-- > 0) {
1436 int aint;
1437 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1438 if (!checksum)
1439 mPUSHi(aint);
1440 else if (checksum > bits_in_uv)
1441 cdouble += (NV)aint;
1442 else
1443 cuv += aint;
1444 }
1445 break;
1446 case 'I':
1447 case 'I' | TYPE_IS_SHRIEKING:
1448 while (len-- > 0) {
1449 unsigned int auint;
1450 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1451 if (!checksum)
1452 mPUSHu(auint);
1453 else if (checksum > bits_in_uv)
1454 cdouble += (NV)auint;
1455 else
1456 cuv += auint;
1457 }
1458 break;
1459 case 'j':
1460 while (len-- > 0) {
1461 IV aiv;
1462 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1463 if (!checksum)
1464 mPUSHi(aiv);
1465 else if (checksum > bits_in_uv)
1466 cdouble += (NV)aiv;
1467 else
1468 cuv += aiv;
1469 }
1470 break;
1471 case 'J':
1472 while (len-- > 0) {
1473 UV auv;
1474 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1475 if (!checksum)
1476 mPUSHu(auv);
1477 else if (checksum > bits_in_uv)
1478 cdouble += (NV)auv;
1479 else
1480 cuv += auv;
1481 }
1482 break;
1483 case 'l' | TYPE_IS_SHRIEKING:
1484#if LONGSIZE != SIZE32
1485 while (len-- > 0) {
1486 long along;
1487 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1488 if (!checksum)
1489 mPUSHi(along);
1490 else if (checksum > bits_in_uv)
1491 cdouble += (NV)along;
1492 else
1493 cuv += along;
1494 }
1495 break;
1496#else
1497 /* FALLTHROUGH */
1498#endif
1499 case 'l':
1500 while (len-- > 0) {
1501 I32 ai32;
1502#if U32SIZE > SIZE32
1503 ai32 = 0;
1504#endif
1505 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1506#if U32SIZE > SIZE32
1507 if (ai32 > 2147483647) ai32 -= 4294967296;
1508#endif
1509 if (!checksum)
1510 mPUSHi(ai32);
1511 else if (checksum > bits_in_uv)
1512 cdouble += (NV)ai32;
1513 else
1514 cuv += ai32;
1515 }
1516 break;
1517 case 'L' | TYPE_IS_SHRIEKING:
1518#if LONGSIZE != SIZE32
1519 while (len-- > 0) {
1520 unsigned long aulong;
1521 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1522 if (!checksum)
1523 mPUSHu(aulong);
1524 else if (checksum > bits_in_uv)
1525 cdouble += (NV)aulong;
1526 else
1527 cuv += aulong;
1528 }
1529 break;
1530#else
1531 /* FALLTHROUGH */
1532#endif
1533 case 'V':
1534 case 'N':
1535 case 'L':
1536 while (len-- > 0) {
1537 U32 au32;
1538#if U32SIZE > SIZE32
1539 au32 = 0;
1540#endif
1541 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1542 if (datumtype == 'N')
1543 au32 = PerlSock_ntohl(au32);
1544 if (datumtype == 'V')
1545 au32 = vtohl(au32);
1546 if (!checksum)
1547 mPUSHu(au32);
1548 else if (checksum > bits_in_uv)
1549 cdouble += (NV)au32;
1550 else
1551 cuv += au32;
1552 }
1553 break;
1554 case 'V' | TYPE_IS_SHRIEKING:
1555 case 'N' | TYPE_IS_SHRIEKING:
1556 while (len-- > 0) {
1557 I32 ai32;
1558#if U32SIZE > SIZE32
1559 ai32 = 0;
1560#endif
1561 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1562 /* There should never be any byte swapping here. */
1563 assert(!TYPE_ENDIANNESS(datumtype));
1564 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1565 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1566 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1567 ai32 = (I32)vtohl((U32)ai32);
1568 if (!checksum)
1569 mPUSHi(ai32);
1570 else if (checksum > bits_in_uv)
1571 cdouble += (NV)ai32;
1572 else
1573 cuv += ai32;
1574 }
1575 break;
1576 case 'p':
1577 while (len-- > 0) {
1578 const char *aptr;
1579 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1580 /* newSVpv generates undef if aptr is NULL */
1581 mPUSHs(newSVpv(aptr, 0));
1582 }
1583 break;
1584 case 'w':
1585 {
1586 UV auv = 0;
1587 U32 bytes = 0;
1588
1589 while (len > 0 && s < strend) {
1590 U8 ch;
1591 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1592 auv = (auv << 7) | (ch & 0x7f);
1593 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1594 if (ch < 0x80) {
1595 bytes = 0;
1596 mPUSHu(auv);
1597 len--;
1598 auv = 0;
1599 continue;
1600 }
1601 if (++bytes >= sizeof(UV)) { /* promote to string */
1602 const char *t;
1603
1604 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1605 while (s < strend) {
1606 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1607 sv = mul128(sv, (U8)(ch & 0x7f));
1608 if (!(ch & 0x80)) {
1609 bytes = 0;
1610 break;
1611 }
1612 }
1613 t = SvPV_nolen_const(sv);
1614 while (*t == '0')
1615 t++;
1616 sv_chop(sv, t);
1617 mPUSHs(sv);
1618 len--;
1619 auv = 0;
1620 }
1621 }
1622 if ((s >= strend) && bytes)
1623 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1624 }
1625 break;
1626 case 'P':
1627 if (symptr->howlen == e_star)
1628 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1629 EXTEND(SP, 1);
1630 if (s + sizeof(char*) <= strend) {
1631 char *aptr;
1632 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1633 /* newSVpvn generates undef if aptr is NULL */
1634 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1635 }
1636 break;
1637#if defined(HAS_QUAD) && IVSIZE >= 8
1638 case 'q':
1639 while (len-- > 0) {
1640 Quad_t aquad;
1641 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1642 if (!checksum)
1643 mPUSHs(newSViv((IV)aquad));
1644 else if (checksum > bits_in_uv)
1645 cdouble += (NV)aquad;
1646 else
1647 cuv += aquad;
1648 }
1649 break;
1650 case 'Q':
1651 while (len-- > 0) {
1652 Uquad_t auquad;
1653 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1654 if (!checksum)
1655 mPUSHs(newSVuv((UV)auquad));
1656 else if (checksum > bits_in_uv)
1657 cdouble += (NV)auquad;
1658 else
1659 cuv += auquad;
1660 }
1661 break;
1662#endif
1663 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1664 case 'f':
1665 while (len-- > 0) {
1666 float afloat;
1667 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1668 if (!checksum)
1669 mPUSHn(afloat);
1670 else
1671 cdouble += afloat;
1672 }
1673 break;
1674 case 'd':
1675 while (len-- > 0) {
1676 double adouble;
1677 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1678 if (!checksum)
1679 mPUSHn(adouble);
1680 else
1681 cdouble += adouble;
1682 }
1683 break;
1684 case 'F':
1685 while (len-- > 0) {
1686 NV_bytes anv;
1687 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1688 datumtype, needs_swap);
1689 if (!checksum)
1690 mPUSHn(anv.nv);
1691 else
1692 cdouble += anv.nv;
1693 }
1694 break;
1695#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1696 case 'D':
1697 while (len-- > 0) {
1698 ld_bytes aldouble;
1699 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1700 sizeof(aldouble.bytes), datumtype, needs_swap);
1701 /* The most common long double format, the x86 80-bit
1702 * extended precision, has either 2 or 6 unused bytes,
1703 * which may contain garbage, which may contain
1704 * unintentional data. While we do zero the bytes of
1705 * the long double data in pack(), here in unpack() we
1706 * don't, because it's really hard to envision that
1707 * reading the long double off aldouble would be
1708 * affected by the unused bytes.
1709 *
1710 * Note that trying to unpack 'long doubles' of 'long
1711 * doubles' packed in another system is in the general
1712 * case doomed without having more detail. */
1713 if (!checksum)
1714 mPUSHn(aldouble.ld);
1715 else
1716 cdouble += aldouble.ld;
1717 }
1718 break;
1719#endif
1720 case 'u':
1721 if (!checksum) {
1722 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1723 sv = sv_2mortal(newSV(l));
1724 if (l) SvPOK_on(sv);
1725 }
1726 if (utf8) {
1727 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1728 I32 a, b, c, d;
1729 char hunk[3];
1730
1731 while (len > 0) {
1732 next_uni_uu(aTHX_ &s, strend, &a);
1733 next_uni_uu(aTHX_ &s, strend, &b);
1734 next_uni_uu(aTHX_ &s, strend, &c);
1735 next_uni_uu(aTHX_ &s, strend, &d);
1736 hunk[0] = (char)((a << 2) | (b >> 4));
1737 hunk[1] = (char)((b << 4) | (c >> 2));
1738 hunk[2] = (char)((c << 6) | d);
1739 if (!checksum)
1740 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1741 len -= 3;
1742 }
1743 if (s < strend) {
1744 if (*s == '\n') {
1745 s++;
1746 }
1747 else {
1748 /* possible checksum byte */
1749 const char *skip = s+UTF8SKIP(s);
1750 if (skip < strend && *skip == '\n')
1751 s = skip+1;
1752 }
1753 }
1754 }
1755 } else {
1756 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1757 I32 a, b, c, d;
1758 char hunk[3];
1759
1760 len = PL_uudmap[*(U8*)s++] & 077;
1761 while (len > 0) {
1762 if (s < strend && ISUUCHAR(*s))
1763 a = PL_uudmap[*(U8*)s++] & 077;
1764 else
1765 a = 0;
1766 if (s < strend && ISUUCHAR(*s))
1767 b = PL_uudmap[*(U8*)s++] & 077;
1768 else
1769 b = 0;
1770 if (s < strend && ISUUCHAR(*s))
1771 c = PL_uudmap[*(U8*)s++] & 077;
1772 else
1773 c = 0;
1774 if (s < strend && ISUUCHAR(*s))
1775 d = PL_uudmap[*(U8*)s++] & 077;
1776 else
1777 d = 0;
1778 hunk[0] = (char)((a << 2) | (b >> 4));
1779 hunk[1] = (char)((b << 4) | (c >> 2));
1780 hunk[2] = (char)((c << 6) | d);
1781 if (!checksum)
1782 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1783 len -= 3;
1784 }
1785 if (*s == '\n')
1786 s++;
1787 else /* possible checksum byte */
1788 if (s + 1 < strend && s[1] == '\n')
1789 s += 2;
1790 }
1791 }
1792 if (!checksum)
1793 XPUSHs(sv);
1794 break;
1795 }
1796
1797 if (checksum) {
1798 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1799 (checksum > bits_in_uv &&
1800 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1801 NV trouble, anv;
1802
1803 anv = (NV) (1 << (checksum & 15));
1804 while (checksum >= 16) {
1805 checksum -= 16;
1806 anv *= 65536.0;
1807 }
1808 while (cdouble < 0.0)
1809 cdouble += anv;
1810 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1811 sv = newSVnv(cdouble);
1812 }
1813 else {
1814 if (checksum < bits_in_uv) {
1815 UV mask = ((UV)1 << checksum) - 1;
1816 cuv &= mask;
1817 }
1818 sv = newSVuv(cuv);
1819 }
1820 mXPUSHs(sv);
1821 checksum = 0;
1822 }
1823
1824 if (symptr->flags & FLAG_SLASH){
1825 if (SP - PL_stack_base - start_sp_offset <= 0)
1826 break;
1827 if( next_symbol(symptr) ){
1828 if( symptr->howlen == e_number )
1829 Perl_croak(aTHX_ "Count after length/code in unpack" );
1830 if( beyond ){
1831 /* ...end of char buffer then no decent length available */
1832 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1833 } else {
1834 /* take top of stack (hope it's numeric) */
1835 len = POPi;
1836 if( len < 0 )
1837 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1838 }
1839 } else {
1840 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1841 }
1842 datumtype = symptr->code;
1843 explicit_length = FALSE;
1844 goto redo_switch;
1845 }
1846 }
1847
1848 if (new_s)
1849 *new_s = s;
1850 PUTBACK;
1851 return SP - PL_stack_base - start_sp_offset;
1852}
1853
1854PP(pp_unpack)
1855{
1856 dSP;
1857 dPOPPOPssrl;
1858 I32 gimme = GIMME_V;
1859 STRLEN llen;
1860 STRLEN rlen;
1861 const char *pat = SvPV_const(left, llen);
1862 const char *s = SvPV_const(right, rlen);
1863 const char *strend = s + rlen;
1864 const char *patend = pat + llen;
1865 I32 cnt;
1866
1867 PUTBACK;
1868 cnt = unpackstring(pat, patend, s, strend,
1869 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1870 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1871
1872 SPAGAIN;
1873 if ( !cnt && gimme == G_SCALAR )
1874 PUSHs(&PL_sv_undef);
1875 RETURN;
1876}
1877
1878STATIC U8 *
1879doencodes(U8 *h, const char *s, I32 len)
1880{
1881 *h++ = PL_uuemap[len];
1882 while (len > 2) {
1883 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1884 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1885 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1886 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1887 s += 3;
1888 len -= 3;
1889 }
1890 if (len > 0) {
1891 const char r = (len > 1 ? s[1] : '\0');
1892 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1893 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1894 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1895 *h++ = PL_uuemap[0];
1896 }
1897 *h++ = '\n';
1898 return h;
1899}
1900
1901STATIC SV *
1902S_is_an_int(pTHX_ const char *s, STRLEN l)
1903{
1904 SV *result = newSVpvn(s, l);
1905 char *const result_c = SvPV_nolen(result); /* convenience */
1906 char *out = result_c;
1907 bool skip = 1;
1908 bool ignore = 0;
1909
1910 PERL_ARGS_ASSERT_IS_AN_INT;
1911
1912 while (*s) {
1913 switch (*s) {
1914 case ' ':
1915 break;
1916 case '+':
1917 if (!skip) {
1918 SvREFCNT_dec(result);
1919 return (NULL);
1920 }
1921 break;
1922 case '0':
1923 case '1':
1924 case '2':
1925 case '3':
1926 case '4':
1927 case '5':
1928 case '6':
1929 case '7':
1930 case '8':
1931 case '9':
1932 skip = 0;
1933 if (!ignore) {
1934 *(out++) = *s;
1935 }
1936 break;
1937 case '.':
1938 ignore = 1;
1939 break;
1940 default:
1941 SvREFCNT_dec(result);
1942 return (NULL);
1943 }
1944 s++;
1945 }
1946 *(out++) = '\0';
1947 SvCUR_set(result, out - result_c);
1948 return (result);
1949}
1950
1951/* pnum must be '\0' terminated */
1952STATIC int
1953S_div128(pTHX_ SV *pnum, bool *done)
1954{
1955 STRLEN len;
1956 char * const s = SvPV(pnum, len);
1957 char *t = s;
1958 int m = 0;
1959
1960 PERL_ARGS_ASSERT_DIV128;
1961
1962 *done = 1;
1963 while (*t) {
1964 const int i = m * 10 + (*t - '0');
1965 const int r = (i >> 7); /* r < 10 */
1966 m = i & 0x7F;
1967 if (r) {
1968 *done = 0;
1969 }
1970 *(t++) = '0' + r;
1971 }
1972 *(t++) = '\0';
1973 SvCUR_set(pnum, (STRLEN) (t - s));
1974 return (m);
1975}
1976
1977/*
1978=for apidoc packlist
1979
1980The engine implementing pack() Perl function.
1981
1982=cut
1983*/
1984
1985void
1986Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1987{
1988 tempsym_t sym;
1989
1990 PERL_ARGS_ASSERT_PACKLIST;
1991
1992 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1993
1994 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1995 Also make sure any UTF8 flag is loaded */
1996 SvPV_force_nolen(cat);
1997 if (DO_UTF8(cat))
1998 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1999
2000 (void)pack_rec( cat, &sym, beglist, endlist );
2001}
2002
2003/* like sv_utf8_upgrade, but also repoint the group start markers */
2004STATIC void
2005marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2006 STRLEN len;
2007 tempsym_t *group;
2008 const char *from_ptr, *from_start, *from_end, **marks, **m;
2009 char *to_start, *to_ptr;
2010
2011 if (SvUTF8(sv)) return;
2012
2013 from_start = SvPVX_const(sv);
2014 from_end = from_start + SvCUR(sv);
2015 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2016 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2017 if (from_ptr == from_end) {
2018 /* Simple case: no character needs to be changed */
2019 SvUTF8_on(sv);
2020 return;
2021 }
2022
2023 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2024 Newx(to_start, len, char);
2025 Copy(from_start, to_start, from_ptr-from_start, char);
2026 to_ptr = to_start + (from_ptr-from_start);
2027
2028 Newx(marks, sym_ptr->level+2, const char *);
2029 for (group=sym_ptr; group; group = group->previous)
2030 marks[group->level] = from_start + group->strbeg;
2031 marks[sym_ptr->level+1] = from_end+1;
2032 for (m = marks; *m < from_ptr; m++)
2033 *m = to_start + (*m-from_start);
2034
2035 for (;from_ptr < from_end; from_ptr++) {
2036 while (*m == from_ptr) *m++ = to_ptr;
2037 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2038 }
2039 *to_ptr = 0;
2040
2041 while (*m == from_ptr) *m++ = to_ptr;
2042 if (m != marks + sym_ptr->level+1) {
2043 Safefree(marks);
2044 Safefree(to_start);
2045 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2046 "level=%d", m, marks, sym_ptr->level);
2047 }
2048 for (group=sym_ptr; group; group = group->previous)
2049 group->strbeg = marks[group->level] - to_start;
2050 Safefree(marks);
2051
2052 if (SvOOK(sv)) {
2053 if (SvIVX(sv)) {
2054 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2055 from_start -= SvIVX(sv);
2056 SvIV_set(sv, 0);
2057 }
2058 SvFLAGS(sv) &= ~SVf_OOK;
2059 }
2060 if (SvLEN(sv) != 0)
2061 Safefree(from_start);
2062 SvPV_set(sv, to_start);
2063 SvCUR_set(sv, to_ptr - to_start);
2064 SvLEN_set(sv, len);
2065 SvUTF8_on(sv);
2066}
2067
2068/* Exponential string grower. Makes string extension effectively O(n)
2069 needed says how many extra bytes we need (not counting the final '\0')
2070 Only grows the string if there is an actual lack of space
2071*/
2072STATIC char *
2073S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2074 const STRLEN cur = SvCUR(sv);
2075 const STRLEN len = SvLEN(sv);
2076 STRLEN extend;
2077
2078 PERL_ARGS_ASSERT_SV_EXP_GROW;
2079
2080 if (len - cur > needed) return SvPVX(sv);
2081 extend = needed > len ? needed : len;
2082 return SvGROW(sv, len+extend+1);
2083}
2084
2085STATIC
2086SV **
2087S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2088{
2089 tempsym_t lookahead;
2090 I32 items = endlist - beglist;
2091 bool found = next_symbol(symptr);
2092 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2093 bool warn_utf8 = ckWARN(WARN_UTF8);
2094 char* from;
2095
2096 PERL_ARGS_ASSERT_PACK_REC;
2097
2098 if (symptr->level == 0 && found && symptr->code == 'U') {
2099 marked_upgrade(aTHX_ cat, symptr);
2100 symptr->flags |= FLAG_DO_UTF8;
2101 utf8 = 0;
2102 }
2103 symptr->strbeg = SvCUR(cat);
2104
2105 while (found) {
2106 SV *fromstr;
2107 STRLEN fromlen;
2108 I32 len;
2109 SV *lengthcode = NULL;
2110 I32 datumtype = symptr->code;
2111 howlen_t howlen = symptr->howlen;
2112 char *start = SvPVX(cat);
2113 char *cur = start + SvCUR(cat);
2114 bool needs_swap;
2115
2116#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2117#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2118
2119 switch (howlen) {
2120 case e_star:
2121 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2122 0 : items;
2123 break;
2124 default:
2125 /* e_no_len and e_number */
2126 len = symptr->length;
2127 break;
2128 }
2129
2130 if (len) {
2131 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2132
2133 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2134 /* We can process this letter. */
2135 STRLEN size = props & PACK_SIZE_MASK;
2136 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2137 }
2138 }
2139
2140 /* Look ahead for next symbol. Do we have code/code? */
2141 lookahead = *symptr;
2142 found = next_symbol(&lookahead);
2143 if (symptr->flags & FLAG_SLASH) {
2144 IV count;
2145 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2146 if (strchr("aAZ", lookahead.code)) {
2147 if (lookahead.howlen == e_number) count = lookahead.length;
2148 else {
2149 if (items > 0) {
2150 count = sv_len_utf8(*beglist);
2151 }
2152 else count = 0;
2153 if (lookahead.code == 'Z') count++;
2154 }
2155 } else {
2156 if (lookahead.howlen == e_number && lookahead.length < items)
2157 count = lookahead.length;
2158 else count = items;
2159 }
2160 lookahead.howlen = e_number;
2161 lookahead.length = count;
2162 lengthcode = sv_2mortal(newSViv(count));
2163 }
2164
2165 needs_swap = NEEDS_SWAP(datumtype);
2166
2167 fromstr = PEEKFROM;
2168 if (SvNOK(fromstr)) {
2169 const NV nv = SvNV(fromstr);
2170 const char c = TYPE_NO_MODIFIERS(datumtype);
2171 if (Perl_isinfnan(nv) && !strchr("fdFD", c)) {
2172 if (c == 'w')
2173 Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
2174 else
2175 Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'",
2176 nv, (int) c);
2177 }
2178 }
2179
2180 /* Code inside the switch must take care to properly update
2181 cat (CUR length and '\0' termination) if it updated *cur and
2182 doesn't simply leave using break */
2183 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2184 default:
2185 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2186 (int) TYPE_NO_MODIFIERS(datumtype));
2187 case '%':
2188 Perl_croak(aTHX_ "'%%' may not be used in pack");
2189
2190 case '.' | TYPE_IS_SHRIEKING:
2191 case '.':
2192 if (howlen == e_star) from = start;
2193 else if (len == 0) from = cur;
2194 else {
2195 tempsym_t *group = symptr;
2196
2197 while (--len && group) group = group->previous;
2198 from = group ? start + group->strbeg : start;
2199 }
2200 fromstr = NEXTFROM;
2201 len = SvIV(fromstr);
2202 goto resize;
2203 case '@' | TYPE_IS_SHRIEKING:
2204 case '@':
2205 from = start + symptr->strbeg;
2206 resize:
2207 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2208 if (len >= 0) {
2209 while (len && from < cur) {
2210 from += UTF8SKIP(from);
2211 len--;
2212 }
2213 if (from > cur)
2214 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2215 if (len) {
2216 /* Here we know from == cur */
2217 grow:
2218 GROWING(0, cat, start, cur, len);
2219 Zero(cur, len, char);
2220 cur += len;
2221 } else if (from < cur) {
2222 len = cur - from;
2223 goto shrink;
2224 } else goto no_change;
2225 } else {
2226 cur = from;
2227 len = -len;
2228 goto utf8_shrink;
2229 }
2230 else {
2231 len -= cur - from;
2232 if (len > 0) goto grow;
2233 if (len == 0) goto no_change;
2234 len = -len;
2235 goto shrink;
2236 }
2237 break;
2238
2239 case '(': {
2240 tempsym_t savsym = *symptr;
2241 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2242 symptr->flags |= group_modifiers;
2243 symptr->patend = savsym.grpend;
2244 symptr->level++;
2245 symptr->previous = &lookahead;
2246 while (len--) {
2247 U32 was_utf8;
2248 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2249 else symptr->flags &= ~FLAG_PARSE_UTF8;
2250 was_utf8 = SvUTF8(cat);
2251 symptr->patptr = savsym.grpbeg;
2252 beglist = pack_rec(cat, symptr, beglist, endlist);
2253 if (SvUTF8(cat) != was_utf8)
2254 /* This had better be an upgrade while in utf8==0 mode */
2255 utf8 = 1;
2256
2257 if (savsym.howlen == e_star && beglist == endlist)
2258 break; /* No way to continue */
2259 }
2260 items = endlist - beglist;
2261 lookahead.flags = symptr->flags & ~group_modifiers;
2262 goto no_change;
2263 }
2264 case 'X' | TYPE_IS_SHRIEKING:
2265 if (!len) /* Avoid division by 0 */
2266 len = 1;
2267 if (utf8) {
2268 char *hop, *last;
2269 I32 l = len;
2270 hop = last = start;
2271 while (hop < cur) {
2272 hop += UTF8SKIP(hop);
2273 if (--l == 0) {
2274 last = hop;
2275 l = len;
2276 }
2277 }
2278 if (last > cur)
2279 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2280 cur = last;
2281 break;
2282 }
2283 len = (cur-start) % len;
2284 /* FALLTHROUGH */
2285 case 'X':
2286 if (utf8) {
2287 if (len < 1) goto no_change;
2288 utf8_shrink:
2289 while (len > 0) {
2290 if (cur <= start)
2291 Perl_croak(aTHX_ "'%c' outside of string in pack",
2292 (int) TYPE_NO_MODIFIERS(datumtype));
2293 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2294 if (cur <= start)
2295 Perl_croak(aTHX_ "'%c' outside of string in pack",
2296 (int) TYPE_NO_MODIFIERS(datumtype));
2297 }
2298 len--;
2299 }
2300 } else {
2301 shrink:
2302 if (cur - start < len)
2303 Perl_croak(aTHX_ "'%c' outside of string in pack",
2304 (int) TYPE_NO_MODIFIERS(datumtype));
2305 cur -= len;
2306 }
2307 if (cur < start+symptr->strbeg) {
2308 /* Make sure group starts don't point into the void */
2309 tempsym_t *group;
2310 const STRLEN length = cur-start;
2311 for (group = symptr;
2312 group && length < group->strbeg;
2313 group = group->previous) group->strbeg = length;
2314 lookahead.strbeg = length;
2315 }
2316 break;
2317 case 'x' | TYPE_IS_SHRIEKING: {
2318 I32 ai32;
2319 if (!len) /* Avoid division by 0 */
2320 len = 1;
2321 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2322 else ai32 = (cur - start) % len;
2323 if (ai32 == 0) goto no_change;
2324 len -= ai32;
2325 }
2326 /* FALLTHROUGH */
2327 case 'x':
2328 goto grow;
2329 case 'A':
2330 case 'Z':
2331 case 'a': {
2332 const char *aptr;
2333
2334 fromstr = NEXTFROM;
2335 aptr = SvPV_const(fromstr, fromlen);
2336 if (DO_UTF8(fromstr)) {
2337 const char *end, *s;
2338
2339 if (!utf8 && !SvUTF8(cat)) {
2340 marked_upgrade(aTHX_ cat, symptr);
2341 lookahead.flags |= FLAG_DO_UTF8;
2342 lookahead.strbeg = symptr->strbeg;
2343 utf8 = 1;
2344 start = SvPVX(cat);
2345 cur = start + SvCUR(cat);
2346 }
2347 if (howlen == e_star) {
2348 if (utf8) goto string_copy;
2349 len = fromlen+1;
2350 }
2351 s = aptr;
2352 end = aptr + fromlen;
2353 fromlen = datumtype == 'Z' ? len-1 : len;
2354 while ((I32) fromlen > 0 && s < end) {
2355 s += UTF8SKIP(s);
2356 fromlen--;
2357 }
2358 if (s > end)
2359 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2360 if (utf8) {
2361 len = fromlen;
2362 if (datumtype == 'Z') len++;
2363 fromlen = s-aptr;
2364 len += fromlen;
2365
2366 goto string_copy;
2367 }
2368 fromlen = len - fromlen;
2369 if (datumtype == 'Z') fromlen--;
2370 if (howlen == e_star) {
2371 len = fromlen;
2372 if (datumtype == 'Z') len++;
2373 }
2374 GROWING(0, cat, start, cur, len);
2375 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2376 datumtype | TYPE_IS_PACK))
2377 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2378 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2379 (int)datumtype, aptr, end, cur, (UV)fromlen);
2380 cur += fromlen;
2381 len -= fromlen;
2382 } else if (utf8) {
2383 if (howlen == e_star) {
2384 len = fromlen;
2385 if (datumtype == 'Z') len++;
2386 }
2387 if (len <= (I32) fromlen) {
2388 fromlen = len;
2389 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2390 }
2391 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2392 upgrade, so:
2393 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2394 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2395 len -= fromlen;
2396 while (fromlen > 0) {
2397 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2398 aptr++;
2399 fromlen--;
2400 }
2401 } else {
2402 string_copy:
2403 if (howlen == e_star) {
2404 len = fromlen;
2405 if (datumtype == 'Z') len++;
2406 }
2407 if (len <= (I32) fromlen) {
2408 fromlen = len;
2409 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2410 }
2411 GROWING(0, cat, start, cur, len);
2412 Copy(aptr, cur, fromlen, char);
2413 cur += fromlen;
2414 len -= fromlen;
2415 }
2416 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2417 cur += len;
2418 SvTAINT(cat);
2419 break;
2420 }
2421 case 'B':
2422 case 'b': {
2423 const char *str, *end;
2424 I32 l, field_len;
2425 U8 bits;
2426 bool utf8_source;
2427 U32 utf8_flags;
2428
2429 fromstr = NEXTFROM;
2430 str = SvPV_const(fromstr, fromlen);
2431 end = str + fromlen;
2432 if (DO_UTF8(fromstr)) {
2433 utf8_source = TRUE;
2434 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2435 } else {
2436 utf8_source = FALSE;
2437 utf8_flags = 0; /* Unused, but keep compilers happy */
2438 }
2439 if (howlen == e_star) len = fromlen;
2440 field_len = (len+7)/8;
2441 GROWING(utf8, cat, start, cur, field_len);
2442 if (len > (I32)fromlen) len = fromlen;
2443 bits = 0;
2444 l = 0;
2445 if (datumtype == 'B')
2446 while (l++ < len) {
2447 if (utf8_source) {
2448 UV val = 0;
2449 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2450 bits |= val & 1;
2451 } else bits |= *str++ & 1;
2452 if (l & 7) bits <<= 1;
2453 else {
2454 PUSH_BYTE(utf8, cur, bits);
2455 bits = 0;
2456 }
2457 }
2458 else
2459 /* datumtype == 'b' */
2460 while (l++ < len) {
2461 if (utf8_source) {
2462 UV val = 0;
2463 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2464 if (val & 1) bits |= 0x80;
2465 } else if (*str++ & 1)
2466 bits |= 0x80;
2467 if (l & 7) bits >>= 1;
2468 else {
2469 PUSH_BYTE(utf8, cur, bits);
2470 bits = 0;
2471 }
2472 }
2473 l--;
2474 if (l & 7) {
2475 if (datumtype == 'B')
2476 bits <<= 7 - (l & 7);
2477 else
2478 bits >>= 7 - (l & 7);
2479 PUSH_BYTE(utf8, cur, bits);
2480 l += 7;
2481 }
2482 /* Determine how many chars are left in the requested field */
2483 l /= 8;
2484 if (howlen == e_star) field_len = 0;
2485 else field_len -= l;
2486 Zero(cur, field_len, char);
2487 cur += field_len;
2488 break;
2489 }
2490 case 'H':
2491 case 'h': {
2492 const char *str, *end;
2493 I32 l, field_len;
2494 U8 bits;
2495 bool utf8_source;
2496 U32 utf8_flags;
2497
2498 fromstr = NEXTFROM;
2499 str = SvPV_const(fromstr, fromlen);
2500 end = str + fromlen;
2501 if (DO_UTF8(fromstr)) {
2502 utf8_source = TRUE;
2503 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2504 } else {
2505 utf8_source = FALSE;
2506 utf8_flags = 0; /* Unused, but keep compilers happy */
2507 }
2508 if (howlen == e_star) len = fromlen;
2509 field_len = (len+1)/2;
2510 GROWING(utf8, cat, start, cur, field_len);
2511 if (!utf8 && len > (I32)fromlen) len = fromlen;
2512 bits = 0;
2513 l = 0;
2514 if (datumtype == 'H')
2515 while (l++ < len) {
2516 if (utf8_source) {
2517 UV val = 0;
2518 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2519 if (val < 256 && isALPHA(val))
2520 bits |= (val + 9) & 0xf;
2521 else
2522 bits |= val & 0xf;
2523 } else if (isALPHA(*str))
2524 bits |= (*str++ + 9) & 0xf;
2525 else
2526 bits |= *str++ & 0xf;
2527 if (l & 1) bits <<= 4;
2528 else {
2529 PUSH_BYTE(utf8, cur, bits);
2530 bits = 0;
2531 }
2532 }
2533 else
2534 while (l++ < len) {
2535 if (utf8_source) {
2536 UV val = 0;
2537 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2538 if (val < 256 && isALPHA(val))
2539 bits |= ((val + 9) & 0xf) << 4;
2540 else
2541 bits |= (val & 0xf) << 4;
2542 } else if (isALPHA(*str))
2543 bits |= ((*str++ + 9) & 0xf) << 4;
2544 else
2545 bits |= (*str++ & 0xf) << 4;
2546 if (l & 1) bits >>= 4;
2547 else {
2548 PUSH_BYTE(utf8, cur, bits);
2549 bits = 0;
2550 }
2551 }
2552 l--;
2553 if (l & 1) {
2554 PUSH_BYTE(utf8, cur, bits);
2555 l++;
2556 }
2557 /* Determine how many chars are left in the requested field */
2558 l /= 2;
2559 if (howlen == e_star) field_len = 0;
2560 else field_len -= l;
2561 Zero(cur, field_len, char);
2562 cur += field_len;
2563 break;
2564 }
2565 case 'c':
2566 while (len-- > 0) {
2567 IV aiv;
2568 fromstr = NEXTFROM;
2569 aiv = SvIV(fromstr);
2570 if ((-128 > aiv || aiv > 127))
2571 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2572 "Character in 'c' format wrapped in pack");
2573 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2574 }
2575 break;
2576 case 'C':
2577 if (len == 0) {
2578 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2579 break;
2580 }
2581 while (len-- > 0) {
2582 IV aiv;
2583 fromstr = NEXTFROM;
2584 aiv = SvIV(fromstr);
2585 if ((0 > aiv || aiv > 0xff))
2586 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2587 "Character in 'C' format wrapped in pack");
2588 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2589 }
2590 break;
2591 case 'W': {
2592 char *end;
2593 U8 in_bytes = (U8)IN_BYTES;
2594
2595 end = start+SvLEN(cat)-1;
2596 if (utf8) end -= UTF8_MAXLEN-1;
2597 while (len-- > 0) {
2598 UV auv;
2599 fromstr = NEXTFROM;
2600 auv = SvUV(fromstr);
2601 if (in_bytes) auv = auv % 0x100;
2602 if (utf8) {
2603 W_utf8:
2604 if (cur > end) {
2605 *cur = '\0';
2606 SvCUR_set(cat, cur - start);
2607
2608 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2609 end = start+SvLEN(cat)-UTF8_MAXLEN;
2610 }
2611 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2612 auv,
2613 warn_utf8 ?
2614 0 : UNICODE_ALLOW_ANY);
2615 } else {
2616 if (auv >= 0x100) {
2617 if (!SvUTF8(cat)) {
2618 *cur = '\0';
2619 SvCUR_set(cat, cur - start);
2620 marked_upgrade(aTHX_ cat, symptr);
2621 lookahead.flags |= FLAG_DO_UTF8;
2622 lookahead.strbeg = symptr->strbeg;
2623 utf8 = 1;
2624 start = SvPVX(cat);
2625 cur = start + SvCUR(cat);
2626 end = start+SvLEN(cat)-UTF8_MAXLEN;
2627 goto W_utf8;
2628 }
2629 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2630 "Character in 'W' format wrapped in pack");
2631 auv &= 0xff;
2632 }
2633 if (cur >= end) {
2634 *cur = '\0';
2635 SvCUR_set(cat, cur - start);
2636 GROWING(0, cat, start, cur, len+1);
2637 end = start+SvLEN(cat)-1;
2638 }
2639 *(U8 *) cur++ = (U8)auv;
2640 }
2641 }
2642 break;
2643 }
2644 case 'U': {
2645 char *end;
2646
2647 if (len == 0) {
2648 if (!(symptr->flags & FLAG_DO_UTF8)) {
2649 marked_upgrade(aTHX_ cat, symptr);
2650 lookahead.flags |= FLAG_DO_UTF8;
2651 lookahead.strbeg = symptr->strbeg;
2652 }
2653 utf8 = 0;
2654 goto no_change;
2655 }
2656
2657 end = start+SvLEN(cat);
2658 if (!utf8) end -= UTF8_MAXLEN;
2659 while (len-- > 0) {
2660 UV auv;
2661 fromstr = NEXTFROM;
2662 auv = SvUV(fromstr);
2663 if (utf8) {
2664 U8 buffer[UTF8_MAXLEN], *endb;
2665 endb = uvchr_to_utf8_flags(buffer, auv,
2666 warn_utf8 ?
2667 0 : UNICODE_ALLOW_ANY);
2668 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2669 *cur = '\0';
2670 SvCUR_set(cat, cur - start);
2671 GROWING(0, cat, start, cur,
2672 len+(endb-buffer)*UTF8_EXPAND);
2673 end = start+SvLEN(cat);
2674 }
2675 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
2676 } else {
2677 if (cur >= end) {
2678 *cur = '\0';
2679 SvCUR_set(cat, cur - start);
2680 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2681 end = start+SvLEN(cat)-UTF8_MAXLEN;
2682 }
2683 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
2684 warn_utf8 ?
2685 0 : UNICODE_ALLOW_ANY);
2686 }
2687 }
2688 break;
2689 }
2690 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2691 case 'f':
2692 while (len-- > 0) {
2693 float afloat;
2694 NV anv;
2695 fromstr = NEXTFROM;
2696 anv = SvNV(fromstr);
2697# if defined(VMS) && !defined(_IEEE_FP)
2698 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2699 * on Alpha; fake it if we don't have them.
2700 */
2701 if (anv > FLT_MAX)
2702 afloat = FLT_MAX;
2703 else if (anv < -FLT_MAX)
2704 afloat = -FLT_MAX;
2705 else afloat = (float)anv;
2706# else
2707 afloat = (float)anv;
2708# endif
2709 PUSH_VAR(utf8, cur, afloat, needs_swap);
2710 }
2711 break;
2712 case 'd':
2713 while (len-- > 0) {
2714 double adouble;
2715 NV anv;
2716 fromstr = NEXTFROM;
2717 anv = SvNV(fromstr);
2718# if defined(VMS) && !defined(_IEEE_FP)
2719 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2720 * on Alpha; fake it if we don't have them.
2721 */
2722 if (anv > DBL_MAX)
2723 adouble = DBL_MAX;
2724 else if (anv < -DBL_MAX)
2725 adouble = -DBL_MAX;
2726 else adouble = (double)anv;
2727# else
2728 adouble = (double)anv;
2729# endif
2730 PUSH_VAR(utf8, cur, adouble, needs_swap);
2731 }
2732 break;
2733 case 'F': {
2734 NV_bytes anv;
2735 Zero(&anv, 1, NV); /* can be long double with unused bits */
2736 while (len-- > 0) {
2737 fromstr = NEXTFROM;
2738#ifdef __GNUC__
2739 /* to work round a gcc/x86 bug; don't use SvNV */
2740 anv.nv = sv_2nv(fromstr);
2741#else
2742 anv.nv = SvNV(fromstr);
2743#endif
2744 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2745 }
2746 break;
2747 }
2748#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2749 case 'D': {
2750 ld_bytes aldouble;
2751 /* long doubles can have unused bits, which may be nonzero */
2752 Zero(&aldouble, 1, long double);
2753 while (len-- > 0) {
2754 fromstr = NEXTFROM;
2755# ifdef __GNUC__
2756 /* to work round a gcc/x86 bug; don't use SvNV */
2757 aldouble.ld = (long double)sv_2nv(fromstr);
2758# else
2759 aldouble.ld = (long double)SvNV(fromstr);
2760# endif
2761 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2762 needs_swap);
2763 }
2764 break;
2765 }
2766#endif
2767 case 'n' | TYPE_IS_SHRIEKING:
2768 case 'n':
2769 while (len-- > 0) {
2770 I16 ai16;
2771 fromstr = NEXTFROM;
2772 ai16 = (I16)SvIV(fromstr);
2773 ai16 = PerlSock_htons(ai16);
2774 PUSH16(utf8, cur, &ai16, FALSE);
2775 }
2776 break;
2777 case 'v' | TYPE_IS_SHRIEKING:
2778 case 'v':
2779 while (len-- > 0) {
2780 I16 ai16;
2781 fromstr = NEXTFROM;
2782 ai16 = (I16)SvIV(fromstr);
2783 ai16 = htovs(ai16);
2784 PUSH16(utf8, cur, &ai16, FALSE);
2785 }
2786 break;
2787 case 'S' | TYPE_IS_SHRIEKING:
2788#if SHORTSIZE != SIZE16
2789 while (len-- > 0) {
2790 unsigned short aushort;
2791 fromstr = NEXTFROM;
2792 aushort = SvUV(fromstr);
2793 PUSH_VAR(utf8, cur, aushort, needs_swap);
2794 }
2795 break;
2796#else
2797 /* FALLTHROUGH */
2798#endif
2799 case 'S':
2800 while (len-- > 0) {
2801 U16 au16;
2802 fromstr = NEXTFROM;
2803 au16 = (U16)SvUV(fromstr);
2804 PUSH16(utf8, cur, &au16, needs_swap);
2805 }
2806 break;
2807 case 's' | TYPE_IS_SHRIEKING:
2808#if SHORTSIZE != SIZE16
2809 while (len-- > 0) {
2810 short ashort;
2811 fromstr = NEXTFROM;
2812 ashort = SvIV(fromstr);
2813 PUSH_VAR(utf8, cur, ashort, needs_swap);
2814 }
2815 break;
2816#else
2817 /* FALLTHROUGH */
2818#endif
2819 case 's':
2820 while (len-- > 0) {
2821 I16 ai16;
2822 fromstr = NEXTFROM;
2823 ai16 = (I16)SvIV(fromstr);
2824 PUSH16(utf8, cur, &ai16, needs_swap);
2825 }
2826 break;
2827 case 'I':
2828 case 'I' | TYPE_IS_SHRIEKING:
2829 while (len-- > 0) {
2830 unsigned int auint;
2831 fromstr = NEXTFROM;
2832 auint = SvUV(fromstr);
2833 PUSH_VAR(utf8, cur, auint, needs_swap);
2834 }
2835 break;
2836 case 'j':
2837 while (len-- > 0) {
2838 IV aiv;
2839 fromstr = NEXTFROM;
2840 aiv = SvIV(fromstr);
2841 PUSH_VAR(utf8, cur, aiv, needs_swap);
2842 }
2843 break;
2844 case 'J':
2845 while (len-- > 0) {
2846 UV auv;
2847 fromstr = NEXTFROM;
2848 auv = SvUV(fromstr);
2849 PUSH_VAR(utf8, cur, auv, needs_swap);
2850 }
2851 break;
2852 case 'w':
2853 while (len-- > 0) {
2854 NV anv;
2855 fromstr = NEXTFROM;
2856 anv = SvNV(fromstr);
2857
2858 if (anv < 0) {
2859 *cur = '\0';
2860 SvCUR_set(cat, cur - start);
2861 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2862 }
2863
2864 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2865 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2866 any negative IVs will have already been got by the croak()
2867 above. IOK is untrue for fractions, so we test them
2868 against UV_MAX_P1. */
2869 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2870 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2871 char *in = buf + sizeof(buf);
2872 UV auv = SvUV(fromstr);
2873
2874 do {
2875 *--in = (char)((auv & 0x7f) | 0x80);
2876 auv >>= 7;
2877 } while (auv);
2878 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2879 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2880 in, (buf + sizeof(buf)) - in);
2881 } else if (SvPOKp(fromstr))
2882 goto w_string;
2883 else if (SvNOKp(fromstr)) {
2884 /* 10**NV_MAX_10_EXP is the largest power of 10
2885 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2886 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2887 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2888 And with that many bytes only Inf can overflow.
2889 Some C compilers are strict about integral constant
2890 expressions so we conservatively divide by a slightly
2891 smaller integer instead of multiplying by the exact
2892 floating-point value.
2893 */
2894#ifdef NV_MAX_10_EXP
2895 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2896 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2897#else
2898 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2899 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2900#endif
2901 char *in = buf + sizeof(buf);
2902
2903 anv = Perl_floor(anv);
2904 do {
2905 const NV next = Perl_floor(anv / 128);
2906 if (in <= buf) /* this cannot happen ;-) */
2907 Perl_croak(aTHX_ "Cannot compress integer in pack");
2908 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2909 anv = next;
2910 } while (anv > 0);
2911 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2912 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2913 in, (buf + sizeof(buf)) - in);
2914 } else {
2915 const char *from;
2916 char *result, *in;
2917 SV *norm;
2918 STRLEN len;
2919 bool done;
2920
2921 w_string:
2922 /* Copy string and check for compliance */
2923 from = SvPV_const(fromstr, len);
2924 if ((norm = is_an_int(from, len)) == NULL)
2925 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2926
2927 Newx(result, len, char);
2928 in = result + len;
2929 done = FALSE;
2930 while (!done) *--in = div128(norm, &done) | 0x80;
2931 result[len - 1] &= 0x7F; /* clear continue bit */
2932 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2933 in, (result + len) - in);
2934 Safefree(result);
2935 SvREFCNT_dec(norm); /* free norm */
2936 }
2937 }
2938 break;
2939 case 'i':
2940 case 'i' | TYPE_IS_SHRIEKING:
2941 while (len-- > 0) {
2942 int aint;
2943 fromstr = NEXTFROM;
2944 aint = SvIV(fromstr);
2945 PUSH_VAR(utf8, cur, aint, needs_swap);
2946 }
2947 break;
2948 case 'N' | TYPE_IS_SHRIEKING:
2949 case 'N':
2950 while (len-- > 0) {
2951 U32 au32;
2952 fromstr = NEXTFROM;
2953 au32 = SvUV(fromstr);
2954 au32 = PerlSock_htonl(au32);
2955 PUSH32(utf8, cur, &au32, FALSE);
2956 }
2957 break;
2958 case 'V' | TYPE_IS_SHRIEKING:
2959 case 'V':
2960 while (len-- > 0) {
2961 U32 au32;
2962 fromstr = NEXTFROM;
2963 au32 = SvUV(fromstr);
2964 au32 = htovl(au32);
2965 PUSH32(utf8, cur, &au32, FALSE);
2966 }
2967 break;
2968 case 'L' | TYPE_IS_SHRIEKING:
2969#if LONGSIZE != SIZE32
2970 while (len-- > 0) {
2971 unsigned long aulong;
2972 fromstr = NEXTFROM;
2973 aulong = SvUV(fromstr);
2974 PUSH_VAR(utf8, cur, aulong, needs_swap);
2975 }
2976 break;
2977#else
2978 /* Fall though! */
2979#endif
2980 case 'L':
2981 while (len-- > 0) {
2982 U32 au32;
2983 fromstr = NEXTFROM;
2984 au32 = SvUV(fromstr);
2985 PUSH32(utf8, cur, &au32, needs_swap);
2986 }
2987 break;
2988 case 'l' | TYPE_IS_SHRIEKING:
2989#if LONGSIZE != SIZE32
2990 while (len-- > 0) {
2991 long along;
2992 fromstr = NEXTFROM;
2993 along = SvIV(fromstr);
2994 PUSH_VAR(utf8, cur, along, needs_swap);
2995 }
2996 break;
2997#else
2998 /* Fall though! */
2999#endif
3000 case 'l':
3001 while (len-- > 0) {
3002 I32 ai32;
3003 fromstr = NEXTFROM;
3004 ai32 = SvIV(fromstr);
3005 PUSH32(utf8, cur, &ai32, needs_swap);
3006 }
3007 break;
3008#if defined(HAS_QUAD) && IVSIZE >= 8
3009 case 'Q':
3010 while (len-- > 0) {
3011 Uquad_t auquad;
3012 fromstr = NEXTFROM;
3013 auquad = (Uquad_t) SvUV(fromstr);
3014 PUSH_VAR(utf8, cur, auquad, needs_swap);
3015 }
3016 break;
3017 case 'q':
3018 while (len-- > 0) {
3019 Quad_t aquad;
3020 fromstr = NEXTFROM;
3021 aquad = (Quad_t)SvIV(fromstr);
3022 PUSH_VAR(utf8, cur, aquad, needs_swap);
3023 }
3024 break;
3025#endif
3026 case 'P':
3027 len = 1; /* assume SV is correct length */
3028 GROWING(utf8, cat, start, cur, sizeof(char *));
3029 /* FALLTHROUGH */
3030 case 'p':
3031 while (len-- > 0) {
3032 const char *aptr;
3033
3034 fromstr = NEXTFROM;
3035 SvGETMAGIC(fromstr);
3036 if (!SvOK(fromstr)) aptr = NULL;
3037 else {
3038 /* XXX better yet, could spirit away the string to
3039 * a safe spot and hang on to it until the result
3040 * of pack() (and all copies of the result) are
3041 * gone.
3042 */
3043 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3044 !SvREADONLY(fromstr)))) {
3045 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3046 "Attempt to pack pointer to temporary value");
3047 }
3048 if (SvPOK(fromstr) || SvNIOK(fromstr))
3049 aptr = SvPV_nomg_const_nolen(fromstr);
3050 else
3051 aptr = SvPV_force_flags_nolen(fromstr, 0);
3052 }
3053 PUSH_VAR(utf8, cur, aptr, needs_swap);
3054 }
3055 break;
3056 case 'u': {
3057 const char *aptr, *aend;
3058 bool from_utf8;
3059
3060 fromstr = NEXTFROM;
3061 if (len <= 2) len = 45;
3062 else len = len / 3 * 3;
3063 if (len >= 64) {
3064 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3065 "Field too wide in 'u' format in pack");
3066 len = 63;
3067 }
3068 aptr = SvPV_const(fromstr, fromlen);
3069 from_utf8 = DO_UTF8(fromstr);
3070 if (from_utf8) {
3071 aend = aptr + fromlen;
3072 fromlen = sv_len_utf8_nomg(fromstr);
3073 } else aend = NULL; /* Unused, but keep compilers happy */
3074 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3075 while (fromlen > 0) {
3076 U8 *end;
3077 I32 todo;
3078 U8 hunk[1+63/3*4+1];
3079
3080 if ((I32)fromlen > len)
3081 todo = len;
3082 else
3083 todo = fromlen;
3084 if (from_utf8) {
3085 char buffer[64];
3086 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3087 'u' | TYPE_IS_PACK)) {
3088 *cur = '\0';
3089 SvCUR_set(cat, cur - start);
3090 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3091 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3092 aptr, aend, buffer, (long) todo);
3093 }
3094 end = doencodes(hunk, buffer, todo);
3095 } else {
3096 end = doencodes(hunk, aptr, todo);
3097 aptr += todo;
3098 }
3099 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3100 fromlen -= todo;
3101 }
3102 break;
3103 }
3104 }
3105 *cur = '\0';
3106 SvCUR_set(cat, cur - start);
3107 no_change:
3108 *symptr = lookahead;
3109 }
3110 return beglist;
3111}
3112#undef NEXTFROM
3113
3114
3115PP(pp_pack)
3116{
3117 dSP; dMARK; dORIGMARK; dTARGET;
3118 SV *cat = TARG;
3119 STRLEN fromlen;
3120 SV *pat_sv = *++MARK;
3121 const char *pat = SvPV_const(pat_sv, fromlen);
3122 const char *patend = pat + fromlen;
3123
3124 MARK++;
3125 sv_setpvs(cat, "");
3126 SvUTF8_off(cat);
3127
3128 packlist(cat, pat, patend, MARK, SP + 1);
3129
3130 SvSETMAGIC(cat);
3131 SP = ORIGMARK;
3132 PUSHs(cat);
3133 RETURN;
3134}
3135
3136/*
3137 * Local variables:
3138 * c-indentation-style: bsd
3139 * c-basic-offset: 4
3140 * indent-tabs-mode: nil
3141 * End:
3142 *
3143 * ex: set ts=8 sts=4 sw=4 et:
3144 */