This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
pack with a human face: the sequel
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a6ec74c1
JH
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 */
9
d31a8517
AT
10/*
11 * He still hopefully carried some of his gear in his pack: a small tinder-box,
12 * two small shallow pans, the smaller fitting into the larger; inside them a
13 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
14 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
15 * some salt.
16 */
17
a6ec74c1
JH
18#include "EXTERN.h"
19#define PERL_IN_PP_PACK_C
20#include "perl.h"
21
22/*
23 * The compiler on Concurrent CX/UX systems has a subtle bug which only
24 * seems to show up when compiling pp.c - it generates the wrong double
25 * precision constant value for (double)UV_MAX when used inline in the body
26 * of the code below, so this makes a static variable up front (which the
27 * compiler seems to get correct) and uses it in place of UV_MAX below.
28 */
29#ifdef CXUX_BROKEN_CONSTANT_CONVERT
30static double UV_MAX_cxux = ((double)UV_MAX);
31#endif
32
33/*
34 * Offset for integer pack/unpack.
35 *
36 * On architectures where I16 and I32 aren't really 16 and 32 bits,
37 * which for now are all Crays, pack and unpack have to play games.
38 */
39
40/*
41 * These values are required for portability of pack() output.
42 * If they're not right on your machine, then pack() and unpack()
43 * wouldn't work right anyway; you'll need to apply the Cray hack.
44 * (I'd like to check them with #if, but you can't use sizeof() in
45 * the preprocessor.) --???
46 */
47/*
48 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
49 defines are now in config.h. --Andy Dougherty April 1998
50 */
51#define SIZE16 2
52#define SIZE32 4
53
54/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
55 --jhi Feb 1999 */
56
57#if SHORTSIZE != SIZE16 || LONGSIZE != SIZE32
58# define PERL_NATINT_PACK
59#endif
60
61#if LONGSIZE > 4 && defined(_CRAY)
62# if BYTEORDER == 0x12345678
63# define OFF16(p) (char*)(p)
64# define OFF32(p) (char*)(p)
65# else
66# if BYTEORDER == 0x87654321
67# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
68# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
69# else
70 }}}} bad cray byte order
71# endif
72# endif
73# define COPY16(s,p) (*(p) = 0, Copy(s, OFF16(p), SIZE16, char))
74# define COPY32(s,p) (*(p) = 0, Copy(s, OFF32(p), SIZE32, char))
75# define COPYNN(s,p,n) (*(p) = 0, Copy(s, (char *)(p), n, char))
76# define CAT16(sv,p) sv_catpvn(sv, OFF16(p), SIZE16)
77# define CAT32(sv,p) sv_catpvn(sv, OFF32(p), SIZE32)
78#else
79# define COPY16(s,p) Copy(s, p, SIZE16, char)
80# define COPY32(s,p) Copy(s, p, SIZE32, char)
81# define COPYNN(s,p,n) Copy(s, (char *)(p), n, char)
82# define CAT16(sv,p) sv_catpvn(sv, (char*)(p), SIZE16)
83# define CAT32(sv,p) sv_catpvn(sv, (char*)(p), SIZE32)
84#endif
85
86STATIC SV *
87S_mul128(pTHX_ SV *sv, U8 m)
88{
89 STRLEN len;
90 char *s = SvPV(sv, len);
91 char *t;
92 U32 i = 0;
93
94 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
95 SV *tmpNew = newSVpvn("0000000000", 10);
96
97 sv_catsv(tmpNew, sv);
98 SvREFCNT_dec(sv); /* free old sv */
99 sv = tmpNew;
100 s = SvPV(sv, len);
101 }
102 t = s + len - 1;
103 while (!*t) /* trailing '\0'? */
104 t--;
105 while (t > s) {
106 i = ((*t - '0') << 7) + m;
107 *(t--) = '0' + (i % 10);
108 m = i / 10;
109 }
110 return (sv);
111}
112
113/* Explosives and implosives. */
114
115#if 'I' == 73 && 'J' == 74
116/* On an ASCII/ISO kind of system */
117#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
118#else
119/*
120 Some other sort of character set - use memchr() so we don't match
121 the null byte.
122 */
123#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
124#endif
125
18529408
IZ
126#define UNPACK_ONLY_ONE 0x1
127#define UNPACK_DO_UTF8 0x2
a6ec74c1 128
18529408
IZ
129STATIC char *
130S_group_end(pTHX_ register char *pat, register char *patend, char ender)
131{
132 while (pat < patend) {
133 char c = *pat++;
134
135 if (isSPACE(c))
136 continue;
137 else if (c == ender)
138 return --pat;
139 else if (c == '#') {
140 while (pat < patend && *pat != '\n')
141 pat++;
142 continue;
143 } else if (c == '(')
144 pat = group_end(pat, patend, ')') + 1;
206947d2
IZ
145 else if (c == '[')
146 pat = group_end(pat, patend, ']') + 1;
18529408
IZ
147 }
148 croak("No group ending character `%c' found", ender);
149}
150
206947d2
IZ
151/* Returns the sizeof() struct described by pat */
152I32
153S_measure_struct(pTHX_ char *pat, register char *patend)
154{
155 I32 datumtype;
156 register I32 len;
157 register I32 total = 0;
158 int commas = 0;
159 int star; /* 1 if count is *, -1 if no count given, -2 for / */
160#ifdef PERL_NATINT_PACK
161 int natint; /* native integer */
162 int unatint; /* unsigned native integer */
163#endif
164 char buf[2];
165 register int size;
166
167 while ((pat = next_symbol(pat, patend)) < patend) {
168 datumtype = *pat++ & 0xFF;
169#ifdef PERL_NATINT_PACK
170 natint = 0;
171#endif
172 if (*pat == '!') {
173 static const char *natstr = "sSiIlL";
174
175 if (strchr(natstr, datumtype)) {
176#ifdef PERL_NATINT_PACK
177 natint = 1;
178#endif
179 pat++;
180 }
181 else
182 croak("'!' allowed only after types %s", natstr);
183 }
184 len = find_count(&pat, patend, &star);
185 if (star > 0) /* */
186 croak("%s not allowed in length fields", "count *");
187 else if (star < 0) /* No explicit len */
188 len = datumtype != '@';
189
190 switch(datumtype) {
191 default:
192 croak("Invalid type in unpack: '%c'", (int)datumtype);
193 case '@':
194 case '/':
195 case 'U': /* XXXX Is it correct? */
196 case 'w':
197 case 'u':
198 buf[0] = datumtype;
199 buf[1] = 0;
200 croak("%s not allowed in length fields", buf);
201 case ',': /* grandfather in commas but with a warning */
202 if (commas++ == 0 && ckWARN(WARN_UNPACK))
203 Perl_warner(aTHX_ WARN_UNPACK,
204 "Invalid type in unpack: '%c'", (int)datumtype);
205 /* FALL THROUGH */
206 case '%':
207 size = 0;
208 break;
209 case '(':
210 {
211 char *beg = pat, *end;
212
213 if (star >= 0)
214 croak("()-group starts with a count");
215 end = group_end(beg, patend, ')');
216 pat = end + 1;
217 len = find_count(&pat, patend, &star);
218 if (star < 0) /* No count */
219 len = 1;
220 else if (star > 0) /* Star */
221 croak("%s not allowed in length fields", "count *");
222 size = measure_struct(beg, end);
223 break;
224 }
225 case 'X':
226 size = -1;
227 if (total < len)
228 croak("X outside of string");
229 break;
230 case 'x':
231 case 'A':
232 case 'Z':
233 case 'a':
234 case 'c':
235 case 'C':
236 size = 1;
237 break;
238 case 'B':
239 case 'b':
240 len = (len + 7)/8;
241 size = 1;
242 break;
243 case 'H':
244 case 'h':
245 len = (len + 1)/2;
246 size = 1;
247 break;
248 case 's':
249#if SHORTSIZE == SIZE16
250 size = SIZE16;
251#else
252 size = (natint ? sizeof(short) : SIZE16);
253#endif
254 break;
255 case 'v':
256 case 'n':
257 case 'S':
258#if SHORTSIZE == SIZE16
259 size = SIZE16;
260#else
261 unatint = natint && datumtype == 'S';
262 size = (unatint ? sizeof(unsigned short) : SIZE16);
263#endif
264 break;
265 case 'i':
266 size = sizeof(int);
267 break;
268 case 'I':
269 size = sizeof(unsigned int);
270 break;
271 case 'l':
272#if LONGSIZE == SIZE32
273 size = SIZE32;
274#else
275 size = (natint ? sizeof(long) : SIZE32);
276#endif
277 break;
278 case 'V':
279 case 'N':
280 case 'L':
281#if LONGSIZE == SIZE32
282 size = SIZE32;
283#else
284 unatint = natint && datumtype == 'L';
285 size = (unatint ? sizeof(unsigned long) : SIZE32);
286#endif
287 break;
288 case 'P':
289 len = 1;
290 /* FALL THROUGH */
291 case 'p':
292 size = sizeof(char*);
293 break;
294#ifdef HAS_QUAD
295 case 'q':
296 size = sizeof(Quad_t);
297 break;
298 case 'Q':
299 size = sizeof(Uquad_t);
300 break;
301#endif
302 case 'f':
303 case 'F':
304 size = sizeof(float);
305 break;
306 case 'd':
307 case 'D':
308 size = sizeof(double);
309 break;
310 }
311 total += len * size;
312 }
313 return total;
314}
315
18529408
IZ
316/* Returns -1 on no count or on star */
317STATIC I32
318S_find_count(pTHX_ char **ppat, register char *patend, int *star)
319{
320 register char *pat = *ppat;
321 I32 len;
322
323 *star = 0;
324 if (pat >= patend)
325 len = 1;
326 else if (*pat == '*') {
327 pat++;
328 *star = 1;
329 len = -1;
330 }
331 else if (isDIGIT(*pat) || *pat == '[') {
332 bool brackets = *pat == '[';
333
206947d2 334 if (brackets) {
18529408 335 ++pat, len = 0;
206947d2
IZ
336 if (!isDIGIT(*pat)) {
337 char *end = group_end(pat, patend, ']');
338
339 *ppat = end + 1;
340 return measure_struct(pat, end);
341 }
342 }
18529408
IZ
343 else
344 len = *pat++ - '0';
345 while (isDIGIT(*pat)) {
346 len = (len * 10) + (*pat++ - '0');
347 if (len < 0)
348 croak("Repeat count in unpack overflows");
349 }
350 if (brackets && *pat++ != ']')
351 croak("No repeat count ender ] found after digits");
352 }
353 else
354 len = *star = -1;
355 *ppat = pat;
356 return len;
357}
358
359STATIC char *
360S_next_symbol(pTHX_ register char *pat, register char *patend)
361{
362 while (pat < patend) {
363 if (isSPACE(*pat))
364 pat++;
365 else if (*pat == '#') {
366 pat++;
367 while (pat < patend && *pat != '\n')
368 pat++;
369 if (pat < patend)
370 pat++;
371 }
372 else
373 return pat;
374 }
375 return pat;
376}
377
18529408
IZ
378/*
379=for apidoc unpack_str
380
381The engine implementing unpack() Perl function.
382
383=cut */
384
385I32
386Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1
JH
387{
388 dSP;
a6ec74c1
JH
389 I32 datumtype;
390 register I32 len;
391 register I32 bits = 0;
392 register char *str;
18529408
IZ
393 SV *sv;
394 I32 start_sp_offset = SP - PL_stack_base;
a6ec74c1
JH
395
396 /* These must not be in registers: */
397 short ashort;
398 int aint;
399 long along;
400#ifdef HAS_QUAD
401 Quad_t aquad;
402#endif
403 U16 aushort;
404 unsigned int auint;
405 U32 aulong;
406#ifdef HAS_QUAD
407 Uquad_t auquad;
408#endif
409 char *aptr;
410 float afloat;
411 double adouble;
412 I32 checksum = 0;
fa8ec7c1 413 UV culong = 0;
a6ec74c1 414 NV cdouble = 0.0;
fa8ec7c1 415 const int bits_in_uv = 8 * sizeof(culong);
a6ec74c1 416 int commas = 0;
18529408 417 int star; /* 1 if count is *, -1 if no count given, -2 for / */
a6ec74c1
JH
418#ifdef PERL_NATINT_PACK
419 int natint; /* native integer */
420 int unatint; /* unsigned native integer */
421#endif
18529408 422 bool do_utf8 = flags & UNPACK_DO_UTF8;
a6ec74c1 423
18529408 424 while ((pat = next_symbol(pat, patend)) < patend) {
a6ec74c1
JH
425 datumtype = *pat++ & 0xFF;
426#ifdef PERL_NATINT_PACK
427 natint = 0;
428#endif
206947d2
IZ
429 /* do first one only unless in list context
430 / is implemented by unpacking the count, then poping it from the
431 stack, so must check that we're not in the middle of a / */
432 if ( (flags & UNPACK_ONLY_ONE)
433 && (SP - PL_stack_base == start_sp_offset + 1)
434 && (datumtype != '/') )
435 break;
a6ec74c1 436 if (*pat == '!') {
206947d2 437 static const char natstr[] = "sSiIlL";
a6ec74c1
JH
438
439 if (strchr(natstr, datumtype)) {
440#ifdef PERL_NATINT_PACK
441 natint = 1;
442#endif
443 pat++;
444 }
445 else
18529408 446 croak("'!' allowed only after types %s", natstr);
a6ec74c1 447 }
18529408
IZ
448 len = find_count(&pat, patend, &star);
449 if (star > 0)
450 len = strend - strbeg; /* long enough */
451 else if (star < 0) /* No explicit len */
206947d2 452 len = datumtype != '@';
18529408 453
a6ec74c1
JH
454 redo_switch:
455 switch(datumtype) {
456 default:
18529408 457 croak("Invalid type in unpack: '%c'", (int)datumtype);
a6ec74c1
JH
458 case ',': /* grandfather in commas but with a warning */
459 if (commas++ == 0 && ckWARN(WARN_UNPACK))
460 Perl_warner(aTHX_ WARN_UNPACK,
461 "Invalid type in unpack: '%c'", (int)datumtype);
462 break;
463 case '%':
18529408
IZ
464 if (len == 1 && pat[-1] != '1' && pat[-1] != ']')
465 len = 16; /* len is not specified */
a6ec74c1
JH
466 checksum = len;
467 culong = 0;
468 cdouble = 0;
18529408 469 continue;
a6ec74c1 470 break;
18529408
IZ
471 case '(':
472 {
473 char *beg = pat;
474 char *ss = s; /* Move from register */
475
476 if (star >= 0)
477 croak("()-group starts with a count");
478 aptr = group_end(beg, patend, ')');
479 pat = aptr + 1;
480 if (star != -2) {
481 len = find_count(&pat, patend, &star);
482 if (star < 0) /* No count */
483 len = 1;
484 else if (star > 0) /* Star */
485 len = strend - strbeg; /* long enough? */
486 }
487 PUTBACK;
488 while (len--) {
489 unpack_str(beg, aptr, ss, strbeg, strend, &ss,
490 ocnt + SP - PL_stack_base - start_sp_offset, flags);
491 if (star > 0 && ss == strend)
492 break; /* No way to continue */
493 }
494 SPAGAIN;
495 s = ss;
496 break;
497 }
a6ec74c1
JH
498 case '@':
499 if (len > strend - strbeg)
18529408 500 croak("@ outside of string");
a6ec74c1
JH
501 s = strbeg + len;
502 break;
503 case 'X':
504 if (len > s - strbeg)
18529408 505 croak("X outside of string");
a6ec74c1
JH
506 s -= len;
507 break;
508 case 'x':
509 if (len > strend - s)
18529408 510 croak("x outside of string");
a6ec74c1
JH
511 s += len;
512 break;
513 case '/':
18529408
IZ
514 if (ocnt + SP - PL_stack_base - start_sp_offset <= 0)
515 croak("/ must follow a numeric type");
a6ec74c1
JH
516 datumtype = *pat++;
517 if (*pat == '*')
518 pat++; /* ignore '*' for compatibility with pack */
519 if (isDIGIT(*pat))
18529408 520 croak("/ cannot take a count" );
a6ec74c1 521 len = POPi;
18529408 522 star = -2;
a6ec74c1
JH
523 goto redo_switch;
524 case 'A':
525 case 'Z':
526 case 'a':
527 if (len > strend - s)
528 len = strend - s;
529 if (checksum)
530 goto uchar_checksum;
531 sv = NEWSV(35, len);
532 sv_setpvn(sv, s, len);
a6ec74c1
JH
533 if (datumtype == 'A' || datumtype == 'Z') {
534 aptr = s; /* borrow register */
535 if (datumtype == 'Z') { /* 'Z' strips stuff after first null */
536 s = SvPVX(sv);
537 while (*s)
538 s++;
18529408 539 if (star > 0) /* exact for 'Z*' */
d50dd4e4 540 len = s - SvPVX(sv) + 1;
a6ec74c1
JH
541 }
542 else { /* 'A' strips both nulls and spaces */
543 s = SvPVX(sv) + len - 1;
544 while (s >= SvPVX(sv) && (!*s || isSPACE(*s)))
545 s--;
546 *++s = '\0';
547 }
548 SvCUR_set(sv, s - SvPVX(sv));
549 s = aptr; /* unborrow register */
550 }
d50dd4e4 551 s += len;
a6ec74c1
JH
552 XPUSHs(sv_2mortal(sv));
553 break;
554 case 'B':
555 case 'b':
18529408 556 if (star > 0 || len > (strend - s) * 8)
a6ec74c1
JH
557 len = (strend - s) * 8;
558 if (checksum) {
559 if (!PL_bitcount) {
560 Newz(601, PL_bitcount, 256, char);
561 for (bits = 1; bits < 256; bits++) {
562 if (bits & 1) PL_bitcount[bits]++;
563 if (bits & 2) PL_bitcount[bits]++;
564 if (bits & 4) PL_bitcount[bits]++;
565 if (bits & 8) PL_bitcount[bits]++;
566 if (bits & 16) PL_bitcount[bits]++;
567 if (bits & 32) PL_bitcount[bits]++;
568 if (bits & 64) PL_bitcount[bits]++;
569 if (bits & 128) PL_bitcount[bits]++;
570 }
571 }
572 while (len >= 8) {
573 culong += PL_bitcount[*(unsigned char*)s++];
574 len -= 8;
575 }
576 if (len) {
577 bits = *s;
578 if (datumtype == 'b') {
579 while (len-- > 0) {
580 if (bits & 1) culong++;
581 bits >>= 1;
582 }
583 }
584 else {
585 while (len-- > 0) {
586 if (bits & 128) culong++;
587 bits <<= 1;
588 }
589 }
590 }
591 break;
592 }
593 sv = NEWSV(35, len + 1);
594 SvCUR_set(sv, len);
595 SvPOK_on(sv);
596 str = SvPVX(sv);
597 if (datumtype == 'b') {
598 aint = len;
599 for (len = 0; len < aint; len++) {
600 if (len & 7) /*SUPPRESS 595*/
601 bits >>= 1;
602 else
603 bits = *s++;
604 *str++ = '0' + (bits & 1);
605 }
606 }
607 else {
608 aint = len;
609 for (len = 0; len < aint; len++) {
610 if (len & 7)
611 bits <<= 1;
612 else
613 bits = *s++;
614 *str++ = '0' + ((bits & 128) != 0);
615 }
616 }
617 *str = '\0';
618 XPUSHs(sv_2mortal(sv));
619 break;
620 case 'H':
621 case 'h':
18529408 622 if (star > 0 || len > (strend - s) * 2)
a6ec74c1
JH
623 len = (strend - s) * 2;
624 sv = NEWSV(35, len + 1);
625 SvCUR_set(sv, len);
626 SvPOK_on(sv);
627 str = SvPVX(sv);
628 if (datumtype == 'h') {
629 aint = len;
630 for (len = 0; len < aint; len++) {
631 if (len & 1)
632 bits >>= 4;
633 else
634 bits = *s++;
635 *str++ = PL_hexdigit[bits & 15];
636 }
637 }
638 else {
639 aint = len;
640 for (len = 0; len < aint; len++) {
641 if (len & 1)
642 bits <<= 4;
643 else
644 bits = *s++;
645 *str++ = PL_hexdigit[(bits >> 4) & 15];
646 }
647 }
648 *str = '\0';
649 XPUSHs(sv_2mortal(sv));
650 break;
651 case 'c':
652 if (len > strend - s)
653 len = strend - s;
654 if (checksum) {
655 while (len-- > 0) {
656 aint = *s++;
657 if (aint >= 128) /* fake up signed chars */
658 aint -= 256;
fa8ec7c1
NC
659 if (checksum > bits_in_uv)
660 cdouble += (NV)aint;
661 else
662 culong += aint;
a6ec74c1
JH
663 }
664 }
665 else {
666 EXTEND(SP, len);
667 EXTEND_MORTAL(len);
668 while (len-- > 0) {
669 aint = *s++;
670 if (aint >= 128) /* fake up signed chars */
671 aint -= 256;
672 sv = NEWSV(36, 0);
673 sv_setiv(sv, (IV)aint);
674 PUSHs(sv_2mortal(sv));
675 }
676 }
677 break;
678 case 'C':
35bcd338
JH
679 unpack_C: /* unpack U will jump here if not UTF-8 */
680 if (len == 0) {
681 do_utf8 = FALSE;
682 break;
683 }
a6ec74c1
JH
684 if (len > strend - s)
685 len = strend - s;
686 if (checksum) {
687 uchar_checksum:
688 while (len-- > 0) {
689 auint = *s++ & 255;
690 culong += auint;
691 }
692 }
693 else {
694 EXTEND(SP, len);
695 EXTEND_MORTAL(len);
696 while (len-- > 0) {
697 auint = *s++ & 255;
698 sv = NEWSV(37, 0);
699 sv_setiv(sv, (IV)auint);
700 PUSHs(sv_2mortal(sv));
701 }
702 }
703 break;
704 case 'U':
35bcd338
JH
705 if (len == 0) {
706 do_utf8 = TRUE;
707 break;
708 }
709 if (!do_utf8)
710 goto unpack_C;
a6ec74c1
JH
711 if (len > strend - s)
712 len = strend - s;
713 if (checksum) {
714 while (len-- > 0 && s < strend) {
715 STRLEN alen;
e87322b2 716 auint = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, strend - s, &alen, 0));
a6ec74c1
JH
717 along = alen;
718 s += along;
fa8ec7c1 719 if (checksum > bits_in_uv)
a6ec74c1
JH
720 cdouble += (NV)auint;
721 else
722 culong += auint;
723 }
724 }
725 else {
726 EXTEND(SP, len);
727 EXTEND_MORTAL(len);
728 while (len-- > 0 && s < strend) {
729 STRLEN alen;
e87322b2 730 auint = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, strend - s, &alen, 0));
a6ec74c1
JH
731 along = alen;
732 s += along;
733 sv = NEWSV(37, 0);
734 sv_setuv(sv, (UV)auint);
735 PUSHs(sv_2mortal(sv));
736 }
737 }
738 break;
739 case 's':
740#if SHORTSIZE == SIZE16
741 along = (strend - s) / SIZE16;
742#else
743 along = (strend - s) / (natint ? sizeof(short) : SIZE16);
744#endif
745 if (len > along)
746 len = along;
747 if (checksum) {
748#if SHORTSIZE != SIZE16
749 if (natint) {
750 short ashort;
751 while (len-- > 0) {
752 COPYNN(s, &ashort, sizeof(short));
753 s += sizeof(short);
fa8ec7c1
NC
754 if (checksum > bits_in_uv)
755 cdouble += (NV)ashort;
756 else
757 culong += ashort;
a6ec74c1
JH
758
759 }
760 }
761 else
762#endif
763 {
764 while (len-- > 0) {
765 COPY16(s, &ashort);
766#if SHORTSIZE > SIZE16
767 if (ashort > 32767)
768 ashort -= 65536;
769#endif
770 s += SIZE16;
fa8ec7c1
NC
771 if (checksum > bits_in_uv)
772 cdouble += (NV)ashort;
773 else
774 culong += ashort;
a6ec74c1
JH
775 }
776 }
777 }
778 else {
779 EXTEND(SP, len);
780 EXTEND_MORTAL(len);
781#if SHORTSIZE != SIZE16
782 if (natint) {
783 short ashort;
784 while (len-- > 0) {
785 COPYNN(s, &ashort, sizeof(short));
786 s += sizeof(short);
787 sv = NEWSV(38, 0);
788 sv_setiv(sv, (IV)ashort);
789 PUSHs(sv_2mortal(sv));
790 }
791 }
792 else
793#endif
794 {
795 while (len-- > 0) {
796 COPY16(s, &ashort);
797#if SHORTSIZE > SIZE16
798 if (ashort > 32767)
799 ashort -= 65536;
800#endif
801 s += SIZE16;
802 sv = NEWSV(38, 0);
803 sv_setiv(sv, (IV)ashort);
804 PUSHs(sv_2mortal(sv));
805 }
806 }
807 }
808 break;
809 case 'v':
810 case 'n':
811 case 'S':
812#if SHORTSIZE == SIZE16
813 along = (strend - s) / SIZE16;
814#else
815 unatint = natint && datumtype == 'S';
816 along = (strend - s) / (unatint ? sizeof(unsigned short) : SIZE16);
817#endif
818 if (len > along)
819 len = along;
820 if (checksum) {
821#if SHORTSIZE != SIZE16
822 if (unatint) {
823 unsigned short aushort;
824 while (len-- > 0) {
825 COPYNN(s, &aushort, sizeof(unsigned short));
826 s += sizeof(unsigned short);
fa8ec7c1
NC
827 if (checksum > bits_in_uv)
828 cdouble += (NV)aushort;
829 else
830 culong += aushort;
a6ec74c1
JH
831 }
832 }
833 else
834#endif
835 {
836 while (len-- > 0) {
837 COPY16(s, &aushort);
838 s += SIZE16;
839#ifdef HAS_NTOHS
840 if (datumtype == 'n')
841 aushort = PerlSock_ntohs(aushort);
842#endif
843#ifdef HAS_VTOHS
844 if (datumtype == 'v')
845 aushort = vtohs(aushort);
846#endif
fa8ec7c1
NC
847 if (checksum > bits_in_uv)
848 cdouble += (NV)aushort;
849 else
850 culong += aushort;
a6ec74c1
JH
851 }
852 }
853 }
854 else {
855 EXTEND(SP, len);
856 EXTEND_MORTAL(len);
857#if SHORTSIZE != SIZE16
858 if (unatint) {
859 unsigned short aushort;
860 while (len-- > 0) {
861 COPYNN(s, &aushort, sizeof(unsigned short));
862 s += sizeof(unsigned short);
863 sv = NEWSV(39, 0);
864 sv_setiv(sv, (UV)aushort);
865 PUSHs(sv_2mortal(sv));
866 }
867 }
868 else
869#endif
870 {
871 while (len-- > 0) {
872 COPY16(s, &aushort);
873 s += SIZE16;
874 sv = NEWSV(39, 0);
875#ifdef HAS_NTOHS
876 if (datumtype == 'n')
877 aushort = PerlSock_ntohs(aushort);
878#endif
879#ifdef HAS_VTOHS
880 if (datumtype == 'v')
881 aushort = vtohs(aushort);
882#endif
883 sv_setiv(sv, (UV)aushort);
884 PUSHs(sv_2mortal(sv));
885 }
886 }
887 }
888 break;
889 case 'i':
890 along = (strend - s) / sizeof(int);
891 if (len > along)
892 len = along;
893 if (checksum) {
894 while (len-- > 0) {
895 Copy(s, &aint, 1, int);
896 s += sizeof(int);
fa8ec7c1 897 if (checksum > bits_in_uv)
a6ec74c1
JH
898 cdouble += (NV)aint;
899 else
900 culong += aint;
901 }
902 }
903 else {
904 EXTEND(SP, len);
905 EXTEND_MORTAL(len);
906 while (len-- > 0) {
907 Copy(s, &aint, 1, int);
908 s += sizeof(int);
909 sv = NEWSV(40, 0);
910#ifdef __osf__
911 /* Without the dummy below unpack("i", pack("i",-1))
912 * return 0xFFffFFff instead of -1 for Digital Unix V4.0
913 * cc with optimization turned on.
914 *
915 * The bug was detected in
916 * DEC C V5.8-009 on Digital UNIX V4.0 (Rev. 1091) (V4.0E)
917 * with optimization (-O4) turned on.
918 * DEC C V5.2-040 on Digital UNIX V4.0 (Rev. 564) (V4.0B)
919 * does not have this problem even with -O4.
920 *
921 * This bug was reported as DECC_BUGS 1431
922 * and tracked internally as GEM_BUGS 7775.
923 *
924 * The bug is fixed in
925 * Tru64 UNIX V5.0: Compaq C V6.1-006 or later
926 * UNIX V4.0F support: DEC C V5.9-006 or later
927 * UNIX V4.0E support: DEC C V5.8-011 or later
928 * and also in DTK.
929 *
930 * See also few lines later for the same bug.
931 */
932 (aint) ?
933 sv_setiv(sv, (IV)aint) :
934#endif
935 sv_setiv(sv, (IV)aint);
936 PUSHs(sv_2mortal(sv));
937 }
938 }
939 break;
940 case 'I':
941 along = (strend - s) / sizeof(unsigned int);
942 if (len > along)
943 len = along;
944 if (checksum) {
945 while (len-- > 0) {
946 Copy(s, &auint, 1, unsigned int);
947 s += sizeof(unsigned int);
fa8ec7c1 948 if (checksum > bits_in_uv)
a6ec74c1
JH
949 cdouble += (NV)auint;
950 else
951 culong += auint;
952 }
953 }
954 else {
955 EXTEND(SP, len);
956 EXTEND_MORTAL(len);
957 while (len-- > 0) {
958 Copy(s, &auint, 1, unsigned int);
959 s += sizeof(unsigned int);
960 sv = NEWSV(41, 0);
961#ifdef __osf__
962 /* Without the dummy below unpack("I", pack("I",0xFFFFFFFF))
963 * returns 1.84467440737096e+19 instead of 0xFFFFFFFF.
964 * See details few lines earlier. */
965 (auint) ?
966 sv_setuv(sv, (UV)auint) :
967#endif
968 sv_setuv(sv, (UV)auint);
969 PUSHs(sv_2mortal(sv));
970 }
971 }
972 break;
973 case 'l':
974#if LONGSIZE == SIZE32
975 along = (strend - s) / SIZE32;
976#else
977 along = (strend - s) / (natint ? sizeof(long) : SIZE32);
978#endif
979 if (len > along)
980 len = along;
981 if (checksum) {
982#if LONGSIZE != SIZE32
983 if (natint) {
984 while (len-- > 0) {
985 COPYNN(s, &along, sizeof(long));
986 s += sizeof(long);
fa8ec7c1 987 if (checksum > bits_in_uv)
a6ec74c1
JH
988 cdouble += (NV)along;
989 else
990 culong += along;
991 }
992 }
993 else
994#endif
995 {
996 while (len-- > 0) {
997#if LONGSIZE > SIZE32 && INTSIZE == SIZE32
998 I32 along;
999#endif
1000 COPY32(s, &along);
1001#if LONGSIZE > SIZE32
1002 if (along > 2147483647)
1003 along -= 4294967296;
1004#endif
1005 s += SIZE32;
fa8ec7c1 1006 if (checksum > bits_in_uv)
a6ec74c1
JH
1007 cdouble += (NV)along;
1008 else
1009 culong += along;
1010 }
1011 }
1012 }
1013 else {
1014 EXTEND(SP, len);
1015 EXTEND_MORTAL(len);
1016#if LONGSIZE != SIZE32
1017 if (natint) {
1018 while (len-- > 0) {
1019 COPYNN(s, &along, sizeof(long));
1020 s += sizeof(long);
1021 sv = NEWSV(42, 0);
1022 sv_setiv(sv, (IV)along);
1023 PUSHs(sv_2mortal(sv));
1024 }
1025 }
1026 else
1027#endif
1028 {
1029 while (len-- > 0) {
1030#if LONGSIZE > SIZE32 && INTSIZE == SIZE32
1031 I32 along;
1032#endif
1033 COPY32(s, &along);
1034#if LONGSIZE > SIZE32
1035 if (along > 2147483647)
1036 along -= 4294967296;
1037#endif
1038 s += SIZE32;
1039 sv = NEWSV(42, 0);
1040 sv_setiv(sv, (IV)along);
1041 PUSHs(sv_2mortal(sv));
1042 }
1043 }
1044 }
1045 break;
1046 case 'V':
1047 case 'N':
1048 case 'L':
1049#if LONGSIZE == SIZE32
1050 along = (strend - s) / SIZE32;
1051#else
1052 unatint = natint && datumtype == 'L';
1053 along = (strend - s) / (unatint ? sizeof(unsigned long) : SIZE32);
1054#endif
1055 if (len > along)
1056 len = along;
1057 if (checksum) {
1058#if LONGSIZE != SIZE32
1059 if (unatint) {
1060 unsigned long aulong;
1061 while (len-- > 0) {
1062 COPYNN(s, &aulong, sizeof(unsigned long));
1063 s += sizeof(unsigned long);
fa8ec7c1 1064 if (checksum > bits_in_uv)
a6ec74c1
JH
1065 cdouble += (NV)aulong;
1066 else
1067 culong += aulong;
1068 }
1069 }
1070 else
1071#endif
1072 {
1073 while (len-- > 0) {
1074 COPY32(s, &aulong);
1075 s += SIZE32;
1076#ifdef HAS_NTOHL
1077 if (datumtype == 'N')
1078 aulong = PerlSock_ntohl(aulong);
1079#endif
1080#ifdef HAS_VTOHL
1081 if (datumtype == 'V')
1082 aulong = vtohl(aulong);
1083#endif
fa8ec7c1 1084 if (checksum > bits_in_uv)
a6ec74c1
JH
1085 cdouble += (NV)aulong;
1086 else
1087 culong += aulong;
1088 }
1089 }
1090 }
1091 else {
1092 EXTEND(SP, len);
1093 EXTEND_MORTAL(len);
1094#if LONGSIZE != SIZE32
1095 if (unatint) {
1096 unsigned long aulong;
1097 while (len-- > 0) {
1098 COPYNN(s, &aulong, sizeof(unsigned long));
1099 s += sizeof(unsigned long);
1100 sv = NEWSV(43, 0);
1101 sv_setuv(sv, (UV)aulong);
1102 PUSHs(sv_2mortal(sv));
1103 }
1104 }
1105 else
1106#endif
1107 {
1108 while (len-- > 0) {
1109 COPY32(s, &aulong);
1110 s += SIZE32;
1111#ifdef HAS_NTOHL
1112 if (datumtype == 'N')
1113 aulong = PerlSock_ntohl(aulong);
1114#endif
1115#ifdef HAS_VTOHL
1116 if (datumtype == 'V')
1117 aulong = vtohl(aulong);
1118#endif
1119 sv = NEWSV(43, 0);
1120 sv_setuv(sv, (UV)aulong);
1121 PUSHs(sv_2mortal(sv));
1122 }
1123 }
1124 }
1125 break;
1126 case 'p':
1127 along = (strend - s) / sizeof(char*);
1128 if (len > along)
1129 len = along;
1130 EXTEND(SP, len);
1131 EXTEND_MORTAL(len);
1132 while (len-- > 0) {
1133 if (sizeof(char*) > strend - s)
1134 break;
1135 else {
1136 Copy(s, &aptr, 1, char*);
1137 s += sizeof(char*);
1138 }
1139 sv = NEWSV(44, 0);
1140 if (aptr)
1141 sv_setpv(sv, aptr);
1142 PUSHs(sv_2mortal(sv));
1143 }
1144 break;
1145 case 'w':
1146 EXTEND(SP, len);
1147 EXTEND_MORTAL(len);
1148 {
1149 UV auv = 0;
1150 U32 bytes = 0;
1151
1152 while ((len > 0) && (s < strend)) {
1153 auv = (auv << 7) | (*s & 0x7f);
1154 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1155 if ((U8)(*s++) < 0x80) {
1156 bytes = 0;
1157 sv = NEWSV(40, 0);
1158 sv_setuv(sv, auv);
1159 PUSHs(sv_2mortal(sv));
1160 len--;
1161 auv = 0;
1162 }
1163 else if (++bytes >= sizeof(UV)) { /* promote to string */
1164 char *t;
1165 STRLEN n_a;
1166
1167 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1168 while (s < strend) {
1169 sv = mul128(sv, *s & 0x7f);
1170 if (!(*s++ & 0x80)) {
1171 bytes = 0;
1172 break;
1173 }
1174 }
1175 t = SvPV(sv, n_a);
1176 while (*t == '0')
1177 t++;
1178 sv_chop(sv, t);
1179 PUSHs(sv_2mortal(sv));
1180 len--;
1181 auv = 0;
1182 }
1183 }
1184 if ((s >= strend) && bytes)
18529408 1185 croak("Unterminated compressed integer");
a6ec74c1
JH
1186 }
1187 break;
1188 case 'P':
18529408
IZ
1189 if (star > 0)
1190 croak("P must have an explicit size");
a6ec74c1
JH
1191 EXTEND(SP, 1);
1192 if (sizeof(char*) > strend - s)
1193 break;
1194 else {
1195 Copy(s, &aptr, 1, char*);
1196 s += sizeof(char*);
1197 }
1198 sv = NEWSV(44, 0);
1199 if (aptr)
1200 sv_setpvn(sv, aptr, len);
1201 PUSHs(sv_2mortal(sv));
1202 break;
1203#ifdef HAS_QUAD
1204 case 'q':
1205 along = (strend - s) / sizeof(Quad_t);
1206 if (len > along)
1207 len = along;
fa8ec7c1
NC
1208 if (checksum) {
1209 while (len-- > 0) {
a6ec74c1
JH
1210 Copy(s, &aquad, 1, Quad_t);
1211 s += sizeof(Quad_t);
fa8ec7c1
NC
1212 if (checksum > bits_in_uv)
1213 cdouble += (NV)aquad;
1214 else
1215 culong += aquad;
a6ec74c1 1216 }
a6ec74c1 1217 }
fa8ec7c1
NC
1218 else {
1219 EXTEND(SP, len);
1220 EXTEND_MORTAL(len);
1221 while (len-- > 0) {
1222 if (s + sizeof(Quad_t) > strend)
1223 aquad = 0;
1224 else {
1225 Copy(s, &aquad, 1, Quad_t);
1226 s += sizeof(Quad_t);
1227 }
1228 sv = NEWSV(42, 0);
1229 if (aquad >= IV_MIN && aquad <= IV_MAX)
1230 sv_setiv(sv, (IV)aquad);
1231 else
1232 sv_setnv(sv, (NV)aquad);
1233 PUSHs(sv_2mortal(sv));
1234 }
1235 }
a6ec74c1
JH
1236 break;
1237 case 'Q':
206947d2 1238 along = (strend - s) / sizeof(Uquad_t);
a6ec74c1
JH
1239 if (len > along)
1240 len = along;
fa8ec7c1
NC
1241 if (checksum) {
1242 while (len-- > 0) {
a6ec74c1
JH
1243 Copy(s, &auquad, 1, Uquad_t);
1244 s += sizeof(Uquad_t);
fa8ec7c1
NC
1245 if (checksum > bits_in_uv)
1246 cdouble += (NV)auquad;
1247 else
1248 culong += auquad;
a6ec74c1 1249 }
a6ec74c1 1250 }
fa8ec7c1
NC
1251 else {
1252 EXTEND(SP, len);
1253 EXTEND_MORTAL(len);
1254 while (len-- > 0) {
1255 if (s + sizeof(Uquad_t) > strend)
1256 auquad = 0;
1257 else {
1258 Copy(s, &auquad, 1, Uquad_t);
1259 s += sizeof(Uquad_t);
1260 }
1261 sv = NEWSV(43, 0);
1262 if (auquad <= UV_MAX)
1263 sv_setuv(sv, (UV)auquad);
1264 else
1265 sv_setnv(sv, (NV)auquad);
1266 PUSHs(sv_2mortal(sv));
1267 }
1268 }
a6ec74c1
JH
1269 break;
1270#endif
1271 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1272 case 'f':
1273 case 'F':
1274 along = (strend - s) / sizeof(float);
1275 if (len > along)
1276 len = along;
1277 if (checksum) {
1278 while (len-- > 0) {
1279 Copy(s, &afloat, 1, float);
1280 s += sizeof(float);
1281 cdouble += afloat;
1282 }
1283 }
1284 else {
1285 EXTEND(SP, len);
1286 EXTEND_MORTAL(len);
1287 while (len-- > 0) {
1288 Copy(s, &afloat, 1, float);
1289 s += sizeof(float);
1290 sv = NEWSV(47, 0);
1291 sv_setnv(sv, (NV)afloat);
1292 PUSHs(sv_2mortal(sv));
1293 }
1294 }
1295 break;
1296 case 'd':
1297 case 'D':
1298 along = (strend - s) / sizeof(double);
1299 if (len > along)
1300 len = along;
1301 if (checksum) {
1302 while (len-- > 0) {
1303 Copy(s, &adouble, 1, double);
1304 s += sizeof(double);
1305 cdouble += adouble;
1306 }
1307 }
1308 else {
1309 EXTEND(SP, len);
1310 EXTEND_MORTAL(len);
1311 while (len-- > 0) {
1312 Copy(s, &adouble, 1, double);
1313 s += sizeof(double);
1314 sv = NEWSV(48, 0);
1315 sv_setnv(sv, (NV)adouble);
1316 PUSHs(sv_2mortal(sv));
1317 }
1318 }
1319 break;
1320 case 'u':
1321 /* MKS:
1322 * Initialise the decode mapping. By using a table driven
1323 * algorithm, the code will be character-set independent
1324 * (and just as fast as doing character arithmetic)
1325 */
1326 if (PL_uudmap['M'] == 0) {
1327 int i;
1328
1329 for (i = 0; i < sizeof(PL_uuemap); i += 1)
1330 PL_uudmap[(U8)PL_uuemap[i]] = i;
1331 /*
1332 * Because ' ' and '`' map to the same value,
1333 * we need to decode them both the same.
1334 */
1335 PL_uudmap[' '] = 0;
1336 }
1337
1338 along = (strend - s) * 3 / 4;
1339 sv = NEWSV(42, along);
1340 if (along)
1341 SvPOK_on(sv);
1342 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1343 I32 a, b, c, d;
1344 char hunk[4];
1345
1346 hunk[3] = '\0';
1347 len = PL_uudmap[*(U8*)s++] & 077;
1348 while (len > 0) {
1349 if (s < strend && ISUUCHAR(*s))
1350 a = PL_uudmap[*(U8*)s++] & 077;
1351 else
1352 a = 0;
1353 if (s < strend && ISUUCHAR(*s))
1354 b = PL_uudmap[*(U8*)s++] & 077;
1355 else
1356 b = 0;
1357 if (s < strend && ISUUCHAR(*s))
1358 c = PL_uudmap[*(U8*)s++] & 077;
1359 else
1360 c = 0;
1361 if (s < strend && ISUUCHAR(*s))
1362 d = PL_uudmap[*(U8*)s++] & 077;
1363 else
1364 d = 0;
1365 hunk[0] = (a << 2) | (b >> 4);
1366 hunk[1] = (b << 4) | (c >> 2);
1367 hunk[2] = (c << 6) | d;
1368 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1369 len -= 3;
1370 }
1371 if (*s == '\n')
1372 s++;
1373 else if (s[1] == '\n') /* possible checksum byte */
1374 s += 2;
1375 }
1376 XPUSHs(sv_2mortal(sv));
1377 break;
1378 }
1379 if (checksum) {
1380 sv = NEWSV(42, 0);
1381 if (strchr("fFdD", datumtype) ||
fa8ec7c1 1382 (checksum > bits_in_uv && strchr("csSiIlLnNUvVqQ", datumtype)) ) {
a6ec74c1
JH
1383 NV trouble;
1384
fa8ec7c1 1385 adouble = (NV) (1 << (checksum & 15));
a6ec74c1
JH
1386 while (checksum >= 16) {
1387 checksum -= 16;
1388 adouble *= 65536.0;
1389 }
a6ec74c1
JH
1390 while (cdouble < 0.0)
1391 cdouble += adouble;
1392 cdouble = Perl_modf(cdouble / adouble, &trouble) * adouble;
1393 sv_setnv(sv, cdouble);
1394 }
1395 else {
fa8ec7c1
NC
1396 if (checksum < bits_in_uv) {
1397 UV mask = ((UV)1 << checksum) - 1;
1398 culong &= mask;
a6ec74c1
JH
1399 }
1400 sv_setuv(sv, (UV)culong);
1401 }
1402 XPUSHs(sv_2mortal(sv));
1403 checksum = 0;
1404 }
1405 }
18529408
IZ
1406 if (new_s)
1407 *new_s = s;
1408 PUTBACK;
1409 return SP - PL_stack_base - start_sp_offset;
1410}
1411
1412PP(pp_unpack)
1413{
1414 dSP;
1415 dPOPPOPssrl;
1416 I32 gimme = GIMME_V;
1417 STRLEN llen;
1418 STRLEN rlen;
1419 register char *pat = SvPV(left, llen);
1420#ifdef PACKED_IS_OCTETS
1421 /* Packed side is assumed to be octets - so force downgrade if it
1422 has been UTF-8 encoded by accident
1423 */
1424 register char *s = SvPVbyte(right, rlen);
1425#else
1426 register char *s = SvPV(right, rlen);
1427#endif
1428 char *strend = s + rlen;
1429 register char *patend = pat + llen;
1430 register I32 cnt;
1431
1432 PUTBACK;
1433 cnt = unpack_str(pat, patend, s, s, strend, NULL, 0,
1434 ((gimme == G_SCALAR) ? UNPACK_ONLY_ONE : 0)
1435 | (DO_UTF8(right) ? UNPACK_DO_UTF8 : 0));
1436 SPAGAIN;
1437 if ( !cnt && gimme == G_SCALAR )
1438 PUSHs(&PL_sv_undef);
a6ec74c1
JH
1439 RETURN;
1440}
1441
1442STATIC void
1443S_doencodes(pTHX_ register SV *sv, register char *s, register I32 len)
1444{
1445 char hunk[5];
1446
1447 *hunk = PL_uuemap[len];
1448 sv_catpvn(sv, hunk, 1);
1449 hunk[4] = '\0';
1450 while (len > 2) {
1451 hunk[0] = PL_uuemap[(077 & (*s >> 2))];
1452 hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017)))];
1453 hunk[2] = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1454 hunk[3] = PL_uuemap[(077 & (s[2] & 077))];
1455 sv_catpvn(sv, hunk, 4);
1456 s += 3;
1457 len -= 3;
1458 }
1459 if (len > 0) {
1460 char r = (len > 1 ? s[1] : '\0');
1461 hunk[0] = PL_uuemap[(077 & (*s >> 2))];
1462 hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((r >> 4) & 017)))];
1463 hunk[2] = PL_uuemap[(077 & ((r << 2) & 074))];
1464 hunk[3] = PL_uuemap[0];
1465 sv_catpvn(sv, hunk, 4);
1466 }
1467 sv_catpvn(sv, "\n", 1);
1468}
1469
1470STATIC SV *
1471S_is_an_int(pTHX_ char *s, STRLEN l)
1472{
1473 STRLEN n_a;
1474 SV *result = newSVpvn(s, l);
1475 char *result_c = SvPV(result, n_a); /* convenience */
1476 char *out = result_c;
1477 bool skip = 1;
1478 bool ignore = 0;
1479
1480 while (*s) {
1481 switch (*s) {
1482 case ' ':
1483 break;
1484 case '+':
1485 if (!skip) {
1486 SvREFCNT_dec(result);
1487 return (NULL);
1488 }
1489 break;
1490 case '0':
1491 case '1':
1492 case '2':
1493 case '3':
1494 case '4':
1495 case '5':
1496 case '6':
1497 case '7':
1498 case '8':
1499 case '9':
1500 skip = 0;
1501 if (!ignore) {
1502 *(out++) = *s;
1503 }
1504 break;
1505 case '.':
1506 ignore = 1;
1507 break;
1508 default:
1509 SvREFCNT_dec(result);
1510 return (NULL);
1511 }
1512 s++;
1513 }
1514 *(out++) = '\0';
1515 SvCUR_set(result, out - result_c);
1516 return (result);
1517}
1518
1519/* pnum must be '\0' terminated */
1520STATIC int
1521S_div128(pTHX_ SV *pnum, bool *done)
1522{
1523 STRLEN len;
1524 char *s = SvPV(pnum, len);
1525 int m = 0;
1526 int r = 0;
1527 char *t = s;
1528
1529 *done = 1;
1530 while (*t) {
1531 int i;
1532
1533 i = m * 10 + (*t - '0');
1534 m = i & 0x7F;
1535 r = (i >> 7); /* r < 10 */
1536 if (r) {
1537 *done = 0;
1538 }
1539 *(t++) = '0' + r;
1540 }
1541 *(t++) = '\0';
1542 SvCUR_set(pnum, (STRLEN) (t - s));
1543 return (m);
1544}
1545
18529408 1546#define PACK_CHILD 0x1
a6ec74c1 1547
18529408
IZ
1548/*
1549=for apidoc pack_cat
1550
1551The engine implementing pack() Perl function.
1552
1553=cut */
1554
1555void
1556Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 1557{
a6ec74c1
JH
1558 register I32 items;
1559 STRLEN fromlen;
a6ec74c1
JH
1560 register I32 len;
1561 I32 datumtype;
1562 SV *fromstr;
1563 /*SUPPRESS 442*/
1564 static char null10[] = {0,0,0,0,0,0,0,0,0,0};
1565 static char *space10 = " ";
18529408 1566 int star;
a6ec74c1
JH
1567
1568 /* These must not be in registers: */
1569 char achar;
1570 I16 ashort;
1571 int aint;
1572 unsigned int auint;
1573 I32 along;
1574 U32 aulong;
1575#ifdef HAS_QUAD
1576 Quad_t aquad;
1577 Uquad_t auquad;
1578#endif
1579 char *aptr;
1580 float afloat;
1581 double adouble;
1582 int commas = 0;
1583#ifdef PERL_NATINT_PACK
1584 int natint; /* native integer */
1585#endif
1586
18529408
IZ
1587 items = endlist - beglist;
1588#ifndef PACKED_IS_OCTETS
1589 pat = next_symbol(pat, patend);
1590 if (pat < patend && *pat == 'U' && !flags)
1591 SvUTF8_on(cat);
1592#endif
1593 while ((pat = next_symbol(pat, patend)) < patend) {
a6ec74c1 1594 SV *lengthcode = Nullsv;
18529408 1595#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
a6ec74c1
JH
1596 datumtype = *pat++ & 0xFF;
1597#ifdef PERL_NATINT_PACK
1598 natint = 0;
1599#endif
a6ec74c1 1600 if (*pat == '!') {
206947d2 1601 static const char natstr[] = "sSiIlL";
a6ec74c1
JH
1602
1603 if (strchr(natstr, datumtype)) {
1604#ifdef PERL_NATINT_PACK
1605 natint = 1;
1606#endif
1607 pat++;
1608 }
1609 else
18529408 1610 croak("'!' allowed only after types %s", natstr);
a6ec74c1 1611 }
18529408
IZ
1612 len = find_count(&pat, patend, &star);
1613 if (star > 0) /* Count is '*' */
a6ec74c1 1614 len = strchr("@Xxu", datumtype) ? 0 : items;
18529408 1615 else if (star < 0) /* Default len */
a6ec74c1 1616 len = 1;
18529408 1617 if (*pat == '/') { /* doing lookahead how... */
a6ec74c1
JH
1618 ++pat;
1619 if ((*pat != 'a' && *pat != 'A' && *pat != 'Z') || pat[1] != '*')
18529408 1620 croak("/ must be followed by a*, A* or Z*");
a6ec74c1 1621 lengthcode = sv_2mortal(newSViv(sv_len(items > 0
18529408 1622 ? *beglist : &PL_sv_no)
a6ec74c1
JH
1623 + (*pat == 'Z' ? 1 : 0)));
1624 }
1625 switch(datumtype) {
1626 default:
18529408 1627 croak("Invalid type in pack: '%c'", (int)datumtype);
a6ec74c1
JH
1628 case ',': /* grandfather in commas but with a warning */
1629 if (commas++ == 0 && ckWARN(WARN_PACK))
1630 Perl_warner(aTHX_ WARN_PACK,
1631 "Invalid type in pack: '%c'", (int)datumtype);
1632 break;
1633 case '%':
18529408 1634 croak("%% may only be used in unpack");
a6ec74c1
JH
1635 case '@':
1636 len -= SvCUR(cat);
1637 if (len > 0)
1638 goto grow;
1639 len = -len;
1640 if (len > 0)
1641 goto shrink;
1642 break;
18529408
IZ
1643 case '(':
1644 {
1645 char *beg = pat;
1646 SV **savebeglist = beglist; /* beglist de-register-ed */
1647
1648 if (star >= 0)
1649 croak("()-group starts with a count");
1650 aptr = group_end(beg, patend, ')');
1651 pat = aptr + 1;
1652 if (star != -2) {
1653 len = find_count(&pat, patend, &star);
1654 if (star < 0) /* No count */
1655 len = 1;
1656 else if (star > 0) /* Star */
1657 len = items; /* long enough? */
1658 }
1659 while (len--) {
1660 pack_cat(cat, beg, aptr, savebeglist, endlist,
1661 &savebeglist, PACK_CHILD);
1662 if (star > 0 && savebeglist == endlist)
1663 break; /* No way to continue */
1664 }
1665 beglist = savebeglist;
1666 break;
1667 }
a6ec74c1
JH
1668 case 'X':
1669 shrink:
1670 if (SvCUR(cat) < len)
18529408 1671 croak("X outside of string");
a6ec74c1
JH
1672 SvCUR(cat) -= len;
1673 *SvEND(cat) = '\0';
1674 break;
1675 case 'x':
1676 grow:
1677 while (len >= 10) {
1678 sv_catpvn(cat, null10, 10);
1679 len -= 10;
1680 }
1681 sv_catpvn(cat, null10, len);
1682 break;
1683 case 'A':
1684 case 'Z':
1685 case 'a':
1686 fromstr = NEXTFROM;
1687 aptr = SvPV(fromstr, fromlen);
18529408 1688 if (star > 0) { /* -2 after '/' */
a6ec74c1
JH
1689 len = fromlen;
1690 if (datumtype == 'Z')
1691 ++len;
1692 }
1693 if (fromlen >= len) {
1694 sv_catpvn(cat, aptr, len);
1695 if (datumtype == 'Z')
1696 *(SvEND(cat)-1) = '\0';
1697 }
1698 else {
1699 sv_catpvn(cat, aptr, fromlen);
1700 len -= fromlen;
1701 if (datumtype == 'A') {
1702 while (len >= 10) {
1703 sv_catpvn(cat, space10, 10);
1704 len -= 10;
1705 }
1706 sv_catpvn(cat, space10, len);
1707 }
1708 else {
1709 while (len >= 10) {
1710 sv_catpvn(cat, null10, 10);
1711 len -= 10;
1712 }
1713 sv_catpvn(cat, null10, len);
1714 }
1715 }
1716 break;
1717 case 'B':
1718 case 'b':
1719 {
1720 register char *str;
1721 I32 saveitems;
1722
1723 fromstr = NEXTFROM;
1724 saveitems = items;
1725 str = SvPV(fromstr, fromlen);
18529408 1726 if (star > 0)
a6ec74c1
JH
1727 len = fromlen;
1728 aint = SvCUR(cat);
1729 SvCUR(cat) += (len+7)/8;
1730 SvGROW(cat, SvCUR(cat) + 1);
1731 aptr = SvPVX(cat) + aint;
1732 if (len > fromlen)
1733 len = fromlen;
1734 aint = len;
1735 items = 0;
1736 if (datumtype == 'B') {
1737 for (len = 0; len++ < aint;) {
1738 items |= *str++ & 1;
1739 if (len & 7)
1740 items <<= 1;
1741 else {
1742 *aptr++ = items & 0xff;
1743 items = 0;
1744 }
1745 }
1746 }
1747 else {
1748 for (len = 0; len++ < aint;) {
1749 if (*str++ & 1)
1750 items |= 128;
1751 if (len & 7)
1752 items >>= 1;
1753 else {
1754 *aptr++ = items & 0xff;
1755 items = 0;
1756 }
1757 }
1758 }
1759 if (aint & 7) {
1760 if (datumtype == 'B')
1761 items <<= 7 - (aint & 7);
1762 else
1763 items >>= 7 - (aint & 7);
1764 *aptr++ = items & 0xff;
1765 }
1766 str = SvPVX(cat) + SvCUR(cat);
1767 while (aptr <= str)
1768 *aptr++ = '\0';
1769
1770 items = saveitems;
1771 }
1772 break;
1773 case 'H':
1774 case 'h':
1775 {
1776 register char *str;
1777 I32 saveitems;
1778
1779 fromstr = NEXTFROM;
1780 saveitems = items;
1781 str = SvPV(fromstr, fromlen);
18529408 1782 if (star > 0)
a6ec74c1
JH
1783 len = fromlen;
1784 aint = SvCUR(cat);
1785 SvCUR(cat) += (len+1)/2;
1786 SvGROW(cat, SvCUR(cat) + 1);
1787 aptr = SvPVX(cat) + aint;
1788 if (len > fromlen)
1789 len = fromlen;
1790 aint = len;
1791 items = 0;
1792 if (datumtype == 'H') {
1793 for (len = 0; len++ < aint;) {
1794 if (isALPHA(*str))
1795 items |= ((*str++ & 15) + 9) & 15;
1796 else
1797 items |= *str++ & 15;
1798 if (len & 1)
1799 items <<= 4;
1800 else {
1801 *aptr++ = items & 0xff;
1802 items = 0;
1803 }
1804 }
1805 }
1806 else {
1807 for (len = 0; len++ < aint;) {
1808 if (isALPHA(*str))
1809 items |= (((*str++ & 15) + 9) & 15) << 4;
1810 else
1811 items |= (*str++ & 15) << 4;
1812 if (len & 1)
1813 items >>= 4;
1814 else {
1815 *aptr++ = items & 0xff;
1816 items = 0;
1817 }
1818 }
1819 }
1820 if (aint & 1)
1821 *aptr++ = items & 0xff;
1822 str = SvPVX(cat) + SvCUR(cat);
1823 while (aptr <= str)
1824 *aptr++ = '\0';
1825
1826 items = saveitems;
1827 }
1828 break;
1829 case 'C':
1830 case 'c':
1831 while (len-- > 0) {
1832 fromstr = NEXTFROM;
1833 switch (datumtype) {
1834 case 'C':
1835 aint = SvIV(fromstr);
1836 if ((aint < 0 || aint > 255) &&
1837 ckWARN(WARN_PACK))
1838 Perl_warner(aTHX_ WARN_PACK,
1839 "Character in \"C\" format wrapped");
1840 achar = aint & 255;
1841 sv_catpvn(cat, &achar, sizeof(char));
1842 break;
1843 case 'c':
1844 aint = SvIV(fromstr);
1845 if ((aint < -128 || aint > 127) &&
1846 ckWARN(WARN_PACK))
1847 Perl_warner(aTHX_ WARN_PACK,
1848 "Character in \"c\" format wrapped");
1849 achar = aint & 255;
1850 sv_catpvn(cat, &achar, sizeof(char));
1851 break;
1852 }
1853 }
1854 break;
1855 case 'U':
1856 while (len-- > 0) {
1857 fromstr = NEXTFROM;
e87322b2 1858 auint = UNI_TO_NATIVE(SvUV(fromstr));
a6ec74c1
JH
1859 SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1);
1860 SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint)
1861 - SvPVX(cat));
1862 }
1863 *SvEND(cat) = '\0';
1864 break;
1865 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
1866 case 'f':
1867 case 'F':
1868 while (len-- > 0) {
1869 fromstr = NEXTFROM;
1870 afloat = (float)SvNV(fromstr);
1871 sv_catpvn(cat, (char *)&afloat, sizeof (float));
1872 }
1873 break;
1874 case 'd':
1875 case 'D':
1876 while (len-- > 0) {
1877 fromstr = NEXTFROM;
1878 adouble = (double)SvNV(fromstr);
1879 sv_catpvn(cat, (char *)&adouble, sizeof (double));
1880 }
1881 break;
1882 case 'n':
1883 while (len-- > 0) {
1884 fromstr = NEXTFROM;
1885 ashort = (I16)SvIV(fromstr);
1886#ifdef HAS_HTONS
1887 ashort = PerlSock_htons(ashort);
1888#endif
1889 CAT16(cat, &ashort);
1890 }
1891 break;
1892 case 'v':
1893 while (len-- > 0) {
1894 fromstr = NEXTFROM;
1895 ashort = (I16)SvIV(fromstr);
1896#ifdef HAS_HTOVS
1897 ashort = htovs(ashort);
1898#endif
1899 CAT16(cat, &ashort);
1900 }
1901 break;
1902 case 'S':
1903#if SHORTSIZE != SIZE16
1904 if (natint) {
1905 unsigned short aushort;
1906
1907 while (len-- > 0) {
1908 fromstr = NEXTFROM;
1909 aushort = SvUV(fromstr);
1910 sv_catpvn(cat, (char *)&aushort, sizeof(unsigned short));
1911 }
1912 }
1913 else
1914#endif
1915 {
1916 U16 aushort;
1917
1918 while (len-- > 0) {
1919 fromstr = NEXTFROM;
1920 aushort = (U16)SvUV(fromstr);
1921 CAT16(cat, &aushort);
1922 }
1923
1924 }
1925 break;
1926 case 's':
1927#if SHORTSIZE != SIZE16
1928 if (natint) {
1929 short ashort;
1930
1931 while (len-- > 0) {
1932 fromstr = NEXTFROM;
1933 ashort = SvIV(fromstr);
1934 sv_catpvn(cat, (char *)&ashort, sizeof(short));
1935 }
1936 }
1937 else
1938#endif
1939 {
1940 while (len-- > 0) {
1941 fromstr = NEXTFROM;
1942 ashort = (I16)SvIV(fromstr);
1943 CAT16(cat, &ashort);
1944 }
1945 }
1946 break;
1947 case 'I':
1948 while (len-- > 0) {
1949 fromstr = NEXTFROM;
1950 auint = SvUV(fromstr);
1951 sv_catpvn(cat, (char*)&auint, sizeof(unsigned int));
1952 }
1953 break;
1954 case 'w':
1955 while (len-- > 0) {
1956 fromstr = NEXTFROM;
1957 adouble = Perl_floor(SvNV(fromstr));
1958
1959 if (adouble < 0)
18529408 1960 croak("Cannot compress negative numbers");
a6ec74c1
JH
1961
1962 if (
1963#if UVSIZE > 4 && UVSIZE >= NVSIZE
1964 adouble <= 0xffffffff
1965#else
1966# ifdef CXUX_BROKEN_CONSTANT_CONVERT
1967 adouble <= UV_MAX_cxux
1968# else
1969 adouble <= UV_MAX
1970# endif
1971#endif
1972 )
1973 {
1974 char buf[1 + sizeof(UV)];
1975 char *in = buf + sizeof(buf);
1976 UV auv = U_V(adouble);
1977
1978 do {
1979 *--in = (auv & 0x7f) | 0x80;
1980 auv >>= 7;
1981 } while (auv);
1982 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
1983 sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
1984 }
1985 else if (SvPOKp(fromstr)) { /* decimal string arithmetics */
1986 char *from, *result, *in;
1987 SV *norm;
1988 STRLEN len;
1989 bool done;
1990
1991 /* Copy string and check for compliance */
1992 from = SvPV(fromstr, len);
1993 if ((norm = is_an_int(from, len)) == NULL)
18529408 1994 croak("can compress only unsigned integer");
a6ec74c1
JH
1995
1996 New('w', result, len, char);
1997 in = result + len;
1998 done = FALSE;
1999 while (!done)
2000 *--in = div128(norm, &done) | 0x80;
2001 result[len - 1] &= 0x7F; /* clear continue bit */
2002 sv_catpvn(cat, in, (result + len) - in);
2003 Safefree(result);
2004 SvREFCNT_dec(norm); /* free norm */
2005 }
2006 else if (SvNOKp(fromstr)) {
2007 char buf[sizeof(double) * 2]; /* 8/7 <= 2 */
2008 char *in = buf + sizeof(buf);
2009
2010 do {
2011 double next = floor(adouble / 128);
2012 *--in = (unsigned char)(adouble - (next * 128)) | 0x80;
2013 if (in <= buf) /* this cannot happen ;-) */
18529408 2014 croak("Cannot compress integer");
a6ec74c1
JH
2015 adouble = next;
2016 } while (adouble > 0);
2017 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2018 sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
2019 }
735b914b
JH
2020 else {
2021 char *from, *result, *in;
2022 SV *norm;
2023 STRLEN len;
2024 bool done;
2025
2026 /* Copy string and check for compliance */
2027 from = SvPV(fromstr, len);
2028 if ((norm = is_an_int(from, len)) == NULL)
18529408 2029 croak("can compress only unsigned integer");
735b914b
JH
2030
2031 New('w', result, len, char);
2032 in = result + len;
2033 done = FALSE;
2034 while (!done)
2035 *--in = div128(norm, &done) | 0x80;
2036 result[len - 1] &= 0x7F; /* clear continue bit */
2037 sv_catpvn(cat, in, (result + len) - in);
2038 Safefree(result);
2039 SvREFCNT_dec(norm); /* free norm */
2040 }
a6ec74c1
JH
2041 }
2042 break;
2043 case 'i':
2044 while (len-- > 0) {
2045 fromstr = NEXTFROM;
2046 aint = SvIV(fromstr);
2047 sv_catpvn(cat, (char*)&aint, sizeof(int));
2048 }
2049 break;
2050 case 'N':
2051 while (len-- > 0) {
2052 fromstr = NEXTFROM;
2053 aulong = SvUV(fromstr);
2054#ifdef HAS_HTONL
2055 aulong = PerlSock_htonl(aulong);
2056#endif
2057 CAT32(cat, &aulong);
2058 }
2059 break;
2060 case 'V':
2061 while (len-- > 0) {
2062 fromstr = NEXTFROM;
2063 aulong = SvUV(fromstr);
2064#ifdef HAS_HTOVL
2065 aulong = htovl(aulong);
2066#endif
2067 CAT32(cat, &aulong);
2068 }
2069 break;
2070 case 'L':
2071#if LONGSIZE != SIZE32
2072 if (natint) {
2073 unsigned long aulong;
2074
2075 while (len-- > 0) {
2076 fromstr = NEXTFROM;
2077 aulong = SvUV(fromstr);
2078 sv_catpvn(cat, (char *)&aulong, sizeof(unsigned long));
2079 }
2080 }
2081 else
2082#endif
2083 {
2084 while (len-- > 0) {
2085 fromstr = NEXTFROM;
2086 aulong = SvUV(fromstr);
2087 CAT32(cat, &aulong);
2088 }
2089 }
2090 break;
2091 case 'l':
2092#if LONGSIZE != SIZE32
2093 if (natint) {
2094 long along;
2095
2096 while (len-- > 0) {
2097 fromstr = NEXTFROM;
2098 along = SvIV(fromstr);
2099 sv_catpvn(cat, (char *)&along, sizeof(long));
2100 }
2101 }
2102 else
2103#endif
2104 {
2105 while (len-- > 0) {
2106 fromstr = NEXTFROM;
2107 along = SvIV(fromstr);
2108 CAT32(cat, &along);
2109 }
2110 }
2111 break;
2112#ifdef HAS_QUAD
2113 case 'Q':
2114 while (len-- > 0) {
2115 fromstr = NEXTFROM;
2116 auquad = (Uquad_t)SvUV(fromstr);
2117 sv_catpvn(cat, (char*)&auquad, sizeof(Uquad_t));
2118 }
2119 break;
2120 case 'q':
2121 while (len-- > 0) {
2122 fromstr = NEXTFROM;
2123 aquad = (Quad_t)SvIV(fromstr);
2124 sv_catpvn(cat, (char*)&aquad, sizeof(Quad_t));
2125 }
2126 break;
2127#endif
2128 case 'P':
2129 len = 1; /* assume SV is correct length */
2130 /* FALL THROUGH */
2131 case 'p':
2132 while (len-- > 0) {
2133 fromstr = NEXTFROM;
2134 if (fromstr == &PL_sv_undef)
2135 aptr = NULL;
2136 else {
2137 STRLEN n_a;
2138 /* XXX better yet, could spirit away the string to
2139 * a safe spot and hang on to it until the result
2140 * of pack() (and all copies of the result) are
2141 * gone.
2142 */
2143 if (ckWARN(WARN_PACK) && (SvTEMP(fromstr)
2144 || (SvPADTMP(fromstr)
2145 && !SvREADONLY(fromstr))))
2146 {
2147 Perl_warner(aTHX_ WARN_PACK,
2148 "Attempt to pack pointer to temporary value");
2149 }
2150 if (SvPOK(fromstr) || SvNIOK(fromstr))
2151 aptr = SvPV(fromstr,n_a);
2152 else
2153 aptr = SvPV_force(fromstr,n_a);
2154 }
2155 sv_catpvn(cat, (char*)&aptr, sizeof(char*));
2156 }
2157 break;
2158 case 'u':
2159 fromstr = NEXTFROM;
2160 aptr = SvPV(fromstr, fromlen);
2161 SvGROW(cat, fromlen * 4 / 3);
19c9db5e 2162 if (len <= 2)
a6ec74c1
JH
2163 len = 45;
2164 else
2165 len = len / 3 * 3;
2166 while (fromlen > 0) {
2167 I32 todo;
2168
2169 if (fromlen > len)
2170 todo = len;
2171 else
2172 todo = fromlen;
2173 doencodes(cat, aptr, todo);
2174 fromlen -= todo;
2175 aptr += todo;
2176 }
2177 break;
2178 }
2179 }
18529408
IZ
2180 if (next_in_list)
2181 *next_in_list = beglist;
2182}
2183#undef NEXTFROM
2184
2185
2186PP(pp_pack)
2187{
2188 dSP; dMARK; dORIGMARK; dTARGET;
2189 register SV *cat = TARG;
2190 STRLEN fromlen;
2191 register char *pat = SvPVx(*++MARK, fromlen);
2192 register char *patend = pat + fromlen;
2193
2194 MARK++;
2195 sv_setpvn(cat, "", 0);
2196
2197 pack_cat(cat, pat, patend, MARK, SP + 1, NULL, 0);
2198
a6ec74c1
JH
2199 SvSETMAGIC(cat);
2200 SP = ORIGMARK;
2201 PUSHs(cat);
2202 RETURN;
2203}
a6ec74c1 2204