This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Minor AUTHORS patch
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e
NC
34#if PERL_VERSION >= 9
35#define PERL_PACK_CAN_BYTEORDER
36#define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
a6ec74c1 39/*
a6ec74c1
JH
40 * Offset for integer pack/unpack.
41 *
42 * On architectures where I16 and I32 aren't really 16 and 32 bits,
43 * which for now are all Crays, pack and unpack have to play games.
44 */
45
46/*
47 * These values are required for portability of pack() output.
48 * If they're not right on your machine, then pack() and unpack()
49 * wouldn't work right anyway; you'll need to apply the Cray hack.
50 * (I'd like to check them with #if, but you can't use sizeof() in
51 * the preprocessor.) --???
52 */
53/*
54 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
55 defines are now in config.h. --Andy Dougherty April 1998
56 */
57#define SIZE16 2
58#define SIZE32 4
59
60/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
61 --jhi Feb 1999 */
62
1109a392
MHM
63#if U16SIZE > SIZE16 || U32SIZE > SIZE32
64# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
65# define OFF16(p) ((char*)(p))
66# define OFF32(p) ((char*)(p))
a6ec74c1 67# else
1109a392 68# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
69# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
70# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
71# else
08ca2aa3 72 ++++ bad cray byte order
a6ec74c1
JH
73# endif
74# endif
a6ec74c1 75#else
08ca2aa3
TH
76# define OFF16(p) ((char *) (p))
77# define OFF32(p) ((char *) (p))
a6ec74c1
JH
78#endif
79
08ca2aa3
TH
80#define COPY16(s,p) Copy(s, OFF16(p), SIZE16, char)
81#define COPY32(s,p) Copy(s, OFF32(p), SIZE32, char)
82#define CAT16(sv,p) sv_catpvn(sv, OFF16(p), SIZE16)
83#define CAT32(sv,p) sv_catpvn(sv, OFF32(p), SIZE32)
84
85/* Only to be used inside a loop (see the break) */
86#define COPYVAR(s,strend,utf8,var,format) \
87STMT_START { \
88 if (utf8) { \
89 if (!next_uni_bytes(aTHX_ &s, strend, \
90 (char *) &var, sizeof(var))) break; \
91 } else { \
92 Copy(s, (char *) &var, sizeof(var), char); \
93 s += sizeof(var); \
94 } \
95 DO_BO_UNPACK(var, format); \
96} STMT_END
97
49704364
LW
98/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
99#define MAX_SUB_TEMPLATE_LEVEL 100
100
66c611c5 101/* flags (note that type modifiers can also be used as flags!) */
08ca2aa3
TH
102#define FLAG_UNPACK_WAS_UTF8 0x40 /* original had FLAG_UNPACK_DO_UTF8 */
103#define FLAG_UNPACK_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 104#define FLAG_UNPACK_ONLY_ONE 0x10
08ca2aa3 105#define FLAG_UNPACK_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
106#define FLAG_SLASH 0x04
107#define FLAG_COMMA 0x02
108#define FLAG_PACK 0x01
109
a6ec74c1
JH
110STATIC SV *
111S_mul128(pTHX_ SV *sv, U8 m)
112{
113 STRLEN len;
114 char *s = SvPV(sv, len);
115 char *t;
116 U32 i = 0;
117
118 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
119 SV *tmpNew = newSVpvn("0000000000", 10);
120
121 sv_catsv(tmpNew, sv);
122 SvREFCNT_dec(sv); /* free old sv */
123 sv = tmpNew;
124 s = SvPV(sv, len);
125 }
126 t = s + len - 1;
127 while (!*t) /* trailing '\0'? */
128 t--;
129 while (t > s) {
130 i = ((*t - '0') << 7) + m;
eb160463
GS
131 *(t--) = '0' + (char)(i % 10);
132 m = (char)(i / 10);
a6ec74c1
JH
133 }
134 return (sv);
135}
136
137/* Explosives and implosives. */
138
139#if 'I' == 73 && 'J' == 74
140/* On an ASCII/ISO kind of system */
141#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
142#else
143/*
144 Some other sort of character set - use memchr() so we don't match
145 the null byte.
146 */
147#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
148#endif
149
66c611c5 150/* type modifiers */
62f95557 151#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
152#define TYPE_IS_BIG_ENDIAN 0x200
153#define TYPE_IS_LITTLE_ENDIAN 0x400
154#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 155#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
156#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
157
7212898e
NC
158#ifdef PERL_PACK_CAN_SHRIEKSIGN
159#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
160#else
161#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
162#endif
163
164#ifndef PERL_PACK_CAN_BYTEORDER
165/* Put "can't" first because it is shorter */
166# define TYPE_ENDIANNESS(t) 0
167# define TYPE_NO_ENDIANNESS(t) (t)
168
169# define ENDIANNESS_ALLOWED_TYPES ""
170
171# define DO_BO_UNPACK(var, type)
172# define DO_BO_PACK(var, type)
173# define DO_BO_UNPACK_PTR(var, type, pre_cast)
174# define DO_BO_PACK_PTR(var, type, pre_cast)
175# define DO_BO_UNPACK_N(var, type)
176# define DO_BO_PACK_N(var, type)
177# define DO_BO_UNPACK_P(var)
178# define DO_BO_PACK_P(var)
66c611c5 179
7212898e
NC
180#else
181
182# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
183# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
184
185# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
186
187# define DO_BO_UNPACK(var, type) \
1109a392 188 STMT_START { \
66c611c5 189 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
190 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
191 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
192 default: break; \
193 } \
194 } STMT_END
195
7212898e 196# define DO_BO_PACK(var, type) \
1109a392 197 STMT_START { \
66c611c5 198 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
199 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
200 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
201 default: break; \
202 } \
203 } STMT_END
204
7212898e 205# define DO_BO_UNPACK_PTR(var, type, pre_cast) \
1109a392 206 STMT_START { \
66c611c5 207 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
208 case TYPE_IS_BIG_ENDIAN: \
209 var = (void *) my_betoh ## type ((pre_cast) var); \
210 break; \
211 case TYPE_IS_LITTLE_ENDIAN: \
212 var = (void *) my_letoh ## type ((pre_cast) var); \
213 break; \
214 default: \
215 break; \
216 } \
217 } STMT_END
218
7212898e 219# define DO_BO_PACK_PTR(var, type, pre_cast) \
1109a392 220 STMT_START { \
66c611c5 221 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
222 case TYPE_IS_BIG_ENDIAN: \
223 var = (void *) my_htobe ## type ((pre_cast) var); \
224 break; \
225 case TYPE_IS_LITTLE_ENDIAN: \
226 var = (void *) my_htole ## type ((pre_cast) var); \
227 break; \
228 default: \
229 break; \
230 } \
231 } STMT_END
232
7212898e 233# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
234 STMT_START { \
235 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
236 case TYPE_IS_BIG_ENDIAN: \
237 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
238 "platform", #action, #type); \
239 break; \
240 case TYPE_IS_LITTLE_ENDIAN: \
241 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
242 "platform", #action, #type); \
243 break; \
244 default: \
245 break; \
246 } \
247 } STMT_END
248
7212898e
NC
249# if PTRSIZE == INTSIZE
250# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
251# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
252# elif PTRSIZE == LONGSIZE
253# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
254# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
255# else
256# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
257# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
258# endif
1109a392 259
7212898e 260# if defined(my_htolen) && defined(my_letohn) && \
1109a392 261 defined(my_htoben) && defined(my_betohn)
7212898e 262# define DO_BO_UNPACK_N(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
266 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
267 default: break; \
268 } \
269 } STMT_END
270
7212898e 271# define DO_BO_PACK_N(var, type) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
274 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
275 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
276 default: break; \
277 } \
278 } STMT_END
7212898e
NC
279# else
280# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
281# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
282# endif
283
1109a392 284#endif
62f95557 285
78d46eaa 286#define PACK_SIZE_CANNOT_CSUM 0x80
a72c271b 287#define PACK_SIZE_SPARE 0x40
78d46eaa
NC
288#define PACK_SIZE_MASK 0x3F
289
290
291struct packsize_t {
58c0bd94 292 const unsigned char *array;
78d46eaa
NC
293 int first;
294 int size;
295};
296
297#define PACK_SIZE_NORMAL 0
298#define PACK_SIZE_SHRIEKING 1
299
300/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
301 in). You're unlikely ever to need to regenerate them. */
302#if 'J'-'I' == 1
303/* ASCII */
304unsigned char size_normal[53] = {
305 /* C */ sizeof(unsigned char),
80a13697 306#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
78d46eaa 307 /* D */ LONG_DOUBLESIZE,
80a13697
NC
308#else
309 0,
310#endif
78d46eaa
NC
311 0,
312 /* F */ NVSIZE,
313 0, 0,
314 /* I */ sizeof(unsigned int),
315 /* J */ UVSIZE,
316 0,
317 /* L */ SIZE32,
318 0,
319 /* N */ SIZE32,
320 0, 0,
80a13697 321#if defined(HAS_QUAD)
78d46eaa 322 /* Q */ sizeof(Uquad_t),
80a13697
NC
323#else
324 0,
325#endif
78d46eaa
NC
326 0,
327 /* S */ SIZE16,
328 0,
329 /* U */ sizeof(char),
330 /* V */ SIZE32,
08ca2aa3
TH
331 /* W */ sizeof(unsigned char),
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78d46eaa
NC
333 /* c */ sizeof(char),
334 /* d */ sizeof(double),
335 0,
336 /* f */ sizeof(float),
337 0, 0,
338 /* i */ sizeof(int),
339 /* j */ IVSIZE,
340 0,
341 /* l */ SIZE32,
342 0,
343 /* n */ SIZE16,
344 0,
a72c271b 345 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 346#if defined(HAS_QUAD)
78d46eaa 347 /* q */ sizeof(Quad_t),
80a13697
NC
348#else
349 0,
350#endif
78d46eaa
NC
351 0,
352 /* s */ SIZE16,
353 0, 0,
354 /* v */ SIZE16,
7212898e 355 /* w */ sizeof(char) | PACK_SIZE_CANNOT_CSUM,
78d46eaa
NC
356};
357unsigned char size_shrieking[46] = {
358 /* I */ sizeof(unsigned int),
359 0, 0,
360 /* L */ sizeof(unsigned long),
361 0,
7212898e 362#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 363 /* N */ SIZE32,
7212898e
NC
364#else
365 0,
366#endif
78d46eaa
NC
367 0, 0, 0, 0,
368 /* S */ sizeof(unsigned short),
369 0, 0,
7212898e 370#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 371 /* V */ SIZE32,
7212898e
NC
372#else
373 0,
374#endif
78d46eaa
NC
375 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
376 /* i */ sizeof(int),
377 0, 0,
378 /* l */ sizeof(long),
379 0,
7212898e 380#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 381 /* n */ SIZE16,
7212898e
NC
382#else
383 0,
384#endif
78d46eaa
NC
385 0, 0, 0, 0,
386 /* s */ sizeof(short),
387 0, 0,
7212898e 388#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 389 /* v */ SIZE16
7212898e
NC
390#else
391 0
392#endif
78d46eaa
NC
393};
394struct packsize_t packsize[2] = {
395 {size_normal, 67, 53},
396 {size_shrieking, 73, 46}
397};
398#else
399/* EBCDIC (or bust) */
08ca2aa3 400unsigned char size_normal[100] = {
78d46eaa
NC
401 /* c */ sizeof(char),
402 /* d */ sizeof(double),
403 0,
404 /* f */ sizeof(float),
405 0, 0,
406 /* i */ sizeof(int),
407 0, 0, 0, 0, 0, 0, 0,
408 /* j */ IVSIZE,
409 0,
410 /* l */ SIZE32,
411 0,
412 /* n */ SIZE16,
413 0,
a72c271b 414 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 415#if defined(HAS_QUAD)
78d46eaa 416 /* q */ sizeof(Quad_t),
80a13697
NC
417#else
418 0,
419#endif
78d46eaa
NC
420 0, 0, 0, 0, 0, 0, 0, 0, 0,
421 /* s */ SIZE16,
422 0, 0,
423 /* v */ SIZE16,
424 /* w */ sizeof(char) | PACK_SIZE_CANNOT_CSUM,
425 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
426 0, 0,
427 /* C */ sizeof(unsigned char),
80a13697 428#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
78d46eaa 429 /* D */ LONG_DOUBLESIZE,
80a13697
NC
430#else
431 0,
432#endif
78d46eaa
NC
433 0,
434 /* F */ NVSIZE,
435 0, 0,
436 /* I */ sizeof(unsigned int),
437 0, 0, 0, 0, 0, 0, 0,
438 /* J */ UVSIZE,
439 0,
440 /* L */ SIZE32,
441 0,
442 /* N */ SIZE32,
443 0, 0,
80a13697 444#if defined(HAS_QUAD)
78d46eaa 445 /* Q */ sizeof(Uquad_t),
80a13697
NC
446#else
447 0,
448#endif
78d46eaa
NC
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* S */ SIZE16,
451 0,
452 /* U */ sizeof(char),
7212898e 453 /* V */ SIZE32,
08ca2aa3 454 /* W */ sizeof(unsigned char),
78d46eaa
NC
455};
456unsigned char size_shrieking[93] = {
457 /* i */ sizeof(int),
458 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 /* l */ sizeof(long),
460 0,
7212898e 461#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 462 /* n */ SIZE16,
7212898e
NC
463#else
464 0,
465#endif
78d46eaa
NC
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 /* s */ sizeof(short),
468 0, 0,
7212898e 469#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 470 /* v */ SIZE16,
7212898e
NC
471#else
472 0,
473#endif
78d46eaa
NC
474 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 0, 0, 0, 0, 0, 0, 0, 0, 0,
476 /* I */ sizeof(unsigned int),
477 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 /* L */ sizeof(unsigned long),
479 0,
7212898e 480#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 481 /* N */ SIZE32,
7212898e
NC
482#else
483 0,
484#endif
78d46eaa
NC
485 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 /* S */ sizeof(unsigned short),
487 0, 0,
7212898e 488#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 489 /* V */ SIZE32
7212898e
NC
490#else
491 0
492#endif
78d46eaa
NC
493};
494struct packsize_t packsize[2] = {
08ca2aa3 495 {size_normal, 131, 100},
78d46eaa
NC
496 {size_shrieking, 137, 93}
497};
498#endif
499
08ca2aa3
TH
500STATIC U8
501next_uni_byte(pTHX_ char **s, const char *end, I32 datumtype)
502{
503 UV val;
504 STRLEN retlen;
505 val =
4be42731 506 UNI_TO_NATIVE(utf8n_to_uvuni((U8*)*s, end-*s, &retlen,
08ca2aa3
TH
507 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY));
508 /* We try to process malformed UTF-8 as much as possible (preferrably with
509 warnings), but these two mean we make no progress in the string and
510 might enter an infinite loop */
511 if (retlen == (STRLEN) -1 || retlen == 0)
512 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
513 if (val >= 0x100) {
514 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
515 "Character in '%c' format wrapped in unpack",
516 (int) datumtype);
517 val &= 0xff;
518 }
519 *s += retlen;
520 return val;
521}
522
523#define NEXT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
524 next_uni_byte(aTHX_ &(s), (strend), (datumtype)) : \
525 *(U8 *)(s)++)
526
527STATIC bool
528next_uni_bytes(pTHX_ char **s, char *end, char *buf, int buf_len)
529{
530 UV val;
531 STRLEN retlen;
532 char *from = *s;
533 int bad = 0;
534 U32 flags = ckWARN(WARN_UTF8) ?
535 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
536 for (;buf_len > 0; buf_len--) {
537 if (from >= end) return FALSE;
4be42731 538 val = UNI_TO_NATIVE(utf8n_to_uvuni((U8*)from, end-from, &retlen, flags));
08ca2aa3
TH
539 if (retlen == (STRLEN) -1 || retlen == 0) {
540 from += UTF8SKIP(from);
541 bad |= 1;
542 } else from += retlen;
543 if (val >= 0x100) {
544 bad |= 2;
545 val &= 0xff;
546 }
547 *(U8 *)buf++ = val;
548 }
549 /* We have enough characters for the buffer. Did we have problems ? */
550 if (bad) {
551 if (bad & 1) {
552 /* Rewalk the string fragment while warning */
553 char *ptr;
554 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
555 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
556 if (ptr >= end) break;
4be42731 557 utf8n_to_uvuni((U8*)ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
558 }
559 if (from > end) from = end;
560 }
561 if ((bad & 2) && ckWARN(WARN_UNPACK))
562 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
563 "Character(s) wrapped in unpack");
564 }
565 *s = from;
566 return TRUE;
567}
568
569STATIC bool
570next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
571{
572 UV val;
573 STRLEN retlen;
574 char *from = *s;
4be42731 575 val = UNI_TO_NATIVE(utf8n_to_uvuni((U8*)*s, end-*s, &retlen, UTF8_CHECK_ONLY));
08ca2aa3
TH
576 if (val >= 0x100 || !ISUUCHAR(val) ||
577 retlen == (STRLEN) -1 || retlen == 0) {
578 *out = 0;
579 return FALSE;
580 }
581 *out = PL_uudmap[val] & 077;
582 *s = from;
583 return TRUE;
584}
78d46eaa 585
206947d2 586/* Returns the sizeof() struct described by pat */
028d1f6d 587STATIC I32
49704364 588S_measure_struct(pTHX_ register tempsym_t* symptr)
206947d2 589{
49704364 590 register I32 len = 0;
206947d2 591 register I32 total = 0;
49704364
LW
592 int star;
593
206947d2
IZ
594 register int size;
595
49704364 596 while (next_symbol(symptr)) {
80a13697
NC
597 int which = (symptr->code & TYPE_IS_SHRIEKING)
598 ? PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
599 int offset
600 = TYPE_NO_MODIFIERS(symptr->code) - packsize[which].first;
206947d2 601
49704364
LW
602 switch( symptr->howlen ){
603 case e_no_len:
604 case e_number:
605 len = symptr->length;
606 break;
607 case e_star:
608 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
609 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
610 break;
611 }
612
80a13697
NC
613 if ((offset >= 0) && (offset < packsize[which].size))
614 size = packsize[which].array[offset] & PACK_SIZE_MASK;
615 else
206947d2 616 size = 0;
78d46eaa 617
80a13697
NC
618 if (!size) {
619 /* endianness doesn't influence the size of a type */
620 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
621 default:
622 Perl_croak(aTHX_ "Invalid type '%c' in %s",
623 (int)TYPE_NO_MODIFIERS(symptr->code),
624 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
625 case '@':
626 case '/':
627 case 'U': /* XXXX Is it correct? */
628 case 'w':
629 case 'u':
630 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
631 (int)symptr->code,
632 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
633 case '%':
634 size = 0;
635 break;
636 case '(':
637 {
638 tempsym_t savsym = *symptr;
639 symptr->patptr = savsym.grpbeg;
640 symptr->patend = savsym.grpend;
641 /* XXXX Theoretically, we need to measure many times at
642 different positions, since the subexpression may contain
643 alignment commands, but be not of aligned length.
644 Need to detect this and croak(). */
645 size = measure_struct(symptr);
646 *symptr = savsym;
647 break;
648 }
649 case 'X' | TYPE_IS_SHRIEKING:
650 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
651 */
652 if (!len) /* Avoid division by 0 */
653 len = 1;
654 len = total % len; /* Assumed: the start is aligned. */
655 /* FALL THROUGH */
656 case 'X':
657 size = -1;
658 if (total < len)
659 Perl_croak(aTHX_ "'X' outside of string in %s",
660 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
661 break;
662 case 'x' | TYPE_IS_SHRIEKING:
663 if (!len) /* Avoid division by 0 */
664 len = 1;
665 star = total % len; /* Assumed: the start is aligned. */
666 if (star) /* Other portable ways? */
667 len = len - star;
668 else
669 len = 0;
670 /* FALL THROUGH */
671 case 'x':
672 case 'A':
673 case 'Z':
674 case 'a':
80a13697
NC
675 size = 1;
676 break;
677 case 'B':
678 case 'b':
679 len = (len + 7)/8;
680 size = 1;
681 break;
682 case 'H':
683 case 'h':
684 len = (len + 1)/2;
685 size = 1;
686 break;
78d46eaa 687
80a13697
NC
688 case 'P':
689 len = 1;
690 size = sizeof(char*);
78d46eaa
NC
691 break;
692 }
206947d2
IZ
693 }
694 total += len * size;
695 }
696 return total;
697}
698
49704364
LW
699
700/* locate matching closing parenthesis or bracket
701 * returns char pointer to char after match, or NULL
702 */
703STATIC char *
704S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 705{
49704364
LW
706 while (patptr < patend) {
707 char c = *patptr++;
708
709 if (isSPACE(c))
710 continue;
711 else if (c == ender)
712 return patptr-1;
713 else if (c == '#') {
714 while (patptr < patend && *patptr != '\n')
715 patptr++;
716 continue;
717 } else if (c == '(')
718 patptr = group_end(patptr, patend, ')') + 1;
719 else if (c == '[')
720 patptr = group_end(patptr, patend, ']') + 1;
18529408 721 }
49704364
LW
722 Perl_croak(aTHX_ "No group ending character '%c' found in template",
723 ender);
724 return 0;
18529408
IZ
725}
726
49704364
LW
727
728/* Convert unsigned decimal number to binary.
729 * Expects a pointer to the first digit and address of length variable
730 * Advances char pointer to 1st non-digit char and returns number
731 */
18529408 732STATIC char *
49704364
LW
733S_get_num(pTHX_ register char *patptr, I32 *lenptr )
734{
735 I32 len = *patptr++ - '0';
736 while (isDIGIT(*patptr)) {
737 if (len >= 0x7FFFFFFF/10)
738 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
739 len = (len * 10) + (*patptr++ - '0');
740 }
741 *lenptr = len;
742 return patptr;
743}
744
745/* The marvellous template parsing routine: Using state stored in *symptr,
746 * locates next template code and count
747 */
748STATIC bool
749S_next_symbol(pTHX_ register tempsym_t* symptr )
18529408 750{
49704364
LW
751 register char* patptr = symptr->patptr;
752 register char* patend = symptr->patend;
753
754 symptr->flags &= ~FLAG_SLASH;
755
756 while (patptr < patend) {
757 if (isSPACE(*patptr))
758 patptr++;
759 else if (*patptr == '#') {
760 patptr++;
761 while (patptr < patend && *patptr != '\n')
762 patptr++;
763 if (patptr < patend)
764 patptr++;
765 } else {
766 /* We should have found a template code */
767 I32 code = *patptr++ & 0xFF;
66c611c5 768 U32 inherited_modifiers = 0;
49704364
LW
769
770 if (code == ','){ /* grandfather in commas but with a warning */
771 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
772 symptr->flags |= FLAG_COMMA;
773 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
774 "Invalid type ',' in %s",
775 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
776 }
777 continue;
778 }
779
780 /* for '(', skip to ')' */
781 if (code == '(') {
782 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
783 Perl_croak(aTHX_ "()-group starts with a count in %s",
784 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
785 symptr->grpbeg = patptr;
786 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
787 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
788 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
789 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
790 }
791
66c611c5
MHM
792 /* look for group modifiers to inherit */
793 if (TYPE_ENDIANNESS(symptr->flags)) {
794 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
795 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
796 }
797
1109a392
MHM
798 /* look for modifiers */
799 while (patptr < patend) {
800 const char *allowed;
801 I32 modifier = 0;
802 switch (*patptr) {
803 case '!':
804 modifier = TYPE_IS_SHRIEKING;
7212898e 805 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 806 break;
7212898e 807#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
808 case '>':
809 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 810 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
811 break;
812 case '<':
813 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 814 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 815 break;
7212898e 816#endif
1109a392
MHM
817 default:
818 break;
819 }
66c611c5 820
1109a392
MHM
821 if (modifier == 0)
822 break;
66c611c5 823
1109a392
MHM
824 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
825 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
826 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
827
828 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392
MHM
829 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
830 (int) TYPE_NO_MODIFIERS(code),
831 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5
MHM
832 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
833 TYPE_ENDIANNESS_MASK)
834 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
835 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
836
1109a392
MHM
837 if (ckWARN(WARN_UNPACK)) {
838 if (code & modifier)
839 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
840 "Duplicate modifier '%c' after '%c' in %s",
841 *patptr, (int) TYPE_NO_MODIFIERS(code),
842 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
843 }
66c611c5 844
1109a392
MHM
845 code |= modifier;
846 patptr++;
49704364
LW
847 }
848
66c611c5
MHM
849 /* inherit modifiers */
850 code |= inherited_modifiers;
851
49704364
LW
852 /* look for count and/or / */
853 if (patptr < patend) {
854 if (isDIGIT(*patptr)) {
855 patptr = get_num( patptr, &symptr->length );
856 symptr->howlen = e_number;
857
858 } else if (*patptr == '*') {
859 patptr++;
860 symptr->howlen = e_star;
861
862 } else if (*patptr == '[') {
863 char* lenptr = ++patptr;
864 symptr->howlen = e_number;
865 patptr = group_end( patptr, patend, ']' ) + 1;
866 /* what kind of [] is it? */
867 if (isDIGIT(*lenptr)) {
868 lenptr = get_num( lenptr, &symptr->length );
869 if( *lenptr != ']' )
870 Perl_croak(aTHX_ "Malformed integer in [] in %s",
871 symptr->flags & FLAG_PACK ? "pack" : "unpack");
872 } else {
873 tempsym_t savsym = *symptr;
874 symptr->patend = patptr-1;
875 symptr->patptr = lenptr;
876 savsym.length = measure_struct(symptr);
877 *symptr = savsym;
878 }
879 } else {
880 symptr->howlen = e_no_len;
881 symptr->length = 1;
882 }
883
884 /* try to find / */
885 while (patptr < patend) {
886 if (isSPACE(*patptr))
887 patptr++;
888 else if (*patptr == '#') {
889 patptr++;
890 while (patptr < patend && *patptr != '\n')
891 patptr++;
892 if (patptr < patend)
893 patptr++;
894 } else {
66c611c5 895 if (*patptr == '/') {
49704364
LW
896 symptr->flags |= FLAG_SLASH;
897 patptr++;
66c611c5
MHM
898 if (patptr < patend &&
899 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364
LW
900 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
901 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
902 }
903 break;
904 }
18529408 905 }
49704364
LW
906 } else {
907 /* at end - no count, no / */
908 symptr->howlen = e_no_len;
909 symptr->length = 1;
910 }
911
912 symptr->code = code;
913 symptr->patptr = patptr;
914 return TRUE;
18529408 915 }
49704364
LW
916 }
917 symptr->patptr = patptr;
918 return FALSE;
18529408
IZ
919}
920
18529408 921/*
08ca2aa3
TH
922 There is no way to cleanly handle the case where we should process the
923 string per byte in its upgraded form while it's really in downgraded form
924 (e.g. estimates like strend-s as an upper bound for the number of
925 characters left wouldn't work). So if we foresee the need of this
926 (pattern starts with U or contains U0), we want to work on the encoded
927 version of the string. Users are advised to upgrade their pack string
928 themselves if they need to do a lot of unpacks like this on it
929*/
930STATIC bool
931need_utf8(const char *pat, const char *patend)
932{
933 bool first = TRUE;
934 while (pat < patend) {
935 if (pat[0] == '#') {
936 pat++;
937 pat = memchr(pat, '\n', patend-pat);
938 if (!pat) return FALSE;
939 } else if (pat[0] == 'U') {
940 if (first || pat[1] == '0') return TRUE;
941 } else first = FALSE;
942 pat++;
943 }
944 return FALSE;
945}
946
947STATIC char
948first_symbol(const char *pat, const char *patend) {
949 while (pat < patend) {
950 if (pat[0] != '#') return pat[0];
951 pat++;
952 pat = memchr(pat, '\n', patend-pat);
953 if (!pat) return 0;
954 pat++;
955 }
956 return 0;
957}
958
959/*
18529408
IZ
960=for apidoc unpack_str
961
7accc089
JH
962The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
963and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
964
965=cut */
966
967I32
968Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 969{
49704364 970 tempsym_t sym = { 0 };
08ca2aa3
TH
971
972 if (flags & FLAG_UNPACK_DO_UTF8) flags |= FLAG_UNPACK_WAS_UTF8;
973 else if (need_utf8(pat, patend)) {
974 /* We probably should try to avoid this in case a scalar context call
975 wouldn't get to the "U0" */
976 STRLEN len = strend - s;
4be42731 977 s = (char*)bytes_to_utf8((U8*)s, &len);
08ca2aa3
TH
978 SAVEFREEPV(s);
979 strend = s + len;
980 flags |= FLAG_UNPACK_DO_UTF8;
981 }
982
983 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_UNPACK_DO_UTF8))
984 flags |= FLAG_UNPACK_PARSE_UTF8;
985
49704364
LW
986 sym.patptr = pat;
987 sym.patend = patend;
988 sym.flags = flags;
989
990 return unpack_rec(&sym, s, s, strend, NULL );
991}
992
7accc089
JH
993/*
994=for apidoc unpackstring
995
608d3aed
LW
996The engine implementing unpack() Perl function. C<unpackstring> puts the
997extracted list items on the stack and returns the number of elements.
998Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
999
1000=cut */
1001
1002I32
1003Perl_unpackstring(pTHX_ char *pat, register char *patend, register char *s, char *strend, U32 flags)
1004{
1005 tempsym_t sym = { 0 };
08ca2aa3
TH
1006
1007 if (flags & FLAG_UNPACK_DO_UTF8) flags |= FLAG_UNPACK_WAS_UTF8;
1008 else if (need_utf8(pat, patend)) {
1009 /* We probably should try to avoid this in case a scalar context call
1010 wouldn't get to the "U0" */
1011 STRLEN len = strend - s;
4be42731 1012 s = (char*)bytes_to_utf8((U8*)s, &len);
08ca2aa3
TH
1013 SAVEFREEPV(s);
1014 strend = s + len;
1015 flags |= FLAG_UNPACK_DO_UTF8;
1016 }
1017
1018 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_UNPACK_DO_UTF8))
1019 flags |= FLAG_UNPACK_PARSE_UTF8;
1020
7accc089
JH
1021 sym.patptr = pat;
1022 sym.patend = patend;
1023 sym.flags = flags;
1024
1025 return unpack_rec(&sym, s, s, strend, NULL );
1026}
1027
49704364
LW
1028STATIC
1029I32
08ca2aa3 1030S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
49704364 1031{
a6ec74c1 1032 dSP;
08ca2aa3
TH
1033 I32 datumtype, ai32;
1034 I32 len = 0;
18529408
IZ
1035 SV *sv;
1036 I32 start_sp_offset = SP - PL_stack_base;
49704364 1037 howlen_t howlen;
a6ec74c1 1038
a6ec74c1 1039 I32 checksum = 0;
92d41999 1040 UV cuv = 0;
a6ec74c1 1041 NV cdouble = 0.0;
92d41999 1042 const int bits_in_uv = 8 * sizeof(cuv);
49704364
LW
1043 char* strrelbeg = s;
1044 bool beyond = FALSE;
21c16052 1045 bool explicit_length;
49704364 1046 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
08ca2aa3 1047 bool utf8 = (symptr->flags & FLAG_UNPACK_PARSE_UTF8) ? 1 : 0;
49704364 1048
49704364
LW
1049 while (next_symbol(symptr)) {
1050 datumtype = symptr->code;
206947d2 1051 /* do first one only unless in list context
08ca2aa3 1052 / is implemented by unpacking the count, then popping it from the
206947d2 1053 stack, so must check that we're not in the middle of a / */
49704364 1054 if ( unpack_only_one
206947d2 1055 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1056 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1057 break;
49704364
LW
1058
1059 switch( howlen = symptr->howlen ){
1060 case e_no_len:
1061 case e_number:
1062 len = symptr->length;
1063 break;
1064 case e_star:
1065 len = strend - strbeg; /* long enough */
1066 break;
1067 }
18529408 1068
21c16052 1069 explicit_length = TRUE;
a6ec74c1 1070 redo_switch:
49704364 1071 beyond = s >= strend;
78d46eaa
NC
1072 {
1073 int which = (symptr->code & TYPE_IS_SHRIEKING)
1074 ? PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
a72c271b
NC
1075 const int rawtype = TYPE_NO_MODIFIERS(datumtype);
1076 int offset = rawtype - packsize[which].first;
78d46eaa
NC
1077
1078 if (offset >= 0 && offset < packsize[which].size) {
1079 /* Data about this template letter */
1080 unsigned char data = packsize[which].array[offset];
1081
1082 if (data) {
1083 /* data nonzero means we can process this letter. */
1084 long size = data & PACK_SIZE_MASK;
1085 long howmany = (strend - s) / size;
1086 if (len > howmany)
1087 len = howmany;
1088
1089 if (!checksum || (data & PACK_SIZE_CANNOT_CSUM)) {
08ca2aa3 1090 if (len && unpack_only_one) len = 1;
78d46eaa
NC
1091 EXTEND(SP, len);
1092 EXTEND_MORTAL(len);
1093 }
1094 }
1095 }
1096 }
1109a392 1097 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1098 default:
1109a392 1099 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1100
a6ec74c1 1101 case '%':
49704364 1102 if (howlen == e_no_len)
18529408 1103 len = 16; /* len is not specified */
a6ec74c1 1104 checksum = len;
92d41999 1105 cuv = 0;
a6ec74c1 1106 cdouble = 0;
18529408 1107 continue;
a6ec74c1 1108 break;
18529408
IZ
1109 case '(':
1110 {
49704364 1111 tempsym_t savsym = *symptr;
66c611c5
MHM
1112 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1113 symptr->flags |= group_modifiers;
49704364
LW
1114 symptr->patend = savsym.grpend;
1115 symptr->level++;
18529408
IZ
1116 PUTBACK;
1117 while (len--) {
49704364 1118 symptr->patptr = savsym.grpbeg;
08ca2aa3
TH
1119 if (utf8) symptr->flags |= FLAG_UNPACK_PARSE_UTF8;
1120 else symptr->flags &= ~FLAG_UNPACK_PARSE_UTF8;
1121 unpack_rec(symptr, s, strbeg, strend, &s);
1122 if (s == strend && savsym.howlen == e_star)
49704364 1123 break; /* No way to continue */
18529408
IZ
1124 }
1125 SPAGAIN;
66c611c5 1126 symptr->flags &= ~group_modifiers;
49704364
LW
1127 savsym.flags = symptr->flags;
1128 *symptr = savsym;
18529408
IZ
1129 break;
1130 }
a6ec74c1 1131 case '@':
08ca2aa3
TH
1132 if (utf8) {
1133 s = strrelbeg;
1134 while (len > 0) {
1135 if (s >= strend)
1136 Perl_croak(aTHX_ "'@' outside of string in unpack");
1137 s += UTF8SKIP(s);
1138 len--;
1139 }
1140 if (s > strend)
1141 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1142 } else {
49704364
LW
1143 if (len > strend - strrelbeg)
1144 Perl_croak(aTHX_ "'@' outside of string in unpack");
1145 s = strrelbeg + len;
08ca2aa3 1146 }
a6ec74c1 1147 break;
62f95557
IZ
1148 case 'X' | TYPE_IS_SHRIEKING:
1149 if (!len) /* Avoid division by 0 */
1150 len = 1;
08ca2aa3
TH
1151 if (utf8) {
1152 char *hop, *last;
1153 I32 l;
1154 for (l=len, hop = strbeg; hop < s; l++, hop += UTF8SKIP(hop))
1155 if (l == len) {
1156 last = hop;
1157 l = 0;
1158 }
1159 s = last;
1160 break;
1161 } else len = (s - strbeg) % len;
62f95557 1162 /* FALL THROUGH */
a6ec74c1 1163 case 'X':
08ca2aa3
TH
1164 if (utf8) {
1165 while (len > 0) {
1166 if (s <= strbeg)
1167 Perl_croak(aTHX_ "'X' outside of string in unpack");
1168 while (UTF8_IS_CONTINUATION(*--s)) {
1169 if (s <= strbeg)
1170 Perl_croak(aTHX_ "'X' outside of string in unpack");
1171 }
1172 len--;
1173 }
1174 } else {
a6ec74c1 1175 if (len > s - strbeg)
49704364 1176 Perl_croak(aTHX_ "'X' outside of string in unpack" );
a6ec74c1 1177 s -= len;
08ca2aa3 1178 }
a6ec74c1 1179 break;
62f95557
IZ
1180 case 'x' | TYPE_IS_SHRIEKING:
1181 if (!len) /* Avoid division by 0 */
1182 len = 1;
08ca2aa3
TH
1183 if (utf8) {
1184 char *hop = strbeg;
1185 I32 l = 0;
1186 for (hop = strbeg; hop < s; hop += UTF8SKIP(hop)) l++;
1187 if (s != hop)
1188 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1189 ai32 = l % len;
1190 } else ai32 = (s - strbeg) % len;
1191 if (ai32 == 0) break;
1192 len -= ai32;
62f95557 1193 /* FALL THROUGH */
a6ec74c1 1194 case 'x':
08ca2aa3
TH
1195 if (utf8) {
1196 while (len>0) {
1197 if (s >= strend)
1198 Perl_croak(aTHX_ "'x' outside of string in unpack");
1199 s += UTF8SKIP(s);
1200 len--;
1201 }
1202 } else {
a6ec74c1 1203 if (len > strend - s)
49704364 1204 Perl_croak(aTHX_ "'x' outside of string in unpack");
a6ec74c1 1205 s += len;
08ca2aa3 1206 };
a6ec74c1
JH
1207 break;
1208 case '/':
49704364
LW
1209 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1210 break;
a6ec74c1
JH
1211 case 'A':
1212 case 'Z':
1213 case 'a':
08ca2aa3
TH
1214 if (checksum) {
1215 /* Preliminary length estimate is assumed done in 'W' */
1216 if (len > strend - s) len = strend - s;
1217 goto W_checksum;
1218 }
1219 if (utf8) {
1220 I32 l;
1221 char *hop;
1222 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1223 if (hop >= strend) {
1224 if (hop > strend)
1225 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1226 break;
a6ec74c1 1227 }
a6ec74c1 1228 }
08ca2aa3
TH
1229 if (hop > strend)
1230 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1231 len = hop - s;
1232 } else if (len > strend - s)
1233 len = strend - s;
1234
1235 if (datumtype == 'Z') {
1236 /* 'Z' strips stuff after first null */
1237 char *ptr;
1238 for (ptr = s; ptr < strend; ptr++) if (*ptr == 0) break;
1239 sv = newSVpvn(s, ptr-s);
1240 if (howlen == e_star) /* exact for 'Z*' */
1241 len = ptr-s + (ptr != strend ? 1 : 0);
1242 } else if (datumtype == 'A') {
1243 /* 'A' strips both nulls and spaces */
1244 char *ptr;
1245 for (ptr = s+len-1; ptr >= s; ptr--)
1246 if (*ptr != 0 && !isSPACE(*ptr)) break;
1247 ptr++;
1248 sv = newSVpvn(s, ptr-s);
1249 } else sv = newSVpvn(s, len);
1250
1251 if (utf8) {
1252 SvUTF8_on(sv);
1253 /* Undo any upgrade done due to need_utf8() */
1254 if (!(symptr->flags & FLAG_UNPACK_WAS_UTF8))
1255 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1256 }
1257 XPUSHs(sv_2mortal(sv));
08ca2aa3 1258 s += len;
a6ec74c1
JH
1259 break;
1260 case 'B':
08ca2aa3
TH
1261 case 'b': {
1262 char *str;
49704364 1263 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1264 len = (strend - s) * 8;
1265 if (checksum) {
1266 if (!PL_bitcount) {
08ca2aa3 1267 int bits;
a6ec74c1
JH
1268 Newz(601, PL_bitcount, 256, char);
1269 for (bits = 1; bits < 256; bits++) {
1270 if (bits & 1) PL_bitcount[bits]++;
1271 if (bits & 2) PL_bitcount[bits]++;
1272 if (bits & 4) PL_bitcount[bits]++;
1273 if (bits & 8) PL_bitcount[bits]++;
1274 if (bits & 16) PL_bitcount[bits]++;
1275 if (bits & 32) PL_bitcount[bits]++;
1276 if (bits & 64) PL_bitcount[bits]++;
1277 if (bits & 128) PL_bitcount[bits]++;
1278 }
1279 }
08ca2aa3
TH
1280 if (utf8) {
1281 while (len >= 8 && s < strend) {
1282 cuv += PL_bitcount[next_uni_byte(aTHX_ &s, strend, datumtype)];
1283 len -= 8;
1284 }
1285 } else {
a6ec74c1 1286 while (len >= 8) {
08ca2aa3 1287 cuv += PL_bitcount[*(U8 *)s++];
a6ec74c1
JH
1288 len -= 8;
1289 }
08ca2aa3
TH
1290 }
1291 if (len && s < strend) {
1292 U8 bits;
1293 bits = NEXT_BYTE(utf8, s, strend, datumtype);
a6ec74c1
JH
1294 if (datumtype == 'b') {
1295 while (len-- > 0) {
92d41999 1296 if (bits & 1) cuv++;
a6ec74c1
JH
1297 bits >>= 1;
1298 }
08ca2aa3 1299 } else {
a6ec74c1 1300 while (len-- > 0) {
08ca2aa3 1301 if (bits & 0x80) cuv++;
a6ec74c1
JH
1302 bits <<= 1;
1303 }
1304 }
1305 }
1306 break;
1307 }
08ca2aa3
TH
1308
1309 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1310 SvPOK_on(sv);
1311 str = SvPVX(sv);
1312 if (datumtype == 'b') {
08ca2aa3
TH
1313 U8 bits;
1314 ai32 = len;
1315 for (len = 0; len < ai32; len++) {
1316 if (len & 7) bits >>= 1;
1317 else if (utf8) {
1318 if (s >= strend) break;
1319 bits = next_uni_byte(aTHX_ &s, strend, datumtype);
1320 } else bits = *(U8 *) s++;
1321 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1322 }
08ca2aa3
TH
1323 } else {
1324 U8 bits;
1325 ai32 = len;
1326 for (len = 0; len < ai32; len++) {
1327 if (len & 7) bits <<= 1;
1328 else if (utf8) {
1329 if (s >= strend) break;
1330 bits = next_uni_byte(aTHX_ &s, strend, datumtype);
1331 } else bits = *(U8 *) s++;
1332 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1333 }
1334 }
1335 *str = '\0';
08ca2aa3
TH
1336 SvCUR_set(sv, str - SvPVX(sv));
1337 XPUSHs(sv);
a6ec74c1 1338 break;
08ca2aa3 1339 }
a6ec74c1 1340 case 'H':
08ca2aa3
TH
1341 case 'h': {
1342 char *str;
1343 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1344 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1345 len = (strend - s) * 2;
08ca2aa3 1346 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1347 SvPOK_on(sv);
1348 str = SvPVX(sv);
1349 if (datumtype == 'h') {
08ca2aa3
TH
1350 U8 bits;
1351 ai32 = len;
1352 for (len = 0; len < ai32; len++) {
1353 if (len & 1) bits >>= 4;
1354 else if (utf8) {
1355 if (s >= strend) break;
1356 bits = next_uni_byte(aTHX_ &s, strend, datumtype);
1357 } else bits = * (U8 *) s++;
a6ec74c1
JH
1358 *str++ = PL_hexdigit[bits & 15];
1359 }
08ca2aa3
TH
1360 } else {
1361 U8 bits;
1362 ai32 = len;
1363 for (len = 0; len < ai32; len++) {
1364 if (len & 1) bits <<= 4;
1365 else if (utf8) {
1366 if (s >= strend) break;
1367 bits = next_uni_byte(aTHX_ &s, strend, datumtype);
1368 } else bits = *(U8 *) s++;
a6ec74c1
JH
1369 *str++ = PL_hexdigit[(bits >> 4) & 15];
1370 }
1371 }
1372 *str = '\0';
08ca2aa3
TH
1373 SvCUR_set(sv, str - SvPVX(sv));
1374 XPUSHs(sv);
a6ec74c1 1375 break;
08ca2aa3 1376 }
a6ec74c1 1377 case 'c':
73cb7263 1378 while (len-- > 0) {
08ca2aa3 1379 int aint = NEXT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1380 if (aint >= 128) /* fake up signed chars */
1381 aint -= 256;
08ca2aa3 1382 if (!checksum)
ac7f3b1b 1383 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1384 else if (checksum > bits_in_uv)
1385 cdouble += (NV)aint;
1386 else
1387 cuv += aint;
a6ec74c1
JH
1388 }
1389 break;
1390 case 'C':
08ca2aa3
TH
1391 case 'W':
1392 W_checksum:
35bcd338 1393 if (len == 0) {
08ca2aa3
TH
1394 if (explicit_length && datumtype == 'C')
1395 /* Switch to "character" mode */
1396 utf8 = (symptr->flags & FLAG_UNPACK_DO_UTF8) ? 1 : 0;
35bcd338
JH
1397 break;
1398 }
08ca2aa3
TH
1399 if (datumtype == 'C' ?
1400 (symptr->flags & FLAG_UNPACK_DO_UTF8) &&
1401 !(symptr->flags & FLAG_UNPACK_WAS_UTF8) : utf8) {
1402 while (len-- > 0 && s < strend) {
1403 UV val;
1404 STRLEN retlen;
1405 val =
4be42731 1406 UNI_TO_NATIVE(utf8n_to_uvuni((U8*)s, strend-s, &retlen,
08ca2aa3
TH
1407 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY));
1408 if (retlen == (STRLEN) -1 || retlen == 0)
1409 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1410 s += retlen;
1411 if (!checksum)
1412 PUSHs(sv_2mortal(newSVuv((UV) val)));
1413 else if (checksum > bits_in_uv)
1414 cdouble += (NV) val;
d6d3e8bd 1415 else
08ca2aa3 1416 cuv += val;
a6ec74c1 1417 }
08ca2aa3 1418 } else if (!checksum)
a6ec74c1 1419 while (len-- > 0) {
08ca2aa3
TH
1420 U8 ch = *(U8 *) s++;
1421 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1422 }
08ca2aa3
TH
1423 else if (checksum > bits_in_uv)
1424 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1425 else
1426 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1427 break;
1428 case 'U':
35bcd338 1429 if (len == 0) {
08ca2aa3
TH
1430 if (explicit_length) {
1431 /* Switch to "bytes in UTF-8" mode */
1432 if (symptr->flags & FLAG_UNPACK_DO_UTF8) utf8 = 0;
1433 else
1434 /* Should be impossible due to the need_utf8() test */
1435 Perl_croak(aTHX_ "U0 mode on a byte string");
1436 }
35bcd338
JH
1437 break;
1438 }
08ca2aa3 1439 if (len > strend - s) len = strend - s;
73cb7263 1440 if (!checksum) {
08ca2aa3
TH
1441 if (len && unpack_only_one) len = 1;
1442 EXTEND(SP, len);
1443 EXTEND_MORTAL(len);
a6ec74c1 1444 }
08ca2aa3
TH
1445 while (len-- > 0 && s < strend) {
1446 STRLEN retlen;
1447 UV auv;
1448 if (utf8) {
1449 U8 result[UTF8_MAXLEN];
1450 char *ptr;
1451 STRLEN len;
1452 ptr = s;
1453 /* Bug: warns about bad utf8 even if we are short on bytes
1454 and will break out of the loop */
4be42731 1455 if (!next_uni_bytes(aTHX_ &ptr, strend, (char*)result, 1))
08ca2aa3
TH
1456 break;
1457 len = UTF8SKIP(result);
4be42731 1458 if (!next_uni_bytes(aTHX_ &ptr, strend, (char*)&result[1], len-1))
08ca2aa3
TH
1459 break;
1460 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1461 s = ptr;
1462 } else {
1463 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1464 if (retlen == (STRLEN) -1 || retlen == 0)
1465 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1466 s += retlen;
1467 }
1468 if (!checksum)
1469 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1470 else if (checksum > bits_in_uv)
08ca2aa3 1471 cdouble += (NV) auv;
73cb7263 1472 else
08ca2aa3 1473 cuv += auv;
a6ec74c1
JH
1474 }
1475 break;
49704364
LW
1476 case 's' | TYPE_IS_SHRIEKING:
1477#if SHORTSIZE != SIZE16
73cb7263 1478 while (len-- > 0) {
08ca2aa3
TH
1479 short ashort;
1480 COPYVAR(s, strend, utf8, ashort, s);
1481 if (!checksum)
ac7f3b1b 1482 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1483 else if (checksum > bits_in_uv)
1484 cdouble += (NV)ashort;
1485 else
1486 cuv += ashort;
49704364
LW
1487 }
1488 break;
1489#else
1490 /* Fallthrough! */
a6ec74c1 1491#endif
49704364 1492 case 's':
73cb7263 1493 while (len-- > 0) {
08ca2aa3
TH
1494 I16 ai16;
1495
1496#if U16SIZE > SIZE16
1497 ai16 = 0;
1498#endif
1499 if (utf8) {
1500 if (!next_uni_bytes(aTHX_ &s, strend,
1501 OFF16(&ai16), SIZE16)) break;
1502 } else {
73cb7263 1503 COPY16(s, &ai16);
08ca2aa3
TH
1504 s += SIZE16;
1505 }
73cb7263 1506 DO_BO_UNPACK(ai16, 16);
1109a392 1507#if U16SIZE > SIZE16
73cb7263
NC
1508 if (ai16 > 32767)
1509 ai16 -= 65536;
a6ec74c1 1510#endif
08ca2aa3 1511 if (!checksum)
ac7f3b1b 1512 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1513 else if (checksum > bits_in_uv)
1514 cdouble += (NV)ai16;
1515 else
1516 cuv += ai16;
a6ec74c1
JH
1517 }
1518 break;
49704364
LW
1519 case 'S' | TYPE_IS_SHRIEKING:
1520#if SHORTSIZE != SIZE16
73cb7263 1521 while (len-- > 0) {
08ca2aa3
TH
1522 unsigned short aushort;
1523 COPYVAR(s, strend, utf8, aushort, s);
1524 if (!checksum)
1525 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1526 else if (checksum > bits_in_uv)
1527 cdouble += (NV)aushort;
1528 else
1529 cuv += aushort;
49704364
LW
1530 }
1531 break;
1532#else
1533 /* Fallhrough! */
1534#endif
a6ec74c1
JH
1535 case 'v':
1536 case 'n':
1537 case 'S':
73cb7263 1538 while (len-- > 0) {
08ca2aa3
TH
1539 U16 au16;
1540#if U16SIZE > SIZE16
1541 au16 = 0;
1542#endif
1543 if (utf8) {
1544 if (!next_uni_bytes(aTHX_ &s, strend,
1545 OFF16(&au16), SIZE16)) break;
1546 } else {
73cb7263 1547 COPY16(s, &au16);
73cb7263 1548 s += SIZE16;
08ca2aa3
TH
1549 }
1550 DO_BO_UNPACK(au16, 16);
a6ec74c1 1551#ifdef HAS_NTOHS
73cb7263
NC
1552 if (datumtype == 'n')
1553 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1554#endif
1555#ifdef HAS_VTOHS
73cb7263
NC
1556 if (datumtype == 'v')
1557 au16 = vtohs(au16);
a6ec74c1 1558#endif
08ca2aa3
TH
1559 if (!checksum)
1560 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263
NC
1561 else if (checksum > bits_in_uv)
1562 cdouble += (NV)au16;
1563 else
1564 cuv += au16;
a6ec74c1
JH
1565 }
1566 break;
7212898e 1567#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1568 case 'v' | TYPE_IS_SHRIEKING:
1569 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1570 while (len-- > 0) {
08ca2aa3
TH
1571 I16 ai16;
1572# if U16SIZE > SIZE16
1573 ai16 = 0;
1574# endif
1575 if (utf8) {
1576 if (!next_uni_bytes(aTHX_ &s, strend,
1577 (char *) &ai16, sizeof(ai16))) break;
1578 } else {
73cb7263
NC
1579 COPY16(s, &ai16);
1580 s += SIZE16;
08ca2aa3
TH
1581 }
1582# ifdef HAS_NTOHS
73cb7263 1583 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1584 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1585# endif /* HAS_NTOHS */
1586# ifdef HAS_VTOHS
73cb7263 1587 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1588 ai16 = (I16) vtohs((U16) ai16);
1589# endif /* HAS_VTOHS */
1590 if (!checksum)
ac7f3b1b 1591 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1592 else if (checksum > bits_in_uv)
08ca2aa3 1593 cdouble += (NV) ai16;
73cb7263
NC
1594 else
1595 cuv += ai16;
068bd2e7
MHM
1596 }
1597 break;
08ca2aa3 1598#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1599 case 'i':
49704364 1600 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1601 while (len-- > 0) {
08ca2aa3
TH
1602 int aint;
1603 COPYVAR(s, strend, utf8, aint, i);
1604 if (!checksum)
ac7f3b1b 1605 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1606 else if (checksum > bits_in_uv)
1607 cdouble += (NV)aint;
1608 else
1609 cuv += aint;
a6ec74c1
JH
1610 }
1611 break;
1612 case 'I':
49704364 1613 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1614 while (len-- > 0) {
08ca2aa3
TH
1615 unsigned int auint;
1616 COPYVAR(s, strend, utf8, auint, i);
1617 if (!checksum)
ac7f3b1b 1618 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1619 else if (checksum > bits_in_uv)
1620 cdouble += (NV)auint;
1621 else
1622 cuv += auint;
a6ec74c1
JH
1623 }
1624 break;
92d41999 1625 case 'j':
73cb7263 1626 while (len-- > 0) {
08ca2aa3 1627 IV aiv;
1109a392 1628#if IVSIZE == INTSIZE
08ca2aa3 1629 COPYVAR(s, strend, utf8, aiv, i);
1109a392 1630#elif IVSIZE == LONGSIZE
08ca2aa3 1631 COPYVAR(s, strend, utf8, aiv, l);
1109a392 1632#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
08ca2aa3
TH
1633 COPYVAR(s, strend, utf8, aiv, 64);
1634#else
1635 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1636#endif
08ca2aa3 1637 if (!checksum)
ac7f3b1b 1638 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1639 else if (checksum > bits_in_uv)
1640 cdouble += (NV)aiv;
1641 else
1642 cuv += aiv;
92d41999
JH
1643 }
1644 break;
1645 case 'J':
73cb7263 1646 while (len-- > 0) {
08ca2aa3
TH
1647 UV auv;
1648#if IVSIZE == INTSIZE
1649 COPYVAR(s, strend, utf8, auv, i);
1650#elif IVSIZE == LONGSIZE
1651 COPYVAR(s, strend, utf8, auv, l);
1652#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1653 COPYVAR(s, strend, utf8, auv, 64);
1654#else
1655 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1656#endif
08ca2aa3 1657 if (!checksum)
ac7f3b1b 1658 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1659 else if (checksum > bits_in_uv)
1660 cdouble += (NV)auv;
1661 else
1662 cuv += auv;
92d41999
JH
1663 }
1664 break;
49704364
LW
1665 case 'l' | TYPE_IS_SHRIEKING:
1666#if LONGSIZE != SIZE32
73cb7263 1667 while (len-- > 0) {
08ca2aa3
TH
1668 long along;
1669 COPYVAR(s, strend, utf8, along, l);
1670 if (!checksum)
ac7f3b1b 1671 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1672 else if (checksum > bits_in_uv)
1673 cdouble += (NV)along;
1674 else
1675 cuv += along;
49704364
LW
1676 }
1677 break;
1678#else
1679 /* Fallthrough! */
a6ec74c1 1680#endif
49704364 1681 case 'l':
73cb7263 1682 while (len-- > 0) {
08ca2aa3
TH
1683 I32 ai32;
1684#if U32SIZE > SIZE32
1685 ai32 = 0;
1686#endif
1687 if (utf8) {
1688 if (!next_uni_bytes(aTHX_ &s, strend,
1689 OFF32(&ai32), SIZE32)) break;
1690 } else {
73cb7263 1691 COPY32(s, &ai32);
08ca2aa3
TH
1692 s += SIZE32;
1693 }
73cb7263 1694 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1695#if U32SIZE > SIZE32
08ca2aa3 1696 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1697#endif
08ca2aa3 1698 if (!checksum)
ac7f3b1b 1699 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1700 else if (checksum > bits_in_uv)
1701 cdouble += (NV)ai32;
1702 else
1703 cuv += ai32;
a6ec74c1
JH
1704 }
1705 break;
49704364
LW
1706 case 'L' | TYPE_IS_SHRIEKING:
1707#if LONGSIZE != SIZE32
73cb7263 1708 while (len-- > 0) {
08ca2aa3
TH
1709 unsigned long aulong;
1710 COPYVAR(s, strend, utf8, aulong, l);
1711 if (!checksum)
ac7f3b1b 1712 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1713 else if (checksum > bits_in_uv)
1714 cdouble += (NV)aulong;
1715 else
1716 cuv += aulong;
49704364
LW
1717 }
1718 break;
1719#else
1720 /* Fall through! */
1721#endif
a6ec74c1
JH
1722 case 'V':
1723 case 'N':
1724 case 'L':
73cb7263 1725 while (len-- > 0) {
08ca2aa3
TH
1726 U32 au32;
1727#if U32SIZE > SIZE32
1728 au32 = 0;
1729#endif
1730 if (utf8) {
1731 if (!next_uni_bytes(aTHX_ &s, strend,
1732 OFF32(&au32), SIZE32)) break;
1733 } else {
73cb7263 1734 COPY32(s, &au32);
73cb7263 1735 s += SIZE32;
08ca2aa3
TH
1736 }
1737 DO_BO_UNPACK(au32, 32);
a6ec74c1 1738#ifdef HAS_NTOHL
73cb7263
NC
1739 if (datumtype == 'N')
1740 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1741#endif
1742#ifdef HAS_VTOHL
73cb7263
NC
1743 if (datumtype == 'V')
1744 au32 = vtohl(au32);
a6ec74c1 1745#endif
08ca2aa3 1746 if (!checksum)
ac7f3b1b 1747 PUSHs(sv_2mortal(newSVuv((UV)au32)));
73cb7263
NC
1748 else if (checksum > bits_in_uv)
1749 cdouble += (NV)au32;
1750 else
1751 cuv += au32;
a6ec74c1
JH
1752 }
1753 break;
7212898e 1754#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1755 case 'V' | TYPE_IS_SHRIEKING:
1756 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1757 while (len-- > 0) {
08ca2aa3
TH
1758 I32 ai32;
1759# if U32SIZE > SIZE32
1760 ai32 = 0;
1761# endif
1762 if (utf8) {
1763 if (!next_uni_bytes(aTHX_ &s, strend,
1764 OFF32(&ai32), SIZE32)) break;
1765 } else {
73cb7263
NC
1766 COPY32(s, &ai32);
1767 s += SIZE32;
08ca2aa3
TH
1768 }
1769# ifdef HAS_NTOHL
73cb7263
NC
1770 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1771 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1772# endif
1773# ifdef HAS_VTOHL
73cb7263
NC
1774 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1775 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1776# endif
1777 if (!checksum)
ac7f3b1b 1778 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1779 else if (checksum > bits_in_uv)
1780 cdouble += (NV)ai32;
1781 else
1782 cuv += ai32;
068bd2e7
MHM
1783 }
1784 break;
08ca2aa3 1785#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1786 case 'p':
a6ec74c1 1787 while (len-- > 0) {
08ca2aa3
TH
1788 char *aptr;
1789 if (utf8) {
1790 if (!next_uni_bytes(aTHX_ &s, strend,
1791 (char *) &aptr, sizeof(aptr))) break;
1792 } else {
5512a2f9 1793 Copy(s, &aptr, 1, char*);
08ca2aa3
TH
1794 s += sizeof(aptr);
1795 }
5512a2f9 1796 DO_BO_UNPACK_P(aptr);
c4c5f44a
NC
1797 /* newSVpv generates undef if aptr is NULL */
1798 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1799 }
1800 break;
1801 case 'w':
a6ec74c1
JH
1802 {
1803 UV auv = 0;
1804 U32 bytes = 0;
1805
08ca2aa3
TH
1806 while (len > 0 && s < strend) {
1807 U8 ch;
1808 ch = NEXT_BYTE(utf8, s, strend, 'w');
1809 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1810 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1811 if (ch < 0x80) {
a6ec74c1 1812 bytes = 0;
ac7f3b1b 1813 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1814 len--;
1815 auv = 0;
08ca2aa3 1816 continue;
a6ec74c1 1817 }
08ca2aa3 1818 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
1819 char *t;
1820 STRLEN n_a;
1821
1822 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1823 while (s < strend) {
08ca2aa3
TH
1824 ch = NEXT_BYTE(utf8, s, strend, 'w');
1825 sv = mul128(sv, (U8)(ch & 0x7f));
1826 if (!(ch & 0x80)) {
a6ec74c1
JH
1827 bytes = 0;
1828 break;
1829 }
1830 }
1831 t = SvPV(sv, n_a);
1832 while (*t == '0')
1833 t++;
1834 sv_chop(sv, t);
1835 PUSHs(sv_2mortal(sv));
1836 len--;
1837 auv = 0;
1838 }
1839 }
1840 if ((s >= strend) && bytes)
49704364 1841 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1842 }
1843 break;
1844 case 'P':
49704364
LW
1845 if (symptr->howlen == e_star)
1846 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1847 EXTEND(SP, 1);
08ca2aa3
TH
1848 if (sizeof(char*) <= strend - s) {
1849 char *aptr;
1850 if (utf8) {
1851 if (!next_uni_bytes(aTHX_ &s, strend, (char *) &aptr,
1852 sizeof(aptr))) break;
1853 } else {
a6ec74c1 1854 Copy(s, &aptr, 1, char*);
08ca2aa3 1855 s += sizeof(aptr);
a6ec74c1 1856 }
08ca2aa3 1857 DO_BO_UNPACK_P(aptr);
c4c5f44a
NC
1858 /* newSVpvn generates undef if aptr is NULL */
1859 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 1860 }
a6ec74c1
JH
1861 break;
1862#ifdef HAS_QUAD
1863 case 'q':
73cb7263 1864 while (len-- > 0) {
08ca2aa3
TH
1865 Quad_t aquad;
1866 COPYVAR(s, strend, utf8, aquad, 64);
1867 if (!checksum)
1868 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 1869 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
1870 else if (checksum > bits_in_uv)
1871 cdouble += (NV)aquad;
1872 else
1873 cuv += aquad;
1874 }
a6ec74c1
JH
1875 break;
1876 case 'Q':
73cb7263 1877 while (len-- > 0) {
08ca2aa3
TH
1878 Uquad_t auquad;
1879 COPYVAR(s, strend, utf8, auquad, 64);
1880 if (!checksum)
1881 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1882 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
1883 else if (checksum > bits_in_uv)
1884 cdouble += (NV)auquad;
1885 else
1886 cuv += auquad;
a6ec74c1
JH
1887 }
1888 break;
08ca2aa3 1889#endif /* HAS_QUAD */
a6ec74c1
JH
1890 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1891 case 'f':
73cb7263 1892 while (len-- > 0) {
08ca2aa3
TH
1893 float afloat;
1894 if (utf8) {
1895 if (!next_uni_bytes(aTHX_ &s, strend, (char *) &afloat,
1896 sizeof(afloat))) break;
1897 } else {
73cb7263 1898 Copy(s, &afloat, 1, float);
73cb7263 1899 s += sizeof(float);
a6ec74c1 1900 }
08ca2aa3
TH
1901 DO_BO_UNPACK_N(afloat, float);
1902 if (!checksum)
1903 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
1904 else
73cb7263
NC
1905 cdouble += afloat;
1906 }
a6ec74c1
JH
1907 break;
1908 case 'd':
73cb7263 1909 while (len-- > 0) {
08ca2aa3
TH
1910 double adouble;
1911 if (utf8) {
1912 if (!next_uni_bytes(aTHX_ &s, strend, (char *) &adouble,
1913 sizeof(adouble))) break;
1914 } else {
73cb7263 1915 Copy(s, &adouble, 1, double);
73cb7263 1916 s += sizeof(double);
a6ec74c1 1917 }
08ca2aa3
TH
1918 DO_BO_UNPACK_N(adouble, double);
1919 if (!checksum)
1920 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
1921 else
73cb7263
NC
1922 cdouble += adouble;
1923 }
a6ec74c1 1924 break;
92d41999 1925 case 'F':
73cb7263 1926 while (len-- > 0) {
08ca2aa3
TH
1927 NV anv;
1928 if (utf8) {
1929 if (!next_uni_bytes(aTHX_ &s, strend,
1930 (char *) &anv, sizeof(anv))) break;
1931 } else {
73cb7263 1932 Copy(s, &anv, 1, NV);
73cb7263 1933 s += NVSIZE;
92d41999 1934 }
08ca2aa3
TH
1935 DO_BO_UNPACK_N(anv, NV);
1936 if (!checksum)
1937 PUSHs(sv_2mortal(newSVnv(anv)));
1938 else
73cb7263
NC
1939 cdouble += anv;
1940 }
92d41999
JH
1941 break;
1942#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1943 case 'D':
73cb7263 1944 while (len-- > 0) {
08ca2aa3
TH
1945 long double aldouble;
1946 if (utf8) {
1947 if (!next_uni_bytes(aTHX_ &s, strend, (char *) &aldouble,
1948 sizeof(aldouble))) break;
1949 } else {
73cb7263 1950 Copy(s, &aldouble, 1, long double);
73cb7263 1951 s += LONG_DOUBLESIZE;
73cb7263 1952 }
08ca2aa3
TH
1953 DO_BO_UNPACK_N(aldouble, long double);
1954 if (!checksum)
1955 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
1956 else
1957 cdouble += aldouble;
92d41999
JH
1958 }
1959 break;
1960#endif
a6ec74c1
JH
1961 case 'u':
1962 /* MKS:
1963 * Initialise the decode mapping. By using a table driven
1964 * algorithm, the code will be character-set independent
1965 * (and just as fast as doing character arithmetic)
1966 */
1967 if (PL_uudmap['M'] == 0) {
1968 int i;
1969
1970 for (i = 0; i < sizeof(PL_uuemap); i += 1)
1971 PL_uudmap[(U8)PL_uuemap[i]] = i;
1972 /*
1973 * Because ' ' and '`' map to the same value,
1974 * we need to decode them both the same.
1975 */
1976 PL_uudmap[' '] = 0;
1977 }
08ca2aa3
TH
1978 {
1979 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1980 sv = sv_2mortal(NEWSV(42, l));
1981 if (l) SvPOK_on(sv);
1982 }
1983 if (utf8) {
1984 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1985 I32 a, b, c, d;
1986 char hunk[4];
1987
1988 hunk[3] = '\0';
1989 while (len > 0) {
1990 next_uni_uu(aTHX_ &s, strend, &a);
1991 next_uni_uu(aTHX_ &s, strend, &b);
1992 next_uni_uu(aTHX_ &s, strend, &c);
1993 next_uni_uu(aTHX_ &s, strend, &d);
1994 hunk[0] = (char)((a << 2) | (b >> 4));
1995 hunk[1] = (char)((b << 4) | (c >> 2));
1996 hunk[2] = (char)((c << 6) | d);
1997 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1998 len -= 3;
1999 }
2000 if (s < strend) {
2001 if (*s == '\n') s++;
2002 else {
2003 /* possible checksum byte */
2004 char *skip = s+UTF8SKIP(s);
2005 if (skip < strend && *skip == '\n') s = skip+1;
2006 }
2007 }
2008 }
2009 } else {
a6ec74c1
JH
2010 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2011 I32 a, b, c, d;
2012 char hunk[4];
2013
2014 hunk[3] = '\0';
2015 len = PL_uudmap[*(U8*)s++] & 077;
2016 while (len > 0) {
2017 if (s < strend && ISUUCHAR(*s))
2018 a = PL_uudmap[*(U8*)s++] & 077;
2019 else
2020 a = 0;
2021 if (s < strend && ISUUCHAR(*s))
2022 b = PL_uudmap[*(U8*)s++] & 077;
2023 else
2024 b = 0;
2025 if (s < strend && ISUUCHAR(*s))
2026 c = PL_uudmap[*(U8*)s++] & 077;
2027 else
2028 c = 0;
2029 if (s < strend && ISUUCHAR(*s))
2030 d = PL_uudmap[*(U8*)s++] & 077;
2031 else
2032 d = 0;
eb160463
GS
2033 hunk[0] = (char)((a << 2) | (b >> 4));
2034 hunk[1] = (char)((b << 4) | (c >> 2));
2035 hunk[2] = (char)((c << 6) | d);
a6ec74c1
JH
2036 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2037 len -= 3;
2038 }
2039 if (*s == '\n')
2040 s++;
92aa5668
JH
2041 else /* possible checksum byte */
2042 if (s + 1 < strend && s[1] == '\n')
2043 s += 2;
a6ec74c1 2044 }
08ca2aa3
TH
2045 }
2046 XPUSHs(sv);
a6ec74c1
JH
2047 break;
2048 }
49704364 2049
a6ec74c1 2050 if (checksum) {
1109a392 2051 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2052 (checksum > bits_in_uv &&
08ca2aa3
TH
2053 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2054 NV trouble, anv;
a6ec74c1 2055
08ca2aa3 2056 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2057 while (checksum >= 16) {
2058 checksum -= 16;
08ca2aa3 2059 anv *= 65536.0;
a6ec74c1 2060 }
a6ec74c1 2061 while (cdouble < 0.0)
08ca2aa3
TH
2062 cdouble += anv;
2063 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2064 sv = newSVnv(cdouble);
a6ec74c1
JH
2065 }
2066 else {
fa8ec7c1
NC
2067 if (checksum < bits_in_uv) {
2068 UV mask = ((UV)1 << checksum) - 1;
92d41999 2069 cuv &= mask;
a6ec74c1 2070 }
c4c5f44a 2071 sv = newSVuv(cuv);
a6ec74c1
JH
2072 }
2073 XPUSHs(sv_2mortal(sv));
2074 checksum = 0;
2075 }
49704364
LW
2076
2077 if (symptr->flags & FLAG_SLASH){
2078 if (SP - PL_stack_base - start_sp_offset <= 0)
2079 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2080 if( next_symbol(symptr) ){
2081 if( symptr->howlen == e_number )
2082 Perl_croak(aTHX_ "Count after length/code in unpack" );
2083 if( beyond ){
2084 /* ...end of char buffer then no decent length available */
2085 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2086 } else {
2087 /* take top of stack (hope it's numeric) */
2088 len = POPi;
2089 if( len < 0 )
2090 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2091 }
2092 } else {
2093 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2094 }
2095 datumtype = symptr->code;
21c16052 2096 explicit_length = FALSE;
49704364
LW
2097 goto redo_switch;
2098 }
a6ec74c1 2099 }
49704364 2100
18529408
IZ
2101 if (new_s)
2102 *new_s = s;
2103 PUTBACK;
2104 return SP - PL_stack_base - start_sp_offset;
2105}
2106
2107PP(pp_unpack)
2108{
2109 dSP;
bab9c0ac 2110 dPOPPOPssrl;
18529408
IZ
2111 I32 gimme = GIMME_V;
2112 STRLEN llen;
2113 STRLEN rlen;
08ca2aa3
TH
2114 char *pat = SvPV(left, llen);
2115 char *s = SvPV(right, rlen);
18529408 2116 char *strend = s + rlen;
08ca2aa3
TH
2117 char *patend = pat + llen;
2118 I32 cnt;
18529408
IZ
2119
2120 PUTBACK;
7accc089 2121 cnt = unpackstring(pat, patend, s, strend,
49704364
LW
2122 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2123 | (DO_UTF8(right) ? FLAG_UNPACK_DO_UTF8 : 0));
2124
18529408
IZ
2125 SPAGAIN;
2126 if ( !cnt && gimme == G_SCALAR )
2127 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2128 RETURN;
2129}
2130
2131STATIC void
2132S_doencodes(pTHX_ register SV *sv, register char *s, register I32 len)
2133{
2134 char hunk[5];
2135
2136 *hunk = PL_uuemap[len];
2137 sv_catpvn(sv, hunk, 1);
2138 hunk[4] = '\0';
2139 while (len > 2) {
2140 hunk[0] = PL_uuemap[(077 & (*s >> 2))];
2141 hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017)))];
2142 hunk[2] = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2143 hunk[3] = PL_uuemap[(077 & (s[2] & 077))];
2144 sv_catpvn(sv, hunk, 4);
2145 s += 3;
2146 len -= 3;
2147 }
2148 if (len > 0) {
2149 char r = (len > 1 ? s[1] : '\0');
2150 hunk[0] = PL_uuemap[(077 & (*s >> 2))];
2151 hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((r >> 4) & 017)))];
2152 hunk[2] = PL_uuemap[(077 & ((r << 2) & 074))];
2153 hunk[3] = PL_uuemap[0];
2154 sv_catpvn(sv, hunk, 4);
2155 }
2156 sv_catpvn(sv, "\n", 1);
2157}
2158
2159STATIC SV *
2160S_is_an_int(pTHX_ char *s, STRLEN l)
2161{
2162 STRLEN n_a;
2163 SV *result = newSVpvn(s, l);
2164 char *result_c = SvPV(result, n_a); /* convenience */
2165 char *out = result_c;
2166 bool skip = 1;
2167 bool ignore = 0;
2168
2169 while (*s) {
2170 switch (*s) {
2171 case ' ':
2172 break;
2173 case '+':
2174 if (!skip) {
2175 SvREFCNT_dec(result);
2176 return (NULL);
2177 }
2178 break;
2179 case '0':
2180 case '1':
2181 case '2':
2182 case '3':
2183 case '4':
2184 case '5':
2185 case '6':
2186 case '7':
2187 case '8':
2188 case '9':
2189 skip = 0;
2190 if (!ignore) {
2191 *(out++) = *s;
2192 }
2193 break;
2194 case '.':
2195 ignore = 1;
2196 break;
2197 default:
2198 SvREFCNT_dec(result);
2199 return (NULL);
2200 }
2201 s++;
2202 }
2203 *(out++) = '\0';
2204 SvCUR_set(result, out - result_c);
2205 return (result);
2206}
2207
2208/* pnum must be '\0' terminated */
2209STATIC int
2210S_div128(pTHX_ SV *pnum, bool *done)
2211{
2212 STRLEN len;
2213 char *s = SvPV(pnum, len);
2214 int m = 0;
2215 int r = 0;
2216 char *t = s;
2217
2218 *done = 1;
2219 while (*t) {
2220 int i;
2221
2222 i = m * 10 + (*t - '0');
2223 m = i & 0x7F;
2224 r = (i >> 7); /* r < 10 */
2225 if (r) {
2226 *done = 0;
2227 }
2228 *(t++) = '0' + r;
2229 }
2230 *(t++) = '\0';
2231 SvCUR_set(pnum, (STRLEN) (t - s));
2232 return (m);
2233}
2234
49704364 2235
a6ec74c1 2236
18529408
IZ
2237/*
2238=for apidoc pack_cat
2239
7accc089
JH
2240The engine implementing pack() Perl function. Note: parameters next_in_list and
2241flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2242
2243=cut */
2244
49704364 2245
18529408
IZ
2246void
2247Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2248{
49704364
LW
2249 tempsym_t sym = { 0 };
2250 sym.patptr = pat;
2251 sym.patend = patend;
7accc089
JH
2252 sym.flags = FLAG_PACK;
2253
2254 (void)pack_rec( cat, &sym, beglist, endlist );
2255}
2256
2257
2258/*
2259=for apidoc packlist
2260
2261The engine implementing pack() Perl function.
2262
2263=cut */
2264
2265
2266void
2267Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2268{
2269 tempsym_t sym = { 0 };
2270 sym.patptr = pat;
2271 sym.patend = patend;
2272 sym.flags = FLAG_PACK;
49704364
LW
2273
2274 (void)pack_rec( cat, &sym, beglist, endlist );
2275}
2276
2277
2278STATIC
2279SV **
2280S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV **endlist )
2281{
a6ec74c1
JH
2282 register I32 items;
2283 STRLEN fromlen;
49704364 2284 register I32 len = 0;
a6ec74c1
JH
2285 SV *fromstr;
2286 /*SUPPRESS 442*/
2287 static char null10[] = {0,0,0,0,0,0,0,0,0,0};
2288 static char *space10 = " ";
49704364 2289 bool found;
a6ec74c1
JH
2290
2291 /* These must not be in registers: */
2292 char achar;
ef108786
MHM
2293 I16 ai16;
2294 U16 au16;
2295 I32 ai32;
2296 U32 au32;
a6ec74c1
JH
2297#ifdef HAS_QUAD
2298 Quad_t aquad;
2299 Uquad_t auquad;
2300#endif
ef108786
MHM
2301#if SHORTSIZE != SIZE16
2302 short ashort;
2303 unsigned short aushort;
2304#endif
2305 int aint;
2306 unsigned int auint;
2307#if LONGSIZE != SIZE32
2308 long along;
2309 unsigned long aulong;
2310#endif
a6ec74c1
JH
2311 char *aptr;
2312 float afloat;
2313 double adouble;
ef108786
MHM
2314#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2315 long double aldouble;
2316#endif
2317 IV aiv;
2318 UV auv;
2319 NV anv;
2320
49704364
LW
2321 int strrelbeg = SvCUR(cat);
2322 tempsym_t lookahead;
a6ec74c1 2323
18529408 2324 items = endlist - beglist;
49704364
LW
2325 found = next_symbol( symptr );
2326
18529408 2327#ifndef PACKED_IS_OCTETS
49704364 2328 if (symptr->level == 0 && found && symptr->code == 'U' ){
18529408 2329 SvUTF8_on(cat);
49704364 2330 }
18529408 2331#endif
49704364
LW
2332
2333 while (found) {
a6ec74c1 2334 SV *lengthcode = Nullsv;
18529408 2335#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
49704364
LW
2336
2337 I32 datumtype = symptr->code;
2338 howlen_t howlen;
2339
2340 switch( howlen = symptr->howlen ){
2341 case e_no_len:
2342 case e_number:
2343 len = symptr->length;
2344 break;
2345 case e_star:
1109a392 2346 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ? 0 : items;
49704364
LW
2347 break;
2348 }
2349
2350 /* Look ahead for next symbol. Do we have code/code? */
2351 lookahead = *symptr;
2352 found = next_symbol(&lookahead);
2353 if ( symptr->flags & FLAG_SLASH ) {
2354 if (found){
2355 if ( 0 == strchr( "aAZ", lookahead.code ) ||
2356 e_star != lookahead.howlen )
2357 Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
2358 lengthcode = sv_2mortal(newSViv(sv_len(items > 0
18529408 2359 ? *beglist : &PL_sv_no)
49704364
LW
2360 + (lookahead.code == 'Z' ? 1 : 0)));
2361 } else {
2362 Perl_croak(aTHX_ "Code missing after '/' in pack");
2363 }
a6ec74c1 2364 }
49704364 2365
1109a392 2366 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2367 default:
1109a392 2368 Perl_croak(aTHX_ "Invalid type '%c' in pack", (int)TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2369 case '%':
49704364 2370 Perl_croak(aTHX_ "'%%' may not be used in pack");
a6ec74c1 2371 case '@':
49704364 2372 len += strrelbeg - SvCUR(cat);
a6ec74c1
JH
2373 if (len > 0)
2374 goto grow;
2375 len = -len;
2376 if (len > 0)
2377 goto shrink;
2378 break;
18529408
IZ
2379 case '(':
2380 {
49704364 2381 tempsym_t savsym = *symptr;
66c611c5
MHM
2382 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2383 symptr->flags |= group_modifiers;
49704364
LW
2384 symptr->patend = savsym.grpend;
2385 symptr->level++;
18529408 2386 while (len--) {
49704364
LW
2387 symptr->patptr = savsym.grpbeg;
2388 beglist = pack_rec(cat, symptr, beglist, endlist );
2389 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2390 break; /* No way to continue */
2391 }
66c611c5 2392 symptr->flags &= ~group_modifiers;
49704364
LW
2393 lookahead.flags = symptr->flags;
2394 *symptr = savsym;
18529408
IZ
2395 break;
2396 }
62f95557
IZ
2397 case 'X' | TYPE_IS_SHRIEKING:
2398 if (!len) /* Avoid division by 0 */
2399 len = 1;
2400 len = (SvCUR(cat)) % len;
2401 /* FALL THROUGH */
a6ec74c1
JH
2402 case 'X':
2403 shrink:
eb160463 2404 if ((I32)SvCUR(cat) < len)
49704364 2405 Perl_croak(aTHX_ "'X' outside of string in pack");
a6ec74c1
JH
2406 SvCUR(cat) -= len;
2407 *SvEND(cat) = '\0';
2408 break;
62f95557
IZ
2409 case 'x' | TYPE_IS_SHRIEKING:
2410 if (!len) /* Avoid division by 0 */
2411 len = 1;
2412 aint = (SvCUR(cat)) % len;
2413 if (aint) /* Other portable ways? */
2414 len = len - aint;
2415 else
2416 len = 0;
2417 /* FALL THROUGH */
49704364 2418
a6ec74c1
JH
2419 case 'x':
2420 grow:
2421 while (len >= 10) {
2422 sv_catpvn(cat, null10, 10);
2423 len -= 10;
2424 }
2425 sv_catpvn(cat, null10, len);
2426 break;
2427 case 'A':
2428 case 'Z':
2429 case 'a':
2430 fromstr = NEXTFROM;
2431 aptr = SvPV(fromstr, fromlen);
49704364 2432 if (howlen == e_star) {
a6ec74c1
JH
2433 len = fromlen;
2434 if (datumtype == 'Z')
2435 ++len;
2436 }
eb160463 2437 if ((I32)fromlen >= len) {
a6ec74c1 2438 sv_catpvn(cat, aptr, len);
f0df5f8b 2439 if (datumtype == 'Z' && len > 0)
a6ec74c1
JH
2440 *(SvEND(cat)-1) = '\0';
2441 }
2442 else {
2443 sv_catpvn(cat, aptr, fromlen);
2444 len -= fromlen;
2445 if (datumtype == 'A') {
2446 while (len >= 10) {
2447 sv_catpvn(cat, space10, 10);
2448 len -= 10;
2449 }
2450 sv_catpvn(cat, space10, len);
2451 }
2452 else {
2453 while (len >= 10) {
2454 sv_catpvn(cat, null10, 10);
2455 len -= 10;
2456 }
2457 sv_catpvn(cat, null10, len);
2458 }
2459 }
2460 break;
2461 case 'B':
2462 case 'b':
2463 {
2464 register char *str;
2465 I32 saveitems;
2466
2467 fromstr = NEXTFROM;
2468 saveitems = items;
2469 str = SvPV(fromstr, fromlen);
49704364 2470 if (howlen == e_star)
a6ec74c1
JH
2471 len = fromlen;
2472 aint = SvCUR(cat);
2473 SvCUR(cat) += (len+7)/8;
2474 SvGROW(cat, SvCUR(cat) + 1);
2475 aptr = SvPVX(cat) + aint;
eb160463 2476 if (len > (I32)fromlen)
a6ec74c1
JH
2477 len = fromlen;
2478 aint = len;
2479 items = 0;
2480 if (datumtype == 'B') {
2481 for (len = 0; len++ < aint;) {
2482 items |= *str++ & 1;
2483 if (len & 7)
2484 items <<= 1;
2485 else {
2486 *aptr++ = items & 0xff;
2487 items = 0;
2488 }
2489 }
2490 }
2491 else {
2492 for (len = 0; len++ < aint;) {
2493 if (*str++ & 1)
2494 items |= 128;
2495 if (len & 7)
2496 items >>= 1;
2497 else {
2498 *aptr++ = items & 0xff;
2499 items = 0;
2500 }
2501 }
2502 }
2503 if (aint & 7) {
2504 if (datumtype == 'B')
2505 items <<= 7 - (aint & 7);
2506 else
2507 items >>= 7 - (aint & 7);
2508 *aptr++ = items & 0xff;
2509 }
2510 str = SvPVX(cat) + SvCUR(cat);
2511 while (aptr <= str)
2512 *aptr++ = '\0';
2513
2514 items = saveitems;
2515 }
2516 break;
2517 case 'H':
2518 case 'h':
2519 {
2520 register char *str;
2521 I32 saveitems;
2522
2523 fromstr = NEXTFROM;
2524 saveitems = items;
2525 str = SvPV(fromstr, fromlen);
49704364 2526 if (howlen == e_star)
a6ec74c1
JH
2527 len = fromlen;
2528 aint = SvCUR(cat);
2529 SvCUR(cat) += (len+1)/2;
2530 SvGROW(cat, SvCUR(cat) + 1);
2531 aptr = SvPVX(cat) + aint;
eb160463 2532 if (len > (I32)fromlen)
a6ec74c1
JH
2533 len = fromlen;
2534 aint = len;
2535 items = 0;
2536 if (datumtype == 'H') {
2537 for (len = 0; len++ < aint;) {
2538 if (isALPHA(*str))
2539 items |= ((*str++ & 15) + 9) & 15;
2540 else
2541 items |= *str++ & 15;
2542 if (len & 1)
2543 items <<= 4;
2544 else {
2545 *aptr++ = items & 0xff;
2546 items = 0;
2547 }
2548 }
2549 }
2550 else {
2551 for (len = 0; len++ < aint;) {
2552 if (isALPHA(*str))
2553 items |= (((*str++ & 15) + 9) & 15) << 4;
2554 else
2555 items |= (*str++ & 15) << 4;
2556 if (len & 1)
2557 items >>= 4;
2558 else {
2559 *aptr++ = items & 0xff;
2560 items = 0;
2561 }
2562 }
2563 }
2564 if (aint & 1)
2565 *aptr++ = items & 0xff;
2566 str = SvPVX(cat) + SvCUR(cat);
2567 while (aptr <= str)
2568 *aptr++ = '\0';
2569
2570 items = saveitems;
2571 }
2572 break;
2573 case 'C':
2574 case 'c':
2575 while (len-- > 0) {
2576 fromstr = NEXTFROM;
1109a392 2577 switch (TYPE_NO_MODIFIERS(datumtype)) {
a6ec74c1
JH
2578 case 'C':
2579 aint = SvIV(fromstr);
2580 if ((aint < 0 || aint > 255) &&
2581 ckWARN(WARN_PACK))
9014280d 2582 Perl_warner(aTHX_ packWARN(WARN_PACK),
49704364 2583 "Character in 'C' format wrapped in pack");
a6ec74c1
JH
2584 achar = aint & 255;
2585 sv_catpvn(cat, &achar, sizeof(char));
2586 break;
2587 case 'c':
2588 aint = SvIV(fromstr);
2589 if ((aint < -128 || aint > 127) &&
2590 ckWARN(WARN_PACK))
9014280d 2591 Perl_warner(aTHX_ packWARN(WARN_PACK),
49704364 2592 "Character in 'c' format wrapped in pack" );
a6ec74c1
JH
2593 achar = aint & 255;
2594 sv_catpvn(cat, &achar, sizeof(char));
2595 break;
2596 }
2597 }
2598 break;
2599 case 'U':
2600 while (len-- > 0) {
2601 fromstr = NEXTFROM;
e87322b2 2602 auint = UNI_TO_NATIVE(SvUV(fromstr));
89ebb4a3 2603 SvGROW(cat, SvCUR(cat) + UTF8_MAXBYTES + 1);
52ea3e69
JH
2604 SvCUR_set(cat,
2605 (char*)uvchr_to_utf8_flags((U8*)SvEND(cat),
2606 auint,
2607 ckWARN(WARN_UTF8) ?
2608 0 : UNICODE_ALLOW_ANY)
2609 - SvPVX(cat));
a6ec74c1
JH
2610 }
2611 *SvEND(cat) = '\0';
2612 break;
2613 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2614 case 'f':
a6ec74c1
JH
2615 while (len-- > 0) {
2616 fromstr = NEXTFROM;
5cdb9e01
PG
2617#ifdef __VOS__
2618/* VOS does not automatically map a floating-point overflow
2619 during conversion from double to float into infinity, so we
2620 do it by hand. This code should either be generalized for
2621 any OS that needs it, or removed if and when VOS implements
2622 posix-976 (suggestion to support mapping to infinity).
2623 Paul.Green@stratus.com 02-04-02. */
2624 if (SvNV(fromstr) > FLT_MAX)
2625 afloat = _float_constants[0]; /* single prec. inf. */
2626 else if (SvNV(fromstr) < -FLT_MAX)
2627 afloat = _float_constants[0]; /* single prec. inf. */
2628 else afloat = (float)SvNV(fromstr);
2629#else
baf3cf9c
CB
2630# if defined(VMS) && !defined(__IEEE_FP)
2631/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2632 * on Alpha; fake it if we don't have them.
2633 */
2634 if (SvNV(fromstr) > FLT_MAX)
2635 afloat = FLT_MAX;
2636 else if (SvNV(fromstr) < -FLT_MAX)
2637 afloat = -FLT_MAX;
2638 else afloat = (float)SvNV(fromstr);
2639# else
a6ec74c1 2640 afloat = (float)SvNV(fromstr);
baf3cf9c 2641# endif
5cdb9e01 2642#endif
1109a392 2643 DO_BO_PACK_N(afloat, float);
a6ec74c1
JH
2644 sv_catpvn(cat, (char *)&afloat, sizeof (float));
2645 }
2646 break;
2647 case 'd':
a6ec74c1
JH
2648 while (len-- > 0) {
2649 fromstr = NEXTFROM;
5cdb9e01
PG
2650#ifdef __VOS__
2651/* VOS does not automatically map a floating-point overflow
2652 during conversion from long double to double into infinity,
2653 so we do it by hand. This code should either be generalized
2654 for any OS that needs it, or removed if and when VOS
2655 implements posix-976 (suggestion to support mapping to
2656 infinity). Paul.Green@stratus.com 02-04-02. */
2657 if (SvNV(fromstr) > DBL_MAX)
2658 adouble = _double_constants[0]; /* double prec. inf. */
2659 else if (SvNV(fromstr) < -DBL_MAX)
2660 adouble = _double_constants[0]; /* double prec. inf. */
2661 else adouble = (double)SvNV(fromstr);
2662#else
baf3cf9c
CB
2663# if defined(VMS) && !defined(__IEEE_FP)
2664/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2665 * on Alpha; fake it if we don't have them.
2666 */
2667 if (SvNV(fromstr) > DBL_MAX)
2668 adouble = DBL_MAX;
2669 else if (SvNV(fromstr) < -DBL_MAX)
2670 adouble = -DBL_MAX;
2671 else adouble = (double)SvNV(fromstr);
2672# else
a6ec74c1 2673 adouble = (double)SvNV(fromstr);
baf3cf9c 2674# endif
5cdb9e01 2675#endif
1109a392 2676 DO_BO_PACK_N(adouble, double);
a6ec74c1
JH
2677 sv_catpvn(cat, (char *)&adouble, sizeof (double));
2678 }
2679 break;
92d41999 2680 case 'F':
1109a392 2681 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
2682 while (len-- > 0) {
2683 fromstr = NEXTFROM;
2684 anv = SvNV(fromstr);
1109a392 2685 DO_BO_PACK_N(anv, NV);
92d41999
JH
2686 sv_catpvn(cat, (char *)&anv, NVSIZE);
2687 }
2688 break;
2689#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2690 case 'D':
1109a392
MHM
2691 /* long doubles can have unused bits, which may be nonzero */
2692 Zero(&aldouble, 1, long double);
92d41999
JH
2693 while (len-- > 0) {
2694 fromstr = NEXTFROM;
2695 aldouble = (long double)SvNV(fromstr);
1109a392 2696 DO_BO_PACK_N(aldouble, long double);
92d41999
JH
2697 sv_catpvn(cat, (char *)&aldouble, LONG_DOUBLESIZE);
2698 }
2699 break;
2700#endif
7212898e 2701#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2702 case 'n' | TYPE_IS_SHRIEKING:
7212898e 2703#endif
a6ec74c1
JH
2704 case 'n':
2705 while (len-- > 0) {
2706 fromstr = NEXTFROM;
ef108786 2707 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2708#ifdef HAS_HTONS
ef108786 2709 ai16 = PerlSock_htons(ai16);
a6ec74c1 2710#endif
ef108786 2711 CAT16(cat, &ai16);
a6ec74c1
JH
2712 }
2713 break;
7212898e 2714#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2715 case 'v' | TYPE_IS_SHRIEKING:
7212898e 2716#endif
a6ec74c1
JH
2717 case 'v':
2718 while (len-- > 0) {
2719 fromstr = NEXTFROM;
ef108786 2720 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2721#ifdef HAS_HTOVS
ef108786 2722 ai16 = htovs(ai16);
a6ec74c1 2723#endif
ef108786 2724 CAT16(cat, &ai16);
a6ec74c1
JH
2725 }
2726 break;
49704364 2727 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2728#if SHORTSIZE != SIZE16
49704364 2729 {
a6ec74c1
JH
2730 while (len-- > 0) {
2731 fromstr = NEXTFROM;
2732 aushort = SvUV(fromstr);
1109a392 2733 DO_BO_PACK(aushort, s);
a6ec74c1
JH
2734 sv_catpvn(cat, (char *)&aushort, sizeof(unsigned short));
2735 }
49704364
LW
2736 }
2737 break;
2738#else
2739 /* Fall through! */
a6ec74c1 2740#endif
49704364 2741 case 'S':
a6ec74c1 2742 {
a6ec74c1
JH
2743 while (len-- > 0) {
2744 fromstr = NEXTFROM;
ef108786
MHM
2745 au16 = (U16)SvUV(fromstr);
2746 DO_BO_PACK(au16, 16);
2747 CAT16(cat, &au16);
a6ec74c1
JH
2748 }
2749
2750 }
2751 break;
49704364 2752 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2753#if SHORTSIZE != SIZE16
49704364 2754 {
a6ec74c1
JH
2755 while (len-- > 0) {
2756 fromstr = NEXTFROM;
2757 ashort = SvIV(fromstr);
1109a392 2758 DO_BO_PACK(ashort, s);
a6ec74c1
JH
2759 sv_catpvn(cat, (char *)&ashort, sizeof(short));
2760 }
2761 }
49704364
LW
2762 break;
2763#else
2764 /* Fall through! */
a6ec74c1 2765#endif
49704364
LW
2766 case 's':
2767 while (len-- > 0) {
2768 fromstr = NEXTFROM;
ef108786
MHM
2769 ai16 = (I16)SvIV(fromstr);
2770 DO_BO_PACK(ai16, 16);
2771 CAT16(cat, &ai16);
a6ec74c1
JH
2772 }
2773 break;
2774 case 'I':
49704364 2775 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2776 while (len-- > 0) {
2777 fromstr = NEXTFROM;
2778 auint = SvUV(fromstr);
1109a392 2779 DO_BO_PACK(auint, i);
a6ec74c1
JH
2780 sv_catpvn(cat, (char*)&auint, sizeof(unsigned int));
2781 }
2782 break;
92d41999
JH
2783 case 'j':
2784 while (len-- > 0) {
2785 fromstr = NEXTFROM;
2786 aiv = SvIV(fromstr);
1109a392
MHM
2787#if IVSIZE == INTSIZE
2788 DO_BO_PACK(aiv, i);
2789#elif IVSIZE == LONGSIZE
2790 DO_BO_PACK(aiv, l);
2791#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2792 DO_BO_PACK(aiv, 64);
2793#endif
92d41999
JH
2794 sv_catpvn(cat, (char*)&aiv, IVSIZE);
2795 }
2796 break;
2797 case 'J':
2798 while (len-- > 0) {
2799 fromstr = NEXTFROM;
2800 auv = SvUV(fromstr);
1109a392
MHM
2801#if UVSIZE == INTSIZE
2802 DO_BO_PACK(auv, i);
2803#elif UVSIZE == LONGSIZE
2804 DO_BO_PACK(auv, l);
2805#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2806 DO_BO_PACK(auv, 64);
2807#endif
92d41999
JH
2808 sv_catpvn(cat, (char*)&auv, UVSIZE);
2809 }
2810 break;
a6ec74c1
JH
2811 case 'w':
2812 while (len-- > 0) {
2813 fromstr = NEXTFROM;
15e9f109 2814 anv = SvNV(fromstr);
a6ec74c1 2815
15e9f109 2816 if (anv < 0)
49704364 2817 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
a6ec74c1 2818
196b62db
NC
2819 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2820 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2821 any negative IVs will have already been got by the croak()
2822 above. IOK is untrue for fractions, so we test them
2823 against UV_MAX_P1. */
15e9f109 2824 if (SvIOK(fromstr) || anv < UV_MAX_P1)
a6ec74c1 2825 {
7c1b502b 2826 char buf[(sizeof(UV)*8)/7+1];
a6ec74c1 2827 char *in = buf + sizeof(buf);
196b62db 2828 UV auv = SvUV(fromstr);
a6ec74c1
JH
2829
2830 do {
eb160463 2831 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
2832 auv >>= 7;
2833 } while (auv);
2834 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2835 sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
2836 }
2837 else if (SvPOKp(fromstr)) { /* decimal string arithmetics */
2838 char *from, *result, *in;
2839 SV *norm;
2840 STRLEN len;
2841 bool done;
2842
2843 /* Copy string and check for compliance */
2844 from = SvPV(fromstr, len);
2845 if ((norm = is_an_int(from, len)) == NULL)
49704364 2846 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
a6ec74c1
JH
2847
2848 New('w', result, len, char);
2849 in = result + len;
2850 done = FALSE;
2851 while (!done)
2852 *--in = div128(norm, &done) | 0x80;
2853 result[len - 1] &= 0x7F; /* clear continue bit */
2854 sv_catpvn(cat, in, (result + len) - in);
2855 Safefree(result);
2856 SvREFCNT_dec(norm); /* free norm */
2857 }
2858 else if (SvNOKp(fromstr)) {
0258719b
NC
2859 /* 10**NV_MAX_10_EXP is the largest power of 10
2860 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
2861 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2862 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2863 And with that many bytes only Inf can overflow.
8f8d40ab
PG
2864 Some C compilers are strict about integral constant
2865 expressions so we conservatively divide by a slightly
2866 smaller integer instead of multiplying by the exact
2867 floating-point value.
0258719b
NC
2868 */
2869#ifdef NV_MAX_10_EXP
8f8d40ab
PG
2870/* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2871 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2872#else
8f8d40ab
PG
2873/* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2874 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2875#endif
a6ec74c1
JH
2876 char *in = buf + sizeof(buf);
2877
15e9f109 2878 anv = Perl_floor(anv);
a6ec74c1 2879 do {
15e9f109 2880 NV next = Perl_floor(anv / 128);
a6ec74c1 2881 if (in <= buf) /* this cannot happen ;-) */
49704364 2882 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2883 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
2884 anv = next;
2885 } while (anv > 0);
a6ec74c1
JH
2886 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2887 sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
2888 }
735b914b
JH
2889 else {
2890 char *from, *result, *in;
2891 SV *norm;
2892 STRLEN len;
2893 bool done;
2894
2895 /* Copy string and check for compliance */
2896 from = SvPV(fromstr, len);
2897 if ((norm = is_an_int(from, len)) == NULL)
49704364 2898 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
2899
2900 New('w', result, len, char);
2901 in = result + len;
2902 done = FALSE;
2903 while (!done)
2904 *--in = div128(norm, &done) | 0x80;
2905 result[len - 1] &= 0x7F; /* clear continue bit */
2906 sv_catpvn(cat, in, (result + len) - in);
2907 Safefree(result);
2908 SvREFCNT_dec(norm); /* free norm */
2909 }
a6ec74c1
JH
2910 }
2911 break;
2912 case 'i':
49704364 2913 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1
JH
2914 while (len-- > 0) {
2915 fromstr = NEXTFROM;
2916 aint = SvIV(fromstr);
1109a392 2917 DO_BO_PACK(aint, i);
a6ec74c1
JH
2918 sv_catpvn(cat, (char*)&aint, sizeof(int));
2919 }
2920 break;
7212898e 2921#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2922 case 'N' | TYPE_IS_SHRIEKING:
7212898e 2923#endif
a6ec74c1
JH
2924 case 'N':
2925 while (len-- > 0) {
2926 fromstr = NEXTFROM;
ef108786 2927 au32 = SvUV(fromstr);
a6ec74c1 2928#ifdef HAS_HTONL
ef108786 2929 au32 = PerlSock_htonl(au32);
a6ec74c1 2930#endif
ef108786 2931 CAT32(cat, &au32);
a6ec74c1
JH
2932 }
2933 break;
7212898e 2934#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2935 case 'V' | TYPE_IS_SHRIEKING:
7212898e 2936#endif
a6ec74c1
JH
2937 case 'V':
2938 while (len-- > 0) {
2939 fromstr = NEXTFROM;
ef108786 2940 au32 = SvUV(fromstr);
a6ec74c1 2941#ifdef HAS_HTOVL
ef108786 2942 au32 = htovl(au32);
a6ec74c1 2943#endif
ef108786 2944 CAT32(cat, &au32);
a6ec74c1
JH
2945 }
2946 break;
49704364 2947 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2948#if LONGSIZE != SIZE32
49704364 2949 {
a6ec74c1
JH
2950 while (len-- > 0) {
2951 fromstr = NEXTFROM;
2952 aulong = SvUV(fromstr);
1109a392 2953 DO_BO_PACK(aulong, l);
a6ec74c1
JH
2954 sv_catpvn(cat, (char *)&aulong, sizeof(unsigned long));
2955 }
2956 }
49704364
LW
2957 break;
2958#else
2959 /* Fall though! */
a6ec74c1 2960#endif
49704364 2961 case 'L':
a6ec74c1
JH
2962 {
2963 while (len-- > 0) {
2964 fromstr = NEXTFROM;
ef108786
MHM
2965 au32 = SvUV(fromstr);
2966 DO_BO_PACK(au32, 32);
2967 CAT32(cat, &au32);
a6ec74c1
JH
2968 }
2969 }
2970 break;
49704364 2971 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2972#if LONGSIZE != SIZE32
49704364 2973 {
a6ec74c1
JH
2974 while (len-- > 0) {
2975 fromstr = NEXTFROM;
2976 along = SvIV(fromstr);
1109a392 2977 DO_BO_PACK(along, l);
a6ec74c1
JH
2978 sv_catpvn(cat, (char *)&along, sizeof(long));
2979 }
2980 }
49704364
LW
2981 break;
2982#else
2983 /* Fall though! */
a6ec74c1 2984#endif
49704364
LW
2985 case 'l':
2986 while (len-- > 0) {
2987 fromstr = NEXTFROM;
ef108786
MHM
2988 ai32 = SvIV(fromstr);
2989 DO_BO_PACK(ai32, 32);
2990 CAT32(cat, &ai32);
a6ec74c1
JH
2991 }
2992 break;
2993#ifdef HAS_QUAD
2994 case 'Q':
2995 while (len-- > 0) {
2996 fromstr = NEXTFROM;
2997 auquad = (Uquad_t)SvUV(fromstr);
1109a392 2998 DO_BO_PACK(auquad, 64);
a6ec74c1
JH
2999 sv_catpvn(cat, (char*)&auquad, sizeof(Uquad_t));
3000 }
3001 break;
3002 case 'q':
3003 while (len-- > 0) {
3004 fromstr = NEXTFROM;
3005 aquad = (Quad_t)SvIV(fromstr);
1109a392 3006 DO_BO_PACK(aquad, 64);
a6ec74c1
JH
3007 sv_catpvn(cat, (char*)&aquad, sizeof(Quad_t));
3008 }
3009 break;
3010#endif
3011 case 'P':
3012 len = 1; /* assume SV is correct length */
49704364 3013 /* Fall through! */
a6ec74c1
JH
3014 case 'p':
3015 while (len-- > 0) {
3016 fromstr = NEXTFROM;
28a4f200 3017 SvGETMAGIC(fromstr);
3018 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3019 else {
3020 STRLEN n_a;
3021 /* XXX better yet, could spirit away the string to
3022 * a safe spot and hang on to it until the result
3023 * of pack() (and all copies of the result) are
3024 * gone.
3025 */
3026 if (ckWARN(WARN_PACK) && (SvTEMP(fromstr)
3027 || (SvPADTMP(fromstr)
3028 && !SvREADONLY(fromstr))))
3029 {
9014280d 3030 Perl_warner(aTHX_ packWARN(WARN_PACK),
a6ec74c1
JH
3031 "Attempt to pack pointer to temporary value");
3032 }
3033 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3034 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3035 else
28a4f200 3036 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3037 }
1109a392 3038 DO_BO_PACK_P(aptr);
a6ec74c1
JH
3039 sv_catpvn(cat, (char*)&aptr, sizeof(char*));
3040 }
3041 break;
3042 case 'u':
3043 fromstr = NEXTFROM;
3044 aptr = SvPV(fromstr, fromlen);
3045 SvGROW(cat, fromlen * 4 / 3);
19c9db5e 3046 if (len <= 2)
a6ec74c1
JH
3047 len = 45;
3048 else
3049 len = len / 3 * 3;
3050 while (fromlen > 0) {
3051 I32 todo;
3052
eb160463 3053 if ((I32)fromlen > len)
a6ec74c1
JH
3054 todo = len;
3055 else
3056 todo = fromlen;
3057 doencodes(cat, aptr, todo);
3058 fromlen -= todo;
3059 aptr += todo;
3060 }
3061 break;
3062 }
49704364 3063 *symptr = lookahead;
a6ec74c1 3064 }
49704364 3065 return beglist;
18529408
IZ
3066}
3067#undef NEXTFROM
3068
3069
3070PP(pp_pack)
3071{
3072 dSP; dMARK; dORIGMARK; dTARGET;
3073 register SV *cat = TARG;
3074 STRLEN fromlen;
3075 register char *pat = SvPVx(*++MARK, fromlen);
3076 register char *patend = pat + fromlen;
3077
3078 MARK++;
3079 sv_setpvn(cat, "", 0);
3080
7accc089 3081 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3082
a6ec74c1
JH
3083 SvSETMAGIC(cat);
3084 SP = ORIGMARK;
3085 PUSHs(cat);
3086 RETURN;
3087}
a6ec74c1 3088
73cb7263
NC
3089/*
3090 * Local variables:
3091 * c-indentation-style: bsd
3092 * c-basic-offset: 4
3093 * indent-tabs-mode: t
3094 * End:
3095 *
edf815fd 3096 * vim: shiftwidth=4:
73cb7263 3097*/